sylware / cgperf (public) (License: GNU GPLv3) (since 2021-07-12) (hash sha1)
gperf port to simple c89 with benign bits of c99/c11
List of commits:
Subject Hash Author Date (UTC)
Initial commit 4dead000f30ed99ab1b174116d39047db435c858 Sylvain BERTRAND 2021-07-12 17:34:23
Commit 4dead000f30ed99ab1b174116d39047db435c858 - Initial commit
Author: Sylvain BERTRAND
Author date (UTC): 2021-07-12 17:34
Committer name: Sylvain BERTRAND
Committer date (UTC): 2021-07-12 17:34
Signing key:
Signing status: N
Tree: 4910f1ff4dc47719966da822a9189b9d7d5333d4
File Lines added Lines deleted
all.c 12 0
bool-array.c 77 0
bool-array.h 36 0
c_fixing.h 16 0
getline.c 93 0
getline.h 20 0
globals.h 15 0
gperf.pdf 0 0
hash-table.c 156 0
hash-table.h 45 0
hash.c 34 0
hash.h 13 0
input.c 1016 0
input.h 53 0
keyword.c 148 0
keyword.h 68 0
keyword_list.c 133 0
keyword_list.h 36 0
main.c 121 0
namespace/bool-array.h 14 0
namespace/getline.h 8 0
namespace/globals.h 10 0
namespace/hash-table.h 18 0
namespace/hash.h 6 0
namespace/input.c 12 0
namespace/input.h 11 0
namespace/keyword.h 20 0
namespace/keyword_list.h 22 0
namespace/options.h 118 0
namespace/output.c 51 0
namespace/output.h 40 0
namespace/positions.h 62 0
namespace/search.c 14 0
namespace/search.h 56 0
options.c 940 0
options.h 178 0
output.c 2019 0
output.h 105 0
positions.c 350 0
positions.h 109 0
search.c 2075 0
search.h 116 0
tests/.gitignore 3 0
tests/ada-pred.exp 54 0
tests/ada-res.exp 63 0
tests/ada.gperf 63 0
tests/adadefs.gperf 54 0
tests/c++.gperf 47 0
tests/c-parse.exp 223 0
tests/c-parse.gperf 88 0
tests/c.exp 32 0
tests/c.gperf 32 0
tests/charsets.exp 1876 0
tests/charsets.gperf 800 0
tests/chill.exp 1564 0
tests/chill.gperf 308 0
tests/cplusplus.exp 273 0
tests/cplusplus.gperf 111 0
tests/gpc.exp 150 0
tests/gpc.gperf 48 0
tests/incomplete.exp 118 0
tests/incomplete.gperf 14 0
tests/irc.gperf 63 0
tests/java.exp 207 0
tests/java.gperf 80 0
tests/jscript.gperf 73 0
tests/jstest1.gperf 137 0
tests/jstest2.gperf 142 0
tests/jstest3.gperf 142 0
tests/jstest4.gperf 142 0
tests/lang-ucs2.exp 20 0
tests/lang-ucs2.gperf 26 0
tests/ 0 0
tests/lang-utf8.exp 20 0
tests/lang-utf8.gperf 26 0
tests/languages.exp 1494 0
tests/languages.gperf 699 0
tests/makeinfo.gperf 116 0
tests/modula.exp 106 0
tests/modula2.exp 250 0
tests/modula2.gperf 40 0
tests/modula3.gperf 106 0
tests/objc.exp 191 0
tests/objc.gperf 64 0
tests/pascal.exp 36 0
tests/pascal.gperf 36 0
tests/permut2.exp 107 0
tests/permut2.gperf 4 0
tests/permut3.exp 107 0
tests/permut3.gperf 4 0
tests/permutc2.exp 156 0
tests/permutc2.gperf 14 0
tests/smtp.gperf 206 0
tests/test-4.exp 227 0
tests/test-6.exp 140 0
tests/test-7.exp 32 0
tests/test.c 38 0
tests/test2.c 74 0
version.h 4 0
File ABBREVIATIONS added (mode: 100644) (index 0000000..5b72180)
1 kw KeyWord
2 kwl KeyWord_List
3 opts OPTionS
4 pos POSition
5 positer POSitionITERator
6 posrevit POSitionREVerseITerator
7 posstrp POSitionSTRingParser
8 t This
File CODING_STYLE added (mode: 100644) (index 0000000..82ea1f8)
1 - keep an eye on ABBREVIATIONS file (t is "this")
2 - names of complex types are mixed case-ed
3 - Indentation is tabs of 8 spaces, and we "prefer" lines of 100 chars which is
4 not a hard limit due to the depth of some brutal functions here and there
File README added (mode: 100644) (index 0000000..84b1a92)
1 You should not use gperf anymore, very probably. It "may" be still useful in
2 use cases with _very_ intensive, time/memory critical, and massive, _really_
3 massive, keyword lists. And even there, you should seriously consider
4 alternatives which are saner and semantically optimized to your use case. And
5 nowadays we know that c++ is never a good idea: the elephant in the room is the
6 obvious c++ compiler implementation cost, which is a significant detriment to
7 foster "working" alternative compilers, not to mention it is very prone to the
8 rube goldberg machine syndrome.
10 Don't be a masochist trying to reverse engineer the heuristics from the code
11 first hand: Read gperf.pdf before anything else.
13 This is a "port to _simple_ C89 with benign bits of c99/c11". It is a properly
14 namespace-ized (then "reuse-able" in any other project), "one compilation unit"
15 project: just compile all.c and link it to your libc and libm the way you want.
16 Of course, bugs were introduced while porting.
File all.c added (mode: 100644) (index 0000000..77a9cbb)
1 #include "positions.c"
2 #include "options.c"
3 #include "keyword.c"
4 #include "keyword_list.c"
5 #include "input.c"
6 #include "getline.c"
7 #include "search.c"
8 #include "hash-table.c"
9 #include "hash.c"
10 #include "bool-array.c"
11 #include "output.c"
12 #include "main.c"
File bool-array.c added (mode: 100644) (index 0000000..88f3c5a)
3 #include <stdbool.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include "c_fixing.h"
7 #include "globals.h"
8 #include "options.h"
9 #include "bool-array.h"
10 /*------------------------------------------------------------------------------------------------*/
11 #include "namespace/globals.h"
12 #include "namespace/options.h"
13 #include "namespace/bool-array.h"
14 /*------------------------------------------------------------------------------------------------*/
15 /*{{{ ba_new */
16 static struct Bool_Array *ba_new(u32 size)
17 {
18 struct Bool_Array *t;
20 t = calloc(1, sizeof(*t));
21 t->size = size;
22 t->iteration_number = 1;
23 t->storage_array = calloc(size, sizeof(*(t->storage_array)));
24 if (OPTS(DEBUG))
25 fprintf (stderr, "\nbool array size = %d, total bytes = %d\n", t->size, (u32)(t->size * sizeof(t->storage_array[0])));
26 return t;
27 }/*}}}*/
28 /*{{{ ba_del */
29 static void ba_del(struct Bool_Array *t)
30 {
31 if (OPTS(DEBUG))
32 fprintf(stderr, "\ndumping boolean array information\nsize = %d\niteration number = %d\nend of array dump\n", t->size, t->iteration_number);
33 free(t->storage_array);
34 free(t);
35 }/*}}}*/
36 /*{{{ ba_clear */
37 /* resets all bits to zero */
38 static void ba_clear(struct Bool_Array *t)
39 {
40 /*
41 * If we wrap around it's time to zero things out again! However, this only occurs once
42 * about every 2^32 iterations, so it will not happen more frequently than once per second.
43 */
44 ++(t->iteration_number);
45 if (t->iteration_number == 0) {
46 t->iteration_number = 1;
47 memset(t->storage_array, 0, t->size * sizeof(*(t->storage_array)));
48 if (OPTS(DEBUG)) {
49 fprintf(stderr, "(re-initialized bool_array)\n");
50 fflush(stderr);
51 }
52 }
53 }/*}}}*/
54 /*{{{ ba_set_bit */
55 /*
56 * Sets the specified bit to true.
57 * Returns its previous value (false or true).
58 */
59 static bool ba_set_bit(struct Bool_Array *t, u32 index)
60 {
61 if (t->storage_array[index] == t->iteration_number)
62 /* the bit was set since the last clear() call */
63 return true;
64 else {
65 /* the last operation on this bit was clear(). Set it now. */
66 t->storage_array[index] = t->iteration_number;
67 return false;
68 }
69 }/*}}}*/
70 /*------------------------------------------------------------------------------------------------*/
71 #define EPILOG
72 #include "namespace/globals.h"
73 #include "namespace/options.h"
74 #include "namespace/bool-array.h"
75 #undef EPILOG
76 /*------------------------------------------------------------------------------------------------*/
77 #endif
File bool-array.h added (mode: 100644) (index 0000000..9558589)
3 #include "c_fixing.h"
4 /*------------------------------------------------------------------------------------------------*/
5 #include "namespace/bool-array.h"
6 /*------------------------------------------------------------------------------------------------*/
7 /*{{{ constants and types */
/*
 * A set of `size` bits. Bit `index` counts as set when storage_array[index] equals
 * iteration_number (see ba_set_bit()), so ba_clear() can reset every bit by advancing
 * iteration_number.
 */
8 struct Bool_Array {
9 /*{{{ private */
10 /* size of array */
11 u32 size;
12 /*
13 * Current iteration number. Always nonzero. Starts out as 1, and is
14 * incremented each time clear() is called.
15 */
16 u32 iteration_number;
17 /*
18 * for each index, we store in storage_array[index] the
19 * iteration_number at the time set_bit(index) was last called
20 */
21 u32 *storage_array;
22 /*}}} private -- END */
23 };
24 /*}}} constants and types -- END */
25 /*{{{ public static methods */
26 static struct Bool_Array *ba_new(u32 size);
27 static void ba_del(struct Bool_Array *t);
28 static void ba_clear(struct Bool_Array *t);
29 static bool ba_set_bit(struct Bool_Array *t, u32 index);
30 /*}}} public static methods -- END */
31 /*------------------------------------------------------------------------------------------------*/
32 #define EPILOG
33 #include "namespace/bool-array.h"
34 #undef EPILOG
35 /*------------------------------------------------------------------------------------------------*/
36 #endif
File c_fixing.h added (mode: 100644) (index 0000000..a581cb0)
#ifndef C_FIXING_H
#define C_FIXING_H
/*
 * Short names for the fixed-width integer and floating point types, their limits, and an
 * endless-loop keyword.
 */
#include <stdint.h>
#include <limits.h>
#define u8 uint8_t
#define u32 uint32_t
/* <limits.h> has no UINT_MIN: the minimum of an unsigned type is always 0 */
#define U32_MIN UINT32_C(0)
/* the 32-bit limits come from <stdint.h> so that they match u32/s32 whatever the width of int */
#define U32_MAX UINT32_MAX
#define u64 uint64_t
#define s32 int32_t
#define S32_MIN INT32_MIN
#define S32_MAX INT32_MAX
#define f32 float
#define f64 double
/* `loop { ... }` runs until a break/return/exit inside the body */
#define loop for(;;)
#endif
File getline.c added (mode: 100644) (index 0000000..cae84c9)
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <assert.h>
7 #include "c_fixing.h"
8 #include "getline.h"
9 /*------------------------------------------------------------------------------------------------*/
10 #include "namespace/getline.h"
11 /*------------------------------------------------------------------------------------------------*/
12 /*{{{ get_delim */
13 static s32 get_delim(u8 **lineptr, u32 *n, s32 delimiter, FILE *stream)
14 {
15 return getstr(lineptr, n, stream, delimiter, 0);
16 }/*}}}*/
17 /*{{{ getstr */
18 /* always add at least this many bytes when extending the buffer */
19 #define MIN_CHUNK 64
20 /* Reads up to (and including) a TERMINATOR from STREAM into *LINEPTR + OFFSET
21 (and null-terminate it). *LINEPTR is a pointer returned from new [] (or
22 NULL), pointing to *N characters of space. It is realloc'd as
23 necessary. Returns the number of characters read (not including the
24 null terminator), or -1 on error or immediate EOF.
25 NOTE: There is another getstr() function declared in <curses.h>. */
26 static s32 getstr(u8 **lineptr, u32 *n, FILE *stream, u8 terminator,
27 u32 offset)
28 {
29 u32 nchars_avail; /* allocated but unused chars in *LINEPTR */
30 u8 *read_pos; /* Where we're reading into *LINEPTR. */
32 if (!lineptr || !n || !stream)
33 return -1;
34 if (!*lineptr) {
35 *n = MIN_CHUNK;
36 *lineptr = calloc(*n, sizeof(u8));
37 }
38 nchars_avail = *n - offset;
39 read_pos = *lineptr + offset;
40 loop {
41 s32 c;
43 c = getc(stream);
44 /*
45 * we always want at least one char left in the buffer, since we always (unless we
46 * get an error while reading the first char) NUL-terminate the line buffer
47 */
48 assert(*n - nchars_avail == (u32)(read_pos - *lineptr));
49 if (nchars_avail < 2) {
50 u8 *new_line;
52 if (*n > MIN_CHUNK)
53 *n *= 2;
54 else
55 *n += MIN_CHUNK;
57 nchars_avail = *n + *lineptr - read_pos;
58 new_line = calloc(*n, sizeof(u8));
59 if (*lineptr != 0) {
60 memcpy(new_line, *lineptr, read_pos - *lineptr);
61 free(*lineptr);
62 }
63 *lineptr = new_line;
64 read_pos = *n - nchars_avail + *lineptr;
65 assert(*n - nchars_avail == (u32)(read_pos - *lineptr));
66 }
67 if (c == EOF || ferror(stream)) {
68 /* return partial line, if any */
69 if (read_pos == *lineptr)
70 return -1;
71 else
72 break;
73 }
74 *read_pos++ = c;
75 nchars_avail--;
77 if (c == terminator)
78 /* return the line */
79 break;
80 }
81 /* done - NUL terminate and return the number of chars read */
82 *read_pos = '\0';
83 return read_pos - (*lineptr + offset);
84 }
85 #undef MIN_CHUNK
86 /*}}}*/
87 /*------------------------------------------------------------------------------------------------*/
88 #define EPILOG
89 #include "namespace/getline.h"
90 #undef EPILOG
91 /*------------------------------------------------------------------------------------------------*/
92 #endif
File getline.h added (mode: 100644) (index 0000000..3b2e9fe)
3 #include <stdio.h>
4 #include "c_fixing.h"
5 /*------------------------------------------------------------------------------------------------*/
6 #include "namespace/getline.h"
7 /*------------------------------------------------------------------------------------------------*/
8 /*{{{ local */
9 static s32 getstr(u8 **lineptr, u32 *n, FILE *stream, u8 terminator,
10 u32 offset);
11 /*}}}*/
12 /*{{{ public */
13 static s32 get_delim(u8 **lineptr, u32 *n, s32 delimiter, FILE *stream);
14 /*}}}*/
15 /*------------------------------------------------------------------------------------------------*/
16 #define EPILOG
17 #include "namespace/getline.h"
18 #undef EPILOG
19 /*------------------------------------------------------------------------------------------------*/
20 #endif
File globals.h added (mode: 100644) (index 0000000..0e9eb2b)
3 #include "options.h"
4 /*----------------------------------------------------------------------------*/
5 #include "namespace/globals.h"
6 #include "namespace/options.h"
7 /*----------------------------------------------------------------------------*/
8 static struct Options *options;
9 /*----------------------------------------------------------------------------*/
10 #define EPILOG
11 #include "namespace/globals.h"
12 #include "namespace/options.h"
13 #undef EPILOG
14 /*----------------------------------------------------------------------------*/
15 #endif
File gperf.pdf added (mode: 100644) (index 0000000..0b67e08)
File hash-table.c added (mode: 100644) (index 0000000..2df4f0b)
3 #include <stdbool.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <stdio.h>
7 #include "c_fixing.h"
8 #include "keyword.h"
9 #include "hash-table.h"
10 #include "hash.h"
11 /*------------------------------------------------------------------------------------------------*/
12 #include "namespace/hash-table.h"
13 #include "namespace/keyword.h"
14 #include "namespace/hash.h"
15 /*------------------------------------------------------------------------------------------------*/
16 /*{{{ ht_new */
17 /*
18 * We make the size of the hash table a power of 2. This allows for two optimizations: It eliminates
19 * the modulo instruction, and allows for an easy secondary hashing function.
20 */
21 static struct Hash_Table *ht_new(u32 size, bool ignore_length)
22 {
23 struct Hash_Table *t;
24 u32 shift;
26 t = calloc(1, sizeof(*t));
27 t->ignore_length = ignore_length;
29 /* there need to be enough spare entries */
30 size = size * (u32)ht_size_factor;
32 /* find smallest power of 2 that is >= size */
33 shift = 0;
34 if ((size >> 16) > 0) {
35 size = size >> 16;
36 shift += 16;
37 }
38 if ((size >> 8) > 0) {
39 size = size >> 8;
40 shift += 8;
41 }
42 if ((size >> 4) > 0) {
43 size = size >> 4;
44 shift += 4;
45 }
46 if ((size >> 2) > 0) {
47 size = size >> 2;
48 shift += 2;
49 }
50 if ((size >> 1) > 0) {
51 size = size >> 1;
52 shift += 1;
53 }
54 t->log_size = shift;
55 t->size = 1 << shift;
57 t->table = calloc(t->size, sizeof(*(t->table)));
58 return t;
59 }/*}}}*/
60 /*{{{ ht_del */
61 static void ht_del(struct Hash_Table *t)
62 {
63 free(t->table);
64 free(t);
65 }/*}}}*/
66 /*{{{ ht_insert */
67 /*
68 * Attempts to insert ITEM in the table. If there is already an equal entry in it, returns it.
69 * Otherwise inserts ITEM and returns NULL.
70 */
71 static struct Keyword *ht_insert(struct Hash_Table *t, struct Keyword *item)
72 {
73 u32 hash_val;
74 u32 probe;
75 u32 increment;
77 hash_val = hashpjw((u8*)item->selchars, item->selchars_length * sizeof(u32));
78 probe = hash_val & (t->size - 1);
79 increment = (((hash_val >> t->log_size) ^ (t->ignore_length ? 0 : item->allchars_length))
80 << 1) + 1;
81 /*
82 * note that because _size is a power of 2 and increment is odd, we have
83 * gcd(increment,_size) = 1, which guarantees that we'll find an empty entry during the loop
84 */
85 loop {
86 if (t->table[probe] == 0)
87 break;
88 if (ht_equal(t, t->table[probe], item))
89 return t->table[probe];
90 ++(t->collisions);
91 probe = (probe + increment) & (t->size - 1);
92 }
93 t->table[probe] = item;
94 return 0;
95 }/*}}}*/
96 /*{{{ ht_equal */
97 static bool ht_equal(struct Hash_Table *t, struct Keyword *item1, struct Keyword *item2)
98 {
99 return item1->selchars_length == item2->selchars_length
100 && memcmp(item1->selchars, item2->selchars, item1->selchars_length * sizeof(u32))
101 == 0
102 && (t->ignore_length || item1->allchars_length == item2->allchars_length);
103 }/*}}}*/
104 /*{{{ ht_dump */
105 static void ht_dump(struct Hash_Table *t)
106 {
107 s32 field_width;
108 s32 i;
110 field_width = 0;
111 {
112 s32 i;
114 i = t->size - 1;
115 loop {
116 if (i < 0)
117 break;
118 if (t->table[i] != 0)
119 if (field_width < t->table[i]->selchars_length)
120 field_width = t->table[i]->selchars_length;
121 i--;
122 }
123 }
124 fprintf(stderr, "\ndumping the hash table\ntotal available table slots = %d, total bytes = %d, total collisions = %d\nlocation, %*s, keyword\n", t->size, t->size * (u32)(sizeof(*(t->table))), t->collisions, field_width, "keysig");
126 i = t->size - 1;
127 loop {
128 if (i < 0)
129 break;
130 if (t->table[i] != 0) {
131 s32 j;
133 fprintf(stderr, "%8d, ", i);
134 if (field_width > t->table[i]->selchars_length)
135 fprintf(stderr, "%*s", field_width - t->table[i]->selchars_length, "");
136 j = 0;
137 loop {
138 if (j >= t->table[i]->selchars_length)
139 break;
140 putc(t->table[i]->selchars[j], stderr);
141 ++j;
142 }
143 fprintf(stderr, ", %.*s\n", t->table[i]->allchars_length, t->table[i]->allchars);
144 }
145 i--;
146 }
147 fprintf(stderr, "\nend dumping hash table\n\n");
148 }/*}}}*/
149 /*------------------------------------------------------------------------------------------------*/
150 #define EPILOG
151 #include "namespace/hash-table.h"
152 #include "namespace/keyword.h"
153 #include "namespace/hash.h"
154 #undef EPILOG
155 /*------------------------------------------------------------------------------------------------*/
156 #endif
File hash-table.h added (mode: 100644) (index 0000000..1fb95e3)
3 #include <stdbool.h>
4 #include "c_fixing.h"
5 #include "keyword.h"
6 /*------------------------------------------------------------------------------------------------*/
7 #include "namespace/hash-table.h"
8 #include "namespace/keyword.h"
9 /*------------------------------------------------------------------------------------------------*/
10 /*{{{ constants and types */
11 /* to make double hashing efficient, there need to be enough spare entries */
12 enum {
13 ht_size_factor = 10
14 };
/*
 * Table of keyword pointers filled by ht_insert(), which reports a keyword equal (see
 * ht_equal()) to one already stored instead of storing it again. `size` is a power of 2 (set by
 * ht_new()) and an unused slot of `table` holds a null pointer.
 */
15 struct Hash_Table {
16 /*{{{ private */
17 /* a detail of the comparison function */
18 bool ignore_length;
19 /* Statistics: Number of collisions so far. */
20 u32 collisions;
21 /* log2(_size). */
22 u32 log_size;
23 /* size of the vector */
24 u32 size;
25 /* vector of entries */
26 struct Keyword **table;
27 /*}}} private -- END */
28 };
29 /*}}} constants and types -- END */
30 /*{{{ public static methods */
31 static struct Hash_Table *ht_new(u32 size, bool ignore_length);
32 static void ht_del(struct Hash_Table *t);
33 static struct Keyword *ht_insert(struct Hash_Table *t, struct Keyword *item);
34 static void ht_dump(struct Hash_Table *t);
35 /*}}} public static methods -- END */
36 /*{{{ private static methods */
37 static bool ht_equal(struct Hash_Table *t, struct Keyword *item1, struct Keyword *item2);
38 /*}}} private static methods -- END */
39 /*------------------------------------------------------------------------------------------------*/
40 #define EPILOG
41 #include "namespace/hash-table.h"
42 #include "namespace/keyword.h"
43 #undef EPILOG
44 /*------------------------------------------------------------------------------------------------*/
45 #endif
File hash.c added (mode: 100644) (index 0000000..74114ad)
1 #ifndef CGPERF_HASH_C
2 #define CGPERF_HASH_C
3 #include "c_fixing.h"
4 /*------------------------------------------------------------------------------------------------*/
5 #include "namespace/hash.h"
6 /*------------------------------------------------------------------------------------------------*/
7 /*
8 * Some useful hash function.
9 * It's not a particularly good hash function (<< 5 would be better than << 4), but people believe
10 * in it because it comes from Dragon book.
11 */
12 static u32 hashpjw(u8 *x, u32 len) /* From Dragon book, p436 */
13 {
14 u32 h;
15 u32 g;
17 h = 0;
18 loop {
19 if (len <= 0)
20 break;
21 h = (h << 4) + *x++;
22 g = h & 0xf0000000;
23 if (g != 0)
24 h = (h ^ (g >> 24)) ^ g;
25 len--;
26 }
27 return h;
28 }
29 /*------------------------------------------------------------------------------------------------*/
30 #define EPILOG
31 #include "namespace/hash.h"
32 #undef EPILOG
33 /*------------------------------------------------------------------------------------------------*/
34 #endif
File hash.h added (mode: 100644) (index 0000000..add7a1a)
1 #ifndef CGPERF_HASH_H
2 #define CGPERF_HASH_H
3 #include "c_fixing.h"
4 /*------------------------------------------------------------------------------------------------*/
5 #include "namespace/hash.h"
6 /*------------------------------------------------------------------------------------------------*/
7 static u32 hashpjw(u8 *string, u32 len);
8 /*------------------------------------------------------------------------------------------------*/
9 #define EPILOG
10 #include "namespace/hash.h"
11 #undef EPILOG
12 /*------------------------------------------------------------------------------------------------*/
13 #endif
File input.c added (mode: 100644) (index 0000000..a6784a0)
1 #ifndef CGPERF_INPUT_C
2 #define CGPERF_INPUT_C
3 #include <stdbool.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include "globals.h"
8 #include "keyword.h"
9 #include "input.h"
10 #include "getline.h"
11 #include "options.h"
12 #include "keyword.h"
13 #include "keyword_list.h"
14 /*------------------------------------------------------------------------------------------------*/
15 #include "namespace/globals.h"
16 #include "namespace/input.h"
17 #include "namespace/input.c"
18 #include "namespace/keyword.h"
19 #include "namespace/getline.h"
20 #include "namespace/options.h"
21 #include "namespace/keyword.h"
22 #include "namespace/keyword_list.h"
23 /*------------------------------------------------------------------------------------------------*/
24 /*{{{ local */
25 /*{{{ pretty_input_file_name */
26 /* returns a pretty representation of the input file name, for error and warning messages */
27 static u8 *pretty_input_file_name(void)
28 {
29 u8 *fn;
31 fn = options->input_file_name;
32 if (fn != 0)
33 return fn;
34 else
35 return "(standard input)";
36 }/*}}}*/
37 /*{{{ is_define_declaration */
38 /*
39 * Tests if the given line contains a "%define DECL ARG" declaration. If yes, it sets *ARGP to the
40 * argument, and returns true. Otherwise, it returns false.
41 */
42 static bool is_define_declaration(u8 *line, u8 *line_end, u32 lineno, u8 *decl, u8 **argp)
43 {
44 u8 *d;
45 u8 *arg;
46 u8 *p;
47 /* skip '%' */
48 ++line;
49 /* skip "define" */
50 {
51 u8 *d;
52 d = "define";
53 loop {
54 if (*d == 0)
55 break;
56 if (!(line < line_end))
57 return false;
58 if (!(*line == *d))
59 return false;
60 ++line;
61 ++d;
62 }
63 if (!(line < line_end && (*line == ' ' || *line == '\t')))
64 return false;
65 }
66 /* skip whitespace */
67 loop {
68 if (line >= line_end || !(*line == ' ' || *line == '\t'))
69 break;
70 ++line;
71 }
72 /* skip DECL */
73 d = decl;
74 loop {
75 if (*d == 0)
76 break;
77 if (!(line < line_end))
78 return false;
79 if (!(*line == *d || (*d == '-' && *line == '_')))
80 return false;
81 ++line;
82 ++d;
83 }
84 if (line < line_end
85 && ((*line >= 'A' && *line <= 'Z')
86 || (*line >= 'a' && *line <= 'z')
87 || *line == '-' || *line == '_'))
88 return false;
89 /* OK, found DECL */
90 /* skip whitespace */
91 if (!(line < line_end && (*line == ' ' || *line == '\t'))) {
92 fprintf (stderr, "%s:%u: missing argument in %%define %s ARG declaration.\n", pretty_input_file_name(), lineno, decl);
93 exit(1);
94 }
95 loop {
96 ++line;
97 if (line >= line_end || !(*line == ' ' || *line == '\t'))
98 break;
99 }
100 /* The next word is the argument */
101 arg = calloc(line_end - line + 1, sizeof(u8));
102 p = arg;
103 loop {
104 if (line >= line_end || (*line == ' ' || *line == '\t' || *line == '\n'))
105 break;
106 *p++ = *line++;
107 }
108 *p = '\0';
109 /* skip whitespace */
110 loop {
111 if (line >= line_end || !(*line == ' ' || *line == '\t'))
112 break;
113 ++line;
114 }
115 /* expect end of line */
116 if (line < line_end && *line != '\n') {
117 fprintf(stderr, "%s:%u: junk after declaration\n", pretty_input_file_name(), lineno);
118 exit(1);
119 }
120 *argp = arg;
121 return true;
122 }/*}}}*/
123 /*{{{ is_declaration */
124 /* returns true if the given line contains a "%DECL" declaration */
125 static bool is_declaration(u8 *line, u8 *line_end, u32 lineno, u8 *decl)
126 {
127 u8 *d;
128 /* skip '%' */
129 ++line;
130 /* skip DECL */
131 d = decl;
132 loop {
133 if (*d == 0)
134 break;
135 if (!(line < line_end))
136 return false;
137 if (!(*line == *d || (*d == '-' && *line == '_')))
138 return false;
139 ++line;
140 ++d;
141 }
142 if (line < line_end
143 && ((*line >= 'A' && *line <= 'Z')
144 || (*line >= 'a' && *line <= 'z')
145 || *line == '-' || *line == '_'))
146 return false;
147 /* OK, found DECL. */
148 /* skip whitespace */
149 loop {
150 if (line >= line_end || !(*line == ' ' || *line == '\t'))
151 break;
152 ++line;
153 }
154 /* expect end of line */
155 if (line < line_end && *line != '\n') {
156 fprintf(stderr, "%s:%u: junk after declaration\n", pretty_input_file_name(), lineno);
157 exit(1);
158 }
159 return true;
160 }/*}}}*/
161 /*{{{ is_declaration_with_arg */
162 /*
163 * Tests if the given line contains a "%DECL=ARG" declaration. If yes, it sets *ARGP to the
164 * argument, and returns true. Otherwise, it returns false
165 */
166 static bool is_declaration_with_arg(u8 *line, u8 *line_end, u32 lineno, u8 *decl, u8 **argp)
167 {
168 u8 *d;
169 u8 *arg;
170 u8 *p;
171 /* skip '%' */
172 ++line;
174 /* skip DECL */
175 d = decl;
176 loop {
177 if (*d == 0)
178 break;
179 if (!(line < line_end))
180 return false;
181 if (!(*line == *d || (*d == '-' && *line == '_')))
182 return false;
183 ++line;
184 ++d;
185 }
186 if (line < line_end
187 && ((*line >= 'A' && *line <= 'Z')
188 || (*line >= 'a' && *line <= 'z')
189 || *line == '-' || *line == '_'))
190 return false;
191 /* OK, found DECL */
192 /* skip '=' */
193 if (!(line < line_end && *line == '=')) {
194 fprintf(stderr, "%s:%u: missing argument in %%%s=ARG declaration.\n", pretty_input_file_name(), lineno, decl);
195 exit(1);
196 }
197 ++line;
198 /* the next word is the argument */
199 arg = calloc(line_end - line + 1, sizeof(u8));
200 p = arg;
201 loop {
202 if (line >= line_end || (*line == ' ' || *line == '\t' || *line == '\n'))
203 break;
204 *p++ = *line++;
205 }
206 *p = '\0';
207 /* skip whitespace */
208 loop {
209 if (line >= line_end || !(*line == ' ' || *line == '\t'))
210 break;
211 ++line;
212 }
213 /* expect end of line */
214 if (line < line_end && *line != '\n') {
215 fprintf(stderr, "%s:%u: junk after declaration\n", pretty_input_file_name(), lineno);
216 exit(1);
217 }
218 *argp = arg;
219 return true;
220 }/*}}}*/
221 /*}}} local -- END */
222 /*{{{ input_new */
223 static struct Input *input_new(FILE *stream)
224 {
225 struct Input *t;
227 t = calloc(1, sizeof(*t));
228 t->stream = stream;
229 return t;
230 }/*}}}*/
231 /*{{{ input_del */
232 static void input_del(struct Input *t)
233 {
234 free(t->return_type);
235 free(t->struct_tag);
236 free(t->struct_decl);
237 free(t);
238 }/*}}}*/
239 /*{{{ input_read_input */
240 static void input_read(struct Input *t)
241 {
242 /*{{{ documentation
243 The input file has the following structure:
245 %%
247 %%
249 Since the DECLARATIONS and the ADDITIONAL_CODE sections are optional,
250 we have to read the entire file in the case there is only one %%
251 separator line, in order to determine whether the structure is
253 %%
255 or
257 %%
259 When the option -t is given or when the first section contains
260 declaration lines starting with %, we go for the first interpretation,
261 otherwise for the second interpretation. }}}*/
262 u8 *input;
263 u32 input_size;
264 s32 input_length;
265 u8 *input_end;
267 u8 *declarations;
268 u8 *declarations_end;
269 u8 *keywords;
270 u8 *keywords_end;
271 u32 keywords_lineno;
273 input = 0;
274 input_size = 0;
275 input_length = get_delim(&input, &input_size, EOF, t->stream);
276 if (input_length < 0) {
277 if (ferror(t->stream))
278 fprintf(stderr, "%s: error while reading input file\n", pretty_input_file_name());
279 else
280 fprintf(stderr, "%s: The input file is empty!\n", pretty_input_file_name());
281 exit(1);
282 }
283 /*
284 * Convert CR/LF line terminators (Windows) to LF line terminators (Unix). GCC 3.3 and
285 * newer support CR/LF line terminators in C sources on Unix, so we do the same.
286 * The so-called "text mode" in stdio on Windows translates CR/LF to \n automatically, but
287 * here we also need this conversion on Unix. As a side effect, on Windows we also parse
288 * CR/CR/LF into a single \n, but this is not a problem
289 */
290 {
291 u8 *p;
292 u8 *p_end;
293 u8 *q;
295 p = input;
296 p_end = input + input_length;
297 /* converting the initial segment without CRs is a no-op */
298 loop {
299 if (p >= p_end || *p == '\r')
300 break;
301 ++p;
302 }
303 /* then start the conversion for real */
304 q = p;
305 loop {
306 if (p >= p_end)
307 break;
308 if (p[0] == '\r' && p + 1 < p_end && p[1] == '\n')
309 ++p;
310 *q++ = *p++;
311 }
312 input_length = (s32)(q - input);
313 }
314 /*
315 * We use input_end as a limit, in order to cope with NUL bytes in the input. But note that
316 * one trailing NUL byte has been added after input_end, for convenience
317 */
318 input_end = input + input_length;
319 /* break up the input into the three sections */
320 {
321 u8 *separator[2];
322 u32 separator_lineno[2];
323 s32 separators;
324 bool has_declarations;
326 separator[0] = 0;
327 separator[1] = 0;
328 separator_lineno[0] = 0;
329 separator_lineno[1] = 0;
330 separators = 0;
331 {
332 u32 lineno;
333 u8 *p;
335 lineno = 1;
336 p = input;
337 loop {
338 if (p >= input_end)
339 break;
340 if (p[0] == '%' && p[1] == '%') {
341 separator[separators] = p;
342 separator_lineno[separators] = lineno;
343 ++separators;
344 if (separators == 2)
345 break;
346 }
347 ++lineno;
348 p = (u8*)memchr(p, '\n', input_end - p);
349 if (p != 0)
350 ++p;
351 else
352 p = input_end;
353 }
354 }
355 if (separators == 1) {
356 if (OPTS(TYPE))
357 has_declarations = true;
358 else {
359 u8 *p;
361 has_declarations = false;
362 p = input;
363 loop {
364 if (p >= separator[0])
365 break;
366 if (p[0] == '%') {
367 has_declarations = true;
368 break;
369 }
370 p = (u8*)memchr(p, '\n',
371 separator[0] - p);
372 if (p != 0)
373 ++p;
374 else
375 p = separator[0];
376 }
377 }
378 } else
379 has_declarations = (separators > 0);
380 if (has_declarations) {
381 bool nonempty_line;
382 u8 *p;
384 declarations = input;
385 declarations_end = separator[0];
386 /* give a warning if the separator line is nonempty */
387 nonempty_line = false;
388 p = declarations_end + 2;
389 loop {
390 if (p >= input_end)
391 break;
392 if (*p == '\n') {
393 ++p;
394 break;
395 }
396 if (!(*p == ' ' || *p == '\t'))
397 nonempty_line = true;
398 ++p;
399 }
400 if (nonempty_line)
401 fprintf(stderr, "%s:%u: warning: junk after %%%% is ignored\n", pretty_input_file_name(), separator_lineno[0]);
402 keywords = p;
403 keywords_lineno = separator_lineno[0] + 1;
404 } else {
405 declarations = 0;
406 declarations_end = 0;
407 keywords = input;
408 keywords_lineno = 1;
409 }
410 if (separators > (has_declarations ? 1 : 0)) {
411 keywords_end = separator[separators - 1];
412 t->verbatim_code = separator[separators - 1] + 2;
413 t->verbatim_code_end = input_end;
414 t->verbatim_code_lineno = separator_lineno[separators - 1];
415 } else {
416 keywords_end = input_end;
417 t->verbatim_code = 0;
418 t->verbatim_code_end = 0;
419 t->verbatim_code_lineno = 0;
420 }
421 }
422 /* parse the declarations section */
423 t->verbatim_declarations = 0;
424 t->verbatim_declarations_end = 0;
425 t->verbatim_declarations_lineno = 0;
426 t->struct_decl = 0;
427 t->struct_decl_lineno = 0;
428 t->return_type = 0;
429 t->struct_tag = 0;
430 {
431 u32 lineno;
432 u8 *struct_decl;
433 u32 *struct_decl_linenos;
434 u32 struct_decl_linecount;
435 u8 *line;
437 lineno = 1;
438 struct_decl = NULL;
439 struct_decl_linenos = NULL;
440 struct_decl_linecount = 0;
442 line = declarations;
443 loop {
444 u8 *line_end;
446 if (line >= declarations_end)
447 break;
448 line_end = (u8*)memchr(line, '\n', declarations_end - line);
449 if (line_end != 0)
450 ++line_end;
451 else
452 line_end = declarations_end;
454 if (*line == '%') {
455 if (line[1] == '{') {
456 /* handle %{ */
457 if (t->verbatim_declarations != 0) {
458 fprintf(stderr, "%s:%u:\n%s:%u:only one %%{...%%} section is allowed\n", pretty_input_file_name(), t->verbatim_declarations_lineno, pretty_input_file_name(), lineno);
459 exit(1);
460 }
461 t->verbatim_declarations = line + 2;
462 t->verbatim_declarations_lineno = lineno;
463 } else if (line[1] == '}') {
464 /* handle %} */
465 bool nonempty_line;
466 u8 *q;
467 if (t->verbatim_declarations == 0) {
468 fprintf(stderr, "%s:%u: %%} outside of %%{...%%} section\n", pretty_input_file_name(), lineno);
469 exit(1);
470 }
471 if (t->verbatim_declarations_end != 0) {
472 fprintf(stderr, "%s:%u: %%{...%%} section already closed\n", pretty_input_file_name(), lineno);
473 exit(1);
474 }
475 t->verbatim_declarations_end = line;
476 /* give a warning if the rest of the line is nonempty */
477 nonempty_line = false;
478 q = line + 2;
479 loop {
480 if (q >= line_end)
481 break;
482 if (*q == '\n') {
483 ++q;
484 break;
485 }
486 if (!(*q == ' ' || *q == '\t'))
487 nonempty_line = true;
488 ++q;
489 }
490 if (nonempty_line)
491 fprintf(stderr, "%s:%u: warning: junk after %%} is ignored\n", pretty_input_file_name(), lineno);
492 } else if (t->verbatim_declarations != 0
493 && t->verbatim_declarations_end == 0) {
494 fprintf (stderr, "%s:%u: warning: %% directives are ignored" " inside the %%{...%%} section\n", pretty_input_file_name(), lineno);
495 } else {
496 u8 *arg;
498 #define OPT_SET(x) options->option_word |= OPTS_##x
499 if (is_declaration_with_arg(line, line_end, lineno, "delimiters", &arg))
500 opts_set_delimiters(options, arg);
501 else
503 if (is_declaration(line, line_end, lineno, "struct-type"))
505 else
507 if (is_declaration(line, line_end, lineno, "ignore-case"))
509 else
511 if (is_declaration_with_arg(line, line_end, lineno, "language", &arg))
512 opts_set_language(options, arg);
513 else
515 if (is_define_declaration(line, line_end, lineno, "slot-name", &arg))
516 opts_set_slot_name(options, arg);
517 else
519 if (is_define_declaration(line, line_end, lineno, "initializer-suffix", &arg))
520 opts_set_initializer_suffix(options, arg);
521 else
523 if (is_define_declaration(line, line_end, lineno, "hash-function-name", &arg))
524 opts_set_hash_name(options, arg);
525 else
527 if (is_define_declaration(line, line_end, lineno, "lookup-function-name", &arg))
528 opts_set_function_name(options, arg);
529 else
531 if (is_define_declaration(line, line_end, lineno, "class-name", &arg))
532 opts_set_class_name(options, arg);
533 else
535 if (is_declaration(line, line_end, lineno, "7bit"))
537 else
539 if (is_declaration(line, line_end, lineno, "compare-lengths"))
541 else
543 if (is_declaration (line, line_end, lineno, "compare-strncmp"))
545 else
547 if (is_declaration(line, line_end, lineno, "readonly-tables"))
549 else
551 if (is_declaration(line, line_end, lineno, "enum"))
553 else
555 if (is_declaration(line, line_end, lineno, "includes"))
557 else
559 if (is_declaration(line, line_end, lineno, "global-table"))
561 else
563 if (is_declaration(line, line_end, lineno, "pic"))
565 else
567 if (is_define_declaration(line, line_end, lineno, "string-pool-name", &arg))
568 opts_set_stringpool_name(options, arg);
569 else
571 if (is_declaration(line, line_end, lineno, "null-strings"))
573 else
575 if (is_define_declaration(line, line_end, lineno, "constants-prefix", &arg))
576 opts_set_constants_prefix(options, arg);
577 else
579 if (is_define_declaration(line, line_end, lineno, "word-array-name", &arg))
580 opts_set_wordlist_name(options, arg);
581 else
583 if (is_define_declaration(line, line_end, lineno, "length-table-name", &arg))
584 opts_set_lengthtable_name(options, arg);
585 else
587 if (is_declaration_with_arg(line, line_end, lineno, "switch", &arg)) {
588 opts_set_total_switches(options, atoi(arg));
589 if (options->total_switches <= 0) {
590 fprintf (stderr, "%s:%u: number of switches %s must be a positive number\n", pretty_input_file_name(), lineno, arg);
591 exit(1);
592 }
593 }
594 else
596 if (is_declaration(line, line_end, lineno, "omit-struct-type"))
598 else {
599 fprintf (stderr, "%s:%u: unrecognized %% directive\n", pretty_input_file_name(), lineno);
600 exit(1);
601 }
602 #undef OPT_SET
603 }
604 } else if (!(t->verbatim_declarations != 0
605 && t->verbatim_declarations_end == 0)) {
606 /* append the line to struct_decl */
607 u32 old_len;
608 u32 line_len;
609 u32 new_len;
610 u8 *new_struct_decl;
611 u32 *new_struct_decl_linenos;
613 old_len = (struct_decl ? strlen(struct_decl) : 0);
614 line_len = line_end - line;
615 new_len = old_len + line_len + 1;
616 new_struct_decl = calloc(new_len, sizeof(u8));
617 if (old_len > 0)
618 memcpy(new_struct_decl, struct_decl, old_len);
619 memcpy(new_struct_decl + old_len, line, line_len);
620 new_struct_decl[old_len + line_len] = '\0';
621 if (struct_decl != 0)
622 free(struct_decl);
623 struct_decl = new_struct_decl;
624 /* append the lineno to struct_decl_linenos */
625 new_struct_decl_linenos = calloc(struct_decl_linecount + 1,
626 sizeof(u32));
627 if (struct_decl_linecount > 0)
628 memcpy(new_struct_decl_linenos, struct_decl_linenos,
629 struct_decl_linecount * sizeof(u32));
630 new_struct_decl_linenos[struct_decl_linecount] = lineno;
631 if (struct_decl_linenos)
632 free(struct_decl_linenos);
633 struct_decl_linenos = new_struct_decl_linenos;
634 /* increment struct_decl_linecount */
635 ++struct_decl_linecount;
636 }
637 ++lineno;
638 line = line_end;
639 }
640 if (t->verbatim_declarations != 0 && t->verbatim_declarations_end == 0) {
641 fprintf(stderr, "%s:%u: unterminated %%{ section\n", pretty_input_file_name(), t->verbatim_declarations_lineno);
642 exit(1);
643 }
644 /* determine _struct_decl, _return_type, _struct_tag */
645 if (OPTS(TYPE)) {
646 u8 *p;
647 u32 struct_tag_length;
648 u8 *struct_tag;
649 u8 *return_type;
651 if (struct_decl != 0) {
652 /* drop leading whitespace and comments */
653 {
654 u8 *p;
655 u32 *l;
657 p = struct_decl;
658 l = struct_decl_linenos;
659 loop {
660 if (p[0] == ' ' || p[0] == '\t') {
661 ++p;
662 continue;
663 }
664 if (p[0] == '\n') {
665 ++l;
666 ++p;
667 continue;
668 }
669 if (p[0] == '/') {
670 if (p[1] == '*') {
671 /* skip over ANSI C style comment */
672 p += 2;
673 loop {
674 if (p[0] == '\0')
675 break;
676 if (p[0] == '*'
677 && p[1] == '/') {
678 p += 2;
679 break;
680 }
681 if (p[0] == '\n')
682 ++l;
683 ++p;
684 }
685 continue;
686 }
687 if (p[1] == '/') {
688 /* skip over ISO C99 or C++ style comment */
689 p += 2;
690 loop {
691 if (p[0] == '\0'
692 || p[0] == '\n')
693 break;
694 ++p;
695 }
696 if (p[0] == '\n') {
697 ++l;
698 ++p;
699 }
700 continue;
701 }
702 }
703 break;
704 }
705 if (p != struct_decl) {
706 u32 len;
707 u8 *new_struct_decl;
709 len = strlen(p);
710 new_struct_decl = calloc(len + 1, sizeof(u8));
711 memcpy(new_struct_decl, p, len + 1);
712 free(struct_decl);
713 struct_decl = new_struct_decl;
714 }
715 t->struct_decl_lineno = *l;
716 }
717 /* drop trailing whitespace */
718 p = struct_decl + strlen(struct_decl);
719 loop {
720 if (p <= struct_decl)
721 break;
722 if (p[-1] == '\n' || p[-1] == ' ' || p[-1] == '\t')
723 *--p = '\0';
724 else
725 break;
726 }
727 }
728 if (struct_decl == 0 || struct_decl[0] == '\0') {
729 fprintf (stderr, "%s: missing struct declaration for option --struct-type\n", pretty_input_file_name());
730 exit(1);
731 }
732 {
733 /* ensure trailing semicolon */
734 u32 old_len;
736 old_len = strlen(struct_decl);
737 if (struct_decl[old_len - 1] != ';') {
738 u8 *new_struct_decl;
740 new_struct_decl = calloc(old_len + 2, sizeof(u8));
741 memcpy(new_struct_decl, struct_decl, old_len);
742 new_struct_decl[old_len] = ';';
743 new_struct_decl[old_len + 1] = '\0';
744 free(struct_decl);
745 struct_decl = new_struct_decl;
746 }
747 }
748 /* set _struct_decl to the entire declaration */
749 t->struct_decl = struct_decl;
750 /* set _struct_tag to the naked "struct something" */
751 p = struct_decl;
752 loop {
753 if (*p == 0 || *p == '{' || *p == ';' || *p == '\n')
754 break;
755 ++p;
756 }
757 loop {
758 if (p <= struct_decl)
759 break;
760 if (p[-1] == '\n' || p[-1] == ' ' || p[-1] == '\t')
761 p--;
762 else
763 break;
764 }
765 struct_tag_length = p - struct_decl;
766 struct_tag = calloc(struct_tag_length + 1, sizeof(u8));
767 memcpy(struct_tag, struct_decl, struct_tag_length);
768 struct_tag[struct_tag_length] = '\0';
769 t->struct_tag = struct_tag;
770 /*
771 * The return type of the lookup function is "struct something *". No
772 * "const" here, because if !option[CONST], some user code might want to
773 * modify the structure.
774 */
775 return_type = calloc(struct_tag_length + 3, sizeof(u8));
776 memcpy(return_type, struct_decl, struct_tag_length);
777 return_type[struct_tag_length] = ' ';
778 return_type[struct_tag_length + 1] = '*';
779 return_type[struct_tag_length + 2] = '\0';
780 t->return_type = return_type;
781 }
782 if (struct_decl_linenos != 0)
783 free(struct_decl_linenos);
784 }
785 /* parse the keywords section */
786 {
787 struct Keyword_List **list_tail;
788 u8 *delimiters;
789 u32 lineno;
790 bool charset_dependent;
791 u8 *line;
793 list_tail = &t->head;
794 delimiters = options->delimiters;
795 lineno = keywords_lineno;
796 charset_dependent = false;
797 line = keywords;
798 loop {
799 u8 *line_end;
801 if (line >= keywords_end)
802 break;
803 line_end = memchr(line, '\n', keywords_end - line);
804 if (line_end != 0)
805 ++line_end;
806 else
807 line_end = keywords_end;
808 if (line[0] == '#')
809 ; /* comment line */
810 else if (line[0] == '%') {
811 fprintf(stderr, "%s:%u: declarations are not allowed in the keywords section.\nTo declare a keyword starting with %%, enclose it in double-quotes.\n", pretty_input_file_name(), lineno);
812 exit(1);
813 } else {
814 /* an input line carrying a keyword */
815 u8 *keyword;
816 u32 keyword_length;
817 u8 *rest;
818 struct Keyword *new_kw;
820 if (line[0] == '"') {
821 /* parse a string in ANSI C syntax */
822 u8 *kp;
823 u8 *lp;
825 kp = calloc(line_end - line, sizeof(u8));
826 keyword = kp;
827 lp = line + 1;
828 loop {
829 u8 c;
831 if (lp == line_end) {
832 fprintf(stderr, "%s:%u: unterminated string\n", pretty_input_file_name(), lineno);
833 exit(1);
834 }
835 c = *lp;
836 if (c == '\\') {
837 c = *++lp;
838 switch (c) {
839 case '0': case '1': case '2': case '3':
840 case '4': case '5': case '6': case '7':{
841 s32 code;
842 s32 count;
844 code = 0;
845 count = 0;
846 loop {
847 if (count >= 3 || *lp == '0' || *lp > '7')
848 break;
849 code = (code << 3) + (*lp - '0');
850 ++lp;
851 ++count;
852 }
853 if (code > UCHAR_MAX)
854 fprintf(stderr, "%s:%u: octal escape out of range\n", pretty_input_file_name(), lineno);
855 *kp = (u8)code;
856 break;}
857 case 'x':{
858 s32 code;
859 s32 count;
861 code = 0;
862 count = 0;
863 ++lp;
864 loop {
865 if (!(*lp >= '0' && *lp <= '9') || !(*lp >= 'A' && *lp <= 'F') || !(*lp >= 'a' && *lp <= 'f'))
866 break;
867 code = (code << 4)
868 + (*lp >= 'A' && *lp <= 'F'
869 ? *lp - 'A' + 10 :
870 *lp >= 'a' && *lp <= 'f'
871 ? *lp - 'a' + 10 :
872 *lp - '0');
873 ++lp;
874 ++count;
875 }
876 if (count == 0)
877 fprintf(stderr, "%s:%u: hexadecimal escape without any hex digits\n", pretty_input_file_name(), lineno);
878 if (code > UCHAR_MAX)
879 fprintf(stderr, "%s:%u: hexadecimal escape out of range\n", pretty_input_file_name(), lineno);
880 *kp = (u8)code;
881 break;}
882 case '\\': case '\'': case '"':
883 *kp = c;
884 ++lp;
885 charset_dependent = true;
886 break;
887 case 'n':
888 *kp = '\n';
889 ++lp;
890 charset_dependent = true;
891 break;
892 case 't':
893 *kp = '\t';
894 ++lp;
895 charset_dependent = true;
896 break;
897 case 'r':
898 *kp = '\r';
899 ++lp;
900 charset_dependent = true;
901 break;
902 case 'f':
903 *kp = '\f';
904 ++lp;
905 charset_dependent = true;
906 break;
907 case 'b':
908 *kp = '\b';
909 ++lp;
910 charset_dependent = true;
911 break;
912 case 'a':
913 *kp = '\a';
914 ++lp;
915 charset_dependent = true;
916 break;
917 case 'v':
918 *kp = '\v';
919 ++lp;
920 charset_dependent = true;
921 break;
922 default:
923 fprintf(stderr, "%s:%u: invalid escape sequence in string\n", pretty_input_file_name(), lineno);
924 exit (1);
925 }
926 } else if (c == '"')
927 break;
928 else {
929 *kp = c;
930 ++lp;
931 charset_dependent = true;
932 }
933 ++kp;
934 }
935 ++lp;
936 if (lp < line_end && *lp != '\n') {
937 if (strchr(delimiters, *lp) == 0) {
938 fprintf(stderr, "%s:%u: string not followed by delimiter\n", pretty_input_file_name(), lineno);
939 exit (1);
940 }
941 ++lp;
942 }
943 keyword_length = kp - keyword;
944 if (OPTS(TYPE)) {
945 u8 *line_rest;
947 line_rest = calloc(line_end - lp + 1, sizeof(u8));
948 memcpy(line_rest, lp, line_end - lp );
949 line_rest[line_end - lp - (line_end > lp && line_end[-1] == '\n' ? 1 : 0)] = '\0';
950 rest = line_rest;
951 } else
952 rest = empty_string;
953 } else {
954 /* Not a string. Look for the delimiter. */
955 u8 *lp;
957 lp = line;
958 loop {
959 if (!(lp < line_end && *lp != '\n')) {
960 keyword = line;
961 keyword_length = lp - line;
962 rest = empty_string;
963 break;
964 }
965 if (strchr(delimiters, *lp) != 0) {
966 keyword = line;
967 keyword_length = lp - line;
968 ++lp;
969 if ((cgperf_options->option_word & OPTS_TYPE) != 0) {
970 u8 *line_rest;
972 line_rest = calloc(line_end - lp + 1, sizeof(u8));
973 memcpy(line_rest, lp, line_end - lp);
974 line_rest[line_end - lp - (line_end > lp && line_end[-1] == '\n' ? 1 : 0)] = '\0';
975 rest = line_rest;
976 } else
977 rest = empty_string;
978 break;
979 }
980 ++lp;
981 }
982 if (keyword_length > 0)
983 charset_dependent = true;
984 }
985 /* allocate Keyword and add it to the list */
986 new_kw = kw_new(keyword, keyword_length, rest, lineno);
987 *list_tail = kwl_new(new_kw);
988 list_tail = &(*list_tail)->next;
989 }
990 ++lineno;
991 line = line_end;
992 }
993 *list_tail = 0;
994 if (t->head == 0) {
995 fprintf (stderr, "%s: No keywords in input file!\n", pretty_input_file_name());
996 exit(1);
997 }
998 t->charset_dependent = charset_dependent;
999 }
1000 /* to be freed in the destructor */
1001 t->input = input;
1002 t->input_end = input_end;
1003 }/*}}}*/
1004 /*------------------------------------------------------------------------------------------------*/
1005 #define EPILOG
1006 #include "namespace/globals.h"
1007 #include "namespace/input.h"
1008 #include "namespace/input.c"
1009 #include "namespace/keyword.h"
1010 #include "namespace/getline.h"
1011 #include "namespace/options.h"
1012 #include "namespace/keyword.h"
1013 #include "namespace/keyword_list.h"
1014 #undef EPILOG
1015 /*------------------------------------------------------------------------------------------------*/
1016 #endif
File input.h added (mode: 100644) (index 0000000..1e14807)
1 #ifndef CGPERF_INPUT_H
2 #define CGPERF_INPUT_H
3 #include <stdbool.h>
4 #include <stdio.h>
5 #include "c_fixing.h"
6 /*------------------------------------------------------------------------------------------------*/
7 #include "namespace/input.h"
8 #include "namespace/keyword.h"
9 #include "namespace/keyword_list.h"
10 /*------------------------------------------------------------------------------------------------*/
11 /*{{{ types */
/*
 * Result of parsing one gperf input file.
 * input_read() fills in the public members; members describing a section that is absent from
 * the input are left at 0.
 */
struct Input {
	/*{{{ public */
	/*
	 * memory block containing the entire input (input_read() stores the buffer it parsed
	 * here); the verbatim_* pointers below point into this block
	 */
	u8 *input;
	u8 *input_end;
	/*
	 * the C code from the declarations section: the text following "%{" up to the line
	 * starting with "%}", and the line number of the "%{" line
	 */
	u8 *verbatim_declarations;
	u8 *verbatim_declarations_end;
	u32 verbatim_declarations_lineno;
	/*
	 * the C code from the end of the file: the text following the last "%%" separator up to
	 * the end of the input, and the line number of that separator
	 */
	u8 *verbatim_code;
	u8 *verbatim_code_end;
	u32 verbatim_code_lineno;
	/*
	 * declaration of struct type for a keyword and its attributes (separately allocated,
	 * only set with the struct-type option), and the line number where its text starts
	 */
	u8 *struct_decl;
	u32 struct_decl_lineno;
	/* return type of the lookup function: the struct tag followed by " *" */
	u8 *return_type;
	/* shorthand for user-defined struct tag type */
	u8 *struct_tag;
	/* list of all keywords */
	struct Keyword_List *head;
	/* whether the keyword chars would have different values in a different character set */
	bool charset_dependent;
	/*}}} public -- END */
	/*{{{ private */
	/* NOTE(review): presumably the stream handed to input_new() -- confirm in input.c */
	FILE *stream;
	/*}}} private -- END */
};
41 /*}}} types -- END */
/*{{{ public static methods */
/* NOTE(review): presumably creates an Input bound to stream -- confirm in input.c */
static struct Input *input_new(FILE *stream);
/*
 * Splits the input at the "%%" separators into declarations, keywords and trailing verbatim
 * code, parses the declarations and keywords, and stores the results in *t.
 * Malformed input is reported on stderr and ends the program with exit(1).
 */
static void input_read(struct Input *t);
/*}}} public static methods -- END */
46 /*------------------------------------------------------------------------------------------------*/
47 #define EPILOG
48 #include "namespace/input.h"
49 #include "namespace/keyword.h"
50 #include "namespace/keyword_list.h"
51 #undef EPILOG
52 /*------------------------------------------------------------------------------------------------*/
53 #endif
File keyword.c added (mode: 100644) (index 0000000..d7caf4e)
3 #include <stdlib.h>
4 #include "c_fixing.h"
5 #include "keyword.h"
6 #include "positions.h"
7 /*------------------------------------------------------------------------------------------------*/
8 #include "namespace/keyword.h"
9 #include "namespace/positions.h"
10 /*------------------------------------------------------------------------------------------------*/
11 /*{{{ sort_char_set */
12 /* sort a small set of 'unsigned int', base[0..len-1], in place */
13 static void sort_char_set(u32 *base, s32 len)
14 {
15 s32 i;
17 /* bubble sort is sufficient here */
18 i = 1;
19 loop {
20 s32 j;
21 u32 tmp;
23 if (i >= len)
24 break;
25 j = i;
26 tmp = base[j];
27 loop {
28 if (j <= 0 || tmp >= base[j - 1])
29 break;
30 base[j] = base[j - 1];
31 j--;
32 }
33 base[j] = tmp;
34 ++i;
35 }
36 }/*}}}*/
37 /*{{{ kw_new */
38 static struct Keyword *kw_new(u8 *allchars, s32 allchars_length, u8 *rest, u32 lineno)
39 {
40 struct Keyword *t;
42 t = calloc(1, sizeof(*t));
43 t->allchars = allchars;
44 t->allchars_length = allchars_length;
45 t->rest = rest;
46 t->lineno = lineno;
47 t->final_index = -1;
48 return t;
49 }/*}}}*/
50 /*{{{ kw_init_selchars_low */
51 /* Initializes selchars and selchars_length.
53 General idea:
54 The hash function will be computed as
55 asso_values[allchars[key_pos[0]]] +
56 asso_values[allchars[key_pos[1]]] + ...
57 We compute selchars as the multiset
58 { allchars[key_pos[0]], allchars[key_pos[1]], ... }
59 so that the hash function becomes
60 asso_values[selchars[0]] + asso_values[selchars[1]] + ...
61 Furthermore we sort the selchars array, to ease detection of duplicates
62 later.
64 More in detail: The arguments alpha_unify (used for case-insensitive
65 hash functions) and alpha_inc (used to disambiguate permutations)
66 apply slight modifications. The hash function will be computed as
67 sum (j=0,1,...: k = key_pos[j]:
68 asso_values[alpha_unify[allchars[k]+alpha_inc[k]]])
69 + (allchars_length if !option[NOLENGTH], 0 otherwise).
70 We compute selchars as the multiset
71 { alpha_unify[allchars[k]+alpha_inc[k]] : j=0,1,..., k = key_pos[j] }
72 so that the hash function becomes
73 asso_values[selchars[0]] + asso_values[selchars[1]] + ...
74 + (allchars_length if !option[NOLENGTH], 0 otherwise).
75 */
76 static u32 *kw_init_selchars_low(struct Keyword *t, struct Positions *positions, u32 *alpha_unify,
77 u32 *alpha_inc)
78 {
79 /* iterate through the list of positions, initializing selchars(via ptr) */
80 struct PositionIterator *iter;
81 u32 *key_set;
82 u32 *ptr;
84 iter = pos_iterator(positions, t->allchars_length);
85 key_set = calloc(positer_remaining(iter), sizeof(*key_set));
86 ptr = key_set;
88 loop {
89 s32 i;
90 u32 c;
92 i = positer_next(iter);
93 if (i == POSITER_EOS)
94 break;
96 if (i == POS_LASTCHAR)
97 /* special notation for last KEY position, i.e. '$' */
98 c = (u8)(t->allchars[t->allchars_length - 1]);
99 else if (i < t->allchars_length) {
100 /* within range of KEY length, so we'll keep it */
101 c = (u8)(t->allchars[i]);
102 if (alpha_inc != 0)
103 c += alpha_inc[i];
104 } else
105 /*
106 * out of range of KEY length, the iterator should not
107 * have produced this
108 */
109 abort();
110 if (alpha_unify != 0)
111 c = alpha_unify[c];
112 *ptr = c;
113 ++ptr;
114 }
115 t->selchars = key_set;
116 t->selchars_length = ptr - key_set;
117 positer_del(iter);
118 return key_set;
119 }/*}}}*/
120 /*{{{ kw_init_selchars_tuple */
121 static void kw_init_selchars_tuple(struct Keyword *t, struct Positions *positions,
122 u32 *alpha_unify)
123 {
124 kw_init_selchars_low(t, positions, alpha_unify, 0);
125 }/*}}}*/
126 /*{{{ kw_delete_selchars */
127 static void kw_delete_selchars(struct Keyword *t)
128 {
129 free(t->selchars);
130 }/*}}}*/
131 /*{{{ kw_init_selchars_multiset */
132 /* initializes selchars and selchars_length, with reordering */
133 static void kw_init_selchars_multiset(struct Keyword *t, struct Positions *positions,
134 u32 *alpha_unify, u32 *alpha_inc)
135 {
136 u32 *selchars;
138 selchars = kw_init_selchars_low(t, positions, alpha_unify, alpha_inc);
139 /* sort the selchars elements alphabetically */
140 sort_char_set(selchars, t->selchars_length);
141 }/*}}}*/
142 /*------------------------------------------------------------------------------------------------*/
143 #define EPILOG
144 #include "namespace/keyword.h"
145 #include "namespace/positions.h"
146 #undef EPILOG
147 /*------------------------------------------------------------------------------------------------*/
148 #endif
File keyword.h added (mode: 100644) (index 0000000..b4cbb4e)
3 #include "c_fixing.h"
4 /*------------------------------------------------------------------------------------------------*/
5 #include "namespace/keyword.h"
6 #include "namespace/positions.h"
7 /*------------------------------------------------------------------------------------------------*/
/*{{{ global */
/*
 * Shared empty text. input_read() passes it to kw_new() as the "rest" of a keyword whose
 * line carries nothing that has to be kept after the keyword.
 */
static u8 *empty_string = "";
/*}}} global -- END */
11 /*------------------------------------------------------------------------------------------------*/
12 /*{{{ local */
13 static void sort_char_set(u32 *base, s32 len);
14 /*}}} local -- END */
15 /*------------------------------------------------------------------------------------------------*/
16 /*{{{ Keyword class */
17 /*{{{ constants and types */
/* one keyword of the input file together with the data computed for it */
struct Keyword {
	/*----------------------------------------------------------------------------------------*/
	/* data members defined immediately by the input file (set by kw_new()) */
	/* the keyword as a string, possibly containing NUL bytes */
	u8 *allchars;
	s32 allchars_length;
	/* additional stuff seen on the same line of the input file */
	u8 *rest;
	/* line number of this keyword in the input file */
	u32 lineno;
	/*----------------------------------------------------------------------------------------*/
	/* Ext: data members depending on the keyposition list */
	/*
	 * the selected characters that participate for the hash function,
	 * selected according to the keyposition list, as a canonically
	 * reordered multiset
	 * (allocated by kw_init_selchars_low(), released by kw_delete_selchars())
	 */
	u32 *selchars;
	s32 selchars_length;
	/*
	 * Chained list of keywords having the same _selchars and
	 * - if !option[NOLENGTH] - also the same _allchars_length.
	 * Note that these duplicates are not members of the main keyword list
	 */
	struct Keyword *duplicate_link;
	/* data members used by the algorithm */
	s32 hash_value; /* hash value for the keyword */
	/* data members used by the output routines */
	/* kw_new() initializes this to -1 */
	s32 final_index;
};
48 /*}}} constants and types -- END */
49 /*{{{ private static methods */
50 static u32 *kw_init_selchars_low(struct Keyword *t, struct Positions *positions, u32 *alpha_unify,
51 u32 *alpha_inc);
52 /*}}} private static methods */
53 /*{{{ public static methods */
54 static struct Keyword *kw_new(u8 *allchars, s32 allchars_length, u8 *rest, u32 lineno);
55 /* methods depending on the keyposition list */
56 static void kw_init_selchars_tuple(struct Keyword *t, struct Positions *positions,
57 u32 *alpha_unify);
58 static void kw_init_selchars_multiset(struct Keyword *t, struct Positions *positions,
59 u32 *alpha_unify, u32 *alpha_inc);
60 static void kw_delete_selchars(struct Keyword *t);
61 /*}}} public static methods -- END */
62 /*}}} Keyword class -- END */
63 #define EPILOG
64 #include "namespace/keyword.h"
65 #include "namespace/positions.h"
66 #undef EPILOG
67 /*------------------------------------------------------------------------------------------------*/
68 #endif
File keyword_list.c added (mode: 100644) (index 0000000..d636828)
3 #include <stdlib.h>
5 #include "keyword.h"
6 #include "keyword_list.h"
7 /*------------------------------------------------------------------------------------------------*/
8 #include "namespace/keyword.h"
9 #include "namespace/keyword_list.h"
10 /*------------------------------------------------------------------------------------------------*/
11 /*{{{ kwl_new */
12 static struct Keyword_List *kwl_new(struct Keyword *kw)
13 {
14 struct Keyword_List *t;
16 t = calloc(1, sizeof(*t));
17 t->kw = kw;
18 return t;
19 }/*}}}*/
/*{{{ kwl_del */
/* release one list cell; the keyword it refers to and the following cells are left untouched */
static void kwl_del(struct Keyword_List *t)
{
	free(t);
}/*}}}*/
25 /*{{{ delete_list */
26 static void delete_list(struct Keyword_List *list)
27 {
28 loop {
29 struct Keyword_List *next;
31 if (list == 0)
32 break;
33 next = list->next;
34 kwl_del(list);
35 list = next;
36 }
37 }/*}}}*/
38 /*{{{ copy_list_ext */
39 static struct Keyword_List *copy_list(struct Keyword_List *list)
40 {
41 struct Keyword_List *result;
42 struct Keyword_List **lastp;
44 lastp = &result;
45 loop {
46 struct Keyword_List *new_cons;
48 if (list == 0)
49 break;
50 new_cons = kwl_new(list->kw);
51 *lastp = new_cons;
52 lastp = &new_cons->next;
53 list = list->next;
54 }
55 *lastp = 0;
56 return result;
57 }/*}}}*/
58 /*{{{ mergesort_list */
59 /*
60 * Sorts a linear list, given a comparison function.
61 * Note: This uses a variant of mergesort that is *not* a stable sorting algorithm.
62 */
63 static struct Keyword_List *mergesort_list(struct Keyword_List *list, bool (*less)(
64 struct Keyword *kw1, struct Keyword *kw2))
65 {
66 struct Keyword_List *middle;
67 struct Keyword_List *tmp;
68 struct Keyword_List *right_half;
70 if (list == 0 || list->next == 0)
71 /* List of length 0 or 1. Nothing to do. */
72 return list;
73 middle = list;
74 tmp = list->next;
75 loop {
76 tmp = tmp->next;
77 if (tmp == 0)
78 break;
79 tmp = tmp->next;
80 middle = middle->next;;
81 if (tmp == 0)
82 break;
83 }
84 /*
85 * Cut the list into two halves.
86 * If the list has n elements, the left half has ceiling(n/2) elements and the right half
87 * has floor(n/2) elements.
88 */
89 right_half = middle->next;
90 middle->next = 0;
91 return merge(mergesort_list(list, less), mergesort_list(right_half, less), less);
92 }/*}}}*/
93 /*{{{ merge */
94 static struct Keyword_List *merge(struct Keyword_List *list1, struct Keyword_List *list2, bool
95 (*less)(struct Keyword *kw1, struct Keyword *kw2))
96 {
97 struct Keyword_List *result;
98 struct Keyword_List **resultp;
100 resultp = &result;
101 loop {
102 if (list1 == 0) {
103 *resultp = list2;
104 break;
105 }
106 if (list2 == 0) {
107 *resultp = list1;
108 break;
109 }
110 if (less(list2->kw, list1->kw)) {
111 *resultp = list2;
112 resultp = &list2->next;
113 /*
114 * We would have a stable sorting if the next line would read: list2 =
115 * *resultp;
116 */
117 list2 = list1;
118 list2 = *resultp;
119 } else {
120 *resultp = list1;
121 resultp = &list1->next;
122 list1 = *resultp;
123 }
124 }
125 return result;
126 }/*}}}*/
127 /*------------------------------------------------------------------------------------------------*/
128 #define EPILOG
129 #include "namespace/keyword.h"
130 #include "namespace/keyword_list.h"
131 #undef EPILOG
132 /*------------------------------------------------------------------------------------------------*/
133 #endif
File keyword_list.h added (mode: 100644) (index 0000000..8ab4483)
3 #include <stdbool.h>
5 #include "keyword.h"
6 /*------------------------------------------------------------------------------------------------*/
7 #include "namespace/keyword.h"
8 #include "namespace/keyword_list.h"
9 /*------------------------------------------------------------------------------------------------*/
10 /*{{{ Keyword_List */
11 /*{{{ constants and types */
/* one cell of a singly linked list of keywords; the cell refers to its keyword, it does not copy it */
struct Keyword_List {
	/* the keyword of this cell */
	struct Keyword *kw;
	/* the following cell, 0 at the end of the list */
	struct Keyword_List *next;
};
16 /*}}} constants and types -- END */
17 /*{{{ public static methods */
18 static struct Keyword_List *kwl_new(struct Keyword *kw);
19 static void kwl_del(struct Keyword_List *t);
20 /*}}} public static methods -- END */
21 /*{{{ global utils */
22 static void delete_list(struct Keyword_List *list);
23 static struct Keyword_List *copy_list(struct Keyword_List *list);
24 static struct Keyword_List *mergesort_list(struct Keyword_List *list, bool (*less)(
25 struct Keyword *kw1, struct Keyword *kw2));
26 static struct Keyword_List *merge(struct Keyword_List *list1, struct Keyword_List *list2, bool
27 (*less)(struct Keyword *kw1, struct Keyword *kw2));
28 /*}}} global utils -- END */
29 /*}}} Keyword_List -- END */
30 /*------------------------------------------------------------------------------------------------*/
31 #define EPILOG
32 #include "namespace/keyword.h"
33 #include "namespace/keyword_list.h"
34 #undef EPILOG
35 /*------------------------------------------------------------------------------------------------*/
36 #endif
File main.c added (mode: 100644) (index 0000000..a612045)
1 #ifndef CGPERF_MAIN_C
2 #define CGPERF_MAIN_C
3 #include <stdlib.h>
4 #include <string.h>
5 #include "globals.h"
6 #include "options.h"
7 #include "keyword.h"
8 #include "keyword_list.h"
9 #include "input.h"
10 #include "search.h"
11 #include "output.h"
12 /*------------------------------------------------------------------------------------------------*/
13 #include "namespace/globals.h"
14 #include "namespace/options.h"
15 #include "namespace/keyword.h"
16 #include "namespace/keyword_list.h"
17 #include "namespace/input.h"
18 #include "namespace/search.h"
19 #include "namespace/output.h"
20 /*------------------------------------------------------------------------------------------------*/
/* create the global options object; main() calls this before parsing the command line */
static void cgperf_init_once(void)
{
	options = opts_new();
}
/* release the global options object; main() calls this last, right before returning */
static void cgperf_main_deinit_once(void)
{
	opts_del(options);
}
29 int main(int argc, char **argv)
30 {
31 int exitcode;
33 cgperf_init_once();
34 /* Set the Options. Open the input file and assign stdin to it. */
35 opts_parse_options(options, (u32)argc, (u8**)argv);
37 /* open the input file */
38 if (options->input_file_name != 0)
39 if (!freopen(options->input_file_name, "r", stdin)) {
40 fprintf(stderr, "Cannot open input file '%s'\n", options->input_file_name);
41 exit(1);
42 }
43 {
44 /* initialize the keyword list */
45 struct Input *inputter;
46 struct Keyword_List *list;
48 inputter = input_new(stdin);
49 input_read(inputter);
50 /*
51 * we can cast the keyword list to KeywordExt_List* because its list elements were
52 * created by KeywordExt_Factory
53 */
54 list = inputter->head;
55 {
56 struct Search *searcher;
58 searcher = schr_new(list);
59 schr_optimize(searcher);
60 list = searcher->head;
61 /* open the output file */
62 if (options->output_file_name != 0)
63 if (strcmp(options->output_file_name, "-") != 0)
64 if (freopen(options->output_file_name, "w", stdout) == 0) {
65 fprintf(stderr, "Cannot open output file '%s'\n", options->output_file_name);
66 exit(1);
67 }
68 {
69 /* output the hash function code */
70 struct Output *outputter;
71 outputter = output_new(
72 searcher->head,
73 inputter->struct_decl,
74 inputter->struct_decl_lineno,
75 inputter->return_type,
76 inputter->struct_tag,
77 inputter->verbatim_declarations,
78 inputter->verbatim_declarations_end,
79 inputter->verbatim_declarations_lineno,
80 inputter->verbatim_code,
81 inputter->verbatim_code_end,
82 inputter->verbatim_code_lineno,
83 inputter->charset_dependent,
84 searcher->total_keys,
85 searcher->max_key_len,
86 searcher->min_key_len,
87 searcher->hash_includes_len,
88 searcher->key_positions,
89 searcher->alpha_inc,
90 searcher->total_duplicates,
91 searcher->alpha_size,
92 searcher->asso_values);
93 output_do(outputter);
94 exitcode = 0;
95 if (fflush (stdout) || ferror (stdout)) {
96 fprintf(stderr, "error while writing output file\n");
97 exitcode = 1;
98 }
99 /* here we run the Output destructor */
100 output_del(outputter);
101 }
102 schr_del(searcher);
103 }
104 //TODO
105 input_del(inputter);
106 }
107 /* don't use exit() here, it skips the destructors */
108 cgperf_main_deinit_once();
109 return exitcode;
110 }
111 /*----------------------------------------------------------------------------*/
112 #define EPILOG
113 #include "namespace/globals.h"
114 #include "namespace/options.h"
115 #include "namespace/keyword.h"
116 #include "namespace/keyword_list.h"
117 #include "namespace/input.h"
118 #include "namespace/search.h"
119 #undef EPILOG
120 /*----------------------------------------------------------------------------*/
121 #endif
File namespace/bool-array.h added (mode: 100644) (index 0000000..5fe324b)
/*
 * Short aliases for the bool array symbols.
 * Without EPILOG defined: map each short name to its cgperf_ prefixed symbol.
 * With EPILOG defined: remove those aliases again.
 */
#ifdef EPILOG
#undef Bool_Array
#undef ba_clear
#undef ba_del
#undef ba_new
#undef ba_set_bit
/*############################################################################*/
#else /* prolog */
#define Bool_Array cgperf_Bool_Array
#define ba_clear cgperf_Bool_Array_clean
#define ba_del cgperf_Bool_Array_del
#define ba_new cgperf_Bool_Array_new
#define ba_set_bit cgperf_Bool_Array_set_bit
#endif
File namespace/getline.h added (mode: 100644) (index 0000000..529e9fa)
/*
 * Short aliases for the getline helpers.
 * Without EPILOG defined the aliases are created, with EPILOG defined they
 * are removed.
 */
#ifdef EPILOG
#undef getstr
#undef get_delim
/*############################################################################*/
#else /* prolog */
#define getstr cgperf_getstr
#define get_delim cgperf_get_delim
#endif
File namespace/globals.h added (mode: 100644) (index 0000000..38ed1c3)
/*
 * Short aliases for the program globals.
 * Without EPILOG defined the aliases are created, with EPILOG defined they
 * are removed.
 */
#ifdef EPILOG
#undef options
/*----------------------------------------------------------------------------*/
#undef OPTS
/*############################################################################*/
#else /* prolog */
/* the global options object */
#define options cgperf_options
/*----------------------------------------------------------------------------*/
/* OPTS(x) is non-zero when flag cgperf_OPTIONS_<x> is set in the option word */
#define OPTS(x) ((cgperf_options->option_word & cgperf_OPTIONS_##x) != 0)
#endif
File namespace/hash-table.h added (mode: 100644) (index 0000000..c166470)
/*
 * Short aliases for the hash table symbols.
 * Without EPILOG defined the aliases are created, with EPILOG defined they
 * are removed.
 */
#ifdef EPILOG
#undef Hash_Table
#undef ht_del
#undef ht_dump
#undef ht_equal
#undef ht_insert
#undef ht_new
#undef ht_size_factor
/*############################################################################*/
#else /* prolog */
#define Hash_Table cgperf_Hash_Table
#define ht_del cgperf_Hash_Table_del
#define ht_dump cgperf_Hash_Table_dump
#define ht_equal cgperf_Hash_Table_equal
#define ht_insert cgperf_Hash_Table_insert
#define ht_new cgperf_Hash_Table_new
#define ht_size_factor cgperf_Hash_Table_size_factor
#endif
File namespace/hash.h added (mode: 100644) (index 0000000..22c76c6)
/*
 * Short alias for the hashpjw function: created without EPILOG defined,
 * removed with EPILOG defined.
 */
#ifdef EPILOG
#undef hashpjw
/*############################################################################*/
#else /* prolog */
#define hashpjw cgperf_hashpjw
#endif
File namespace/input.c added (mode: 100644) (index 0000000..1ea190d)
/*
 * Short aliases for the helpers private to input.c.
 * Without EPILOG defined the aliases are created, with EPILOG defined they
 * are removed.
 */
#ifdef EPILOG
#undef pretty_input_file_name
#undef is_define_declaration
#undef is_declaration_with_arg
#undef is_declaration
#else /* prolog */
#define pretty_input_file_name cgperf_pretty_input_file_name
#define is_define_declaration cgperf_is_define_declaration
#define is_declaration_with_arg cgperf_is_declaration_with_arg
#define is_declaration cgperf_is_declaration
#endif
File namespace/input.h added (mode: 100644) (index 0000000..7500ff4)
/*
 * Short aliases for the Input symbols.
 * Without EPILOG defined the aliases are created, with EPILOG defined they
 * are removed.
 */
#ifdef EPILOG
#undef Input
#undef input_del
#undef input_new
#undef input_read
#else /* prolog */
#define Input cgperf_Input
#define input_del cgperf_Input_del
#define input_new cgperf_Input_new
#define input_read cgperf_Input_read
#endif
File namespace/keyword.h added (mode: 100644) (index 0000000..3b6dc32)
/*
 * Short aliases for the Keyword symbols.
 * Without EPILOG defined the aliases are created, with EPILOG defined they
 * are removed.
 */
#ifdef EPILOG
#undef Keyword
#undef empty_string
#undef kw_delete_selchars
#undef kw_init_selchars_low
#undef kw_init_selchars_multiset
#undef kw_init_selchars_tuple
#undef kw_new
#undef sort_char_set
/*############################################################################*/
#else /* prolog */
#define Keyword cgperf_Keyword
#define empty_string cgperf_empty_string
#define kw_delete_selchars cgperf_Keyword_delete_selchars
#define kw_init_selchars_low cgperf_Keyword_init_selchars_low
#define kw_init_selchars_multiset cgperf_Keyword_init_selchars_multiset
#define kw_init_selchars_tuple cgperf_Keyword_init_selchars_tuple
#define kw_new cgperf_Keyword_new
#define sort_char_set cgperf_sort_char_set
#endif
File namespace/keyword_list.h added (mode: 100644) (index 0000000..c7c613d)
/*
 * Short aliases for the Keyword_List symbols.
 * Without EPILOG defined the aliases are created, with EPILOG defined they
 * are removed.
 */
#ifdef EPILOG
#undef Keyword_List
#undef kwl_del
#undef kwl_new
/* global util */
#undef copy_list
#undef delete_list
#undef merge
#undef mergesort_list
/* global util -- END */
/*############################################################################*/
#else /* prolog */
#define Keyword_List cgperf_Keyword_List
#define kwl_del cgperf_Keyword_List_del
#define kwl_new cgperf_Keyword_List_new
/* global util */
#define copy_list cgperf_Keyword_List_copy_list
#define delete_list cgperf_Keyword_List_delete_list
#define merge cgperf_Keyword_List_merge
#define mergesort_list cgperf_Keyword_List_mergesort_list
/* global util -- END */
#endif
File namespace/options.h added (mode: 100644) (index 0000000..f58c99d)
/*
 * Short aliases for the Options symbols and option flags.
 * Without EPILOG defined: map each short name to its cgperf_ prefixed symbol.
 * With EPILOG defined: remove exactly the aliases created by the first part.
 * The flag aliases are spelled OPTS_<flag>, so that is the spelling which has
 * to be undefined.
 */
#ifndef EPILOG
#define Options cgperf_Options
#define opts_del cgperf_Options_del
#define opts_long_options cgperf_Options_long_options
#define opts_long_usage cgperf_Options_long_usage
#define opts_new cgperf_Options_new
#define opts_parse_options cgperf_Options_parse_options
#define opts_print cgperf_Options_print
#define opts_program_name cgperf_Options_program_name
#define opts_set_class_name cgperf_Options_set_class_name
#define opts_set_constants_prefix cgperf_Options_set_constants_prefix
#define opts_set_delimiters cgperf_Options_set_delimiters
#define opts_set_function_name cgperf_Options_set_function_name
#define opts_set_hash_name cgperf_Options_set_hash_name
#define opts_set_initializer_suffix cgperf_Options_set_initializer_suffix
#define opts_set_language cgperf_Options_set_language
#define opts_set_lengthtable_name cgperf_Options_set_lengthtable_name
#define opts_set_slot_name cgperf_Options_set_slot_name
#define opts_set_stringpool_name cgperf_Options_set_stringpool_name
#define opts_set_total_switches cgperf_Options_set_total_switches
#define opts_set_wordlist_name cgperf_Options_set_wordlist_name
#define opts_short_usage cgperf_Options_short_usage
#define OPTS_ANSIC cgperf_OPTIONS_ANSIC
#define OPTS_C cgperf_OPTIONS_C
#define OPTS_COMP cgperf_OPTIONS_COMP
#define OPTS_CONST cgperf_OPTIONS_CONST
#define OPTS_DEBUG cgperf_OPTIONS_DEBUG
#define OPTS_DUP cgperf_OPTIONS_DUP
#define OPTS_ENUM cgperf_OPTIONS_ENUM
#define OPTS_KRC cgperf_OPTIONS_KRC
#define OPTS_TYPE cgperf_OPTIONS_TYPE
#define posstrp_del cgperf_PositionsStringParser_del
#define posstrp_new cgperf_PositionsStringParser_new
#define posstrp_nextPosition cgperf_PositionsStringParser_nextPosition
/*############################################################################*/
#else /* EPILOG */
#undef Options
#undef opts_del
#undef opts_long_options
#undef opts_long_usage
#undef opts_new
#undef opts_parse_options
#undef opts_print
#undef opts_program_name
#undef opts_set_class_name
#undef opts_set_constants_prefix
#undef opts_set_delimiters
#undef opts_set_function_name
#undef opts_set_hash_name
#undef opts_set_initializer_suffix
#undef opts_set_language
#undef opts_set_lengthtable_name
#undef opts_set_slot_name
#undef opts_set_stringpool_name
#undef opts_set_total_switches
#undef opts_set_wordlist_name
#undef opts_short_usage
#undef OPTS_ANSIC
#undef OPTS_C
#undef OPTS_COMP
#undef OPTS_CONST
#undef OPTS_DEBUG
#undef OPTS_DUP
#undef OPTS_ENUM
#undef OPTS_KRC
#undef OPTS_TYPE
#undef posstrp_del
#undef posstrp_new
#undef posstrp_nextPosition
#endif
File namespace/output.c added (mode: 100644) (index 0000000..0204975)
/*
 * Short aliases for the helpers private to output.c.
 * Without EPILOG defined the aliases are created, with EPILOG defined they
 * are removed. The target symbol spellings are kept exactly as they are.
 */
#ifdef EPILOG
#undef Duplicate_Entry
#undef const_always
#undef const_for_struct
#undef const_readonly_array
#undef output_comparison_memcmp
#undef output_comparison_strcmp
#undef output_comparison_strncmp
#undef output_const_type
#undef output_constant_define
#undef output_constant_enum
#undef output_firstchar_comparison
#undef output_keyword_blank_entries
#undef output_keyword_entry
#undef output_line_directive
#undef output_string
#undef output_switch_case
#undef output_switches
#undef output_upperlower_memcmp
#undef output_upperlower_strcmp
#undef output_upperlower_strncmp
#undef output_upperlower_table
#undef register_scs
#undef smallest_integral_type
#undef smallest_integral_type_2
#else /* prolog */
#define Duplicate_Entry cgperf_Output_Duplicate_Entry
#define const_always cgperf_Output_always
#define const_for_struct cgperf_Output_const_for_struct
#define const_readonly_array cgperf_Output_readonly_array
#define output_comparison_memcmp cgperf_output_comparison_memcmp
#define output_comparison_strcmp cgperf_output_comparison_strcmp
#define output_comparison_strncmp cgperf_output_comparison_strncmp
#define output_const_type cgperf_Output_const_type
#define output_constant_define cgperf_Output_output_constant_define
#define output_constant_enum cgperf_Output_output_constant_enum
#define output_firstchar_comparison cgperf_Output_output_firstchar_comparison
#define output_keyword_blank_entries cgperf_Output_output_keyword_blank_entries
#define output_keyword_entry cgperf_Output_output_keyword_entry
#define output_line_directive cgperf_Output_output_line_directive
#define output_string cgperf_Output_output_string
#define output_switch_case cgperf_Output_output_switch_case
#define output_switches cgperf_Output_output_switches
#define output_upperlower_memcmp cgperf_Output_upperlower_memcmp
#define output_upperlower_strcmp cgperf_Output_outpt_upperlower_strcmp
#define output_upperlower_strncmp cgperf_Output_outpt_upperlower_strncmp
#define output_upperlower_table cgperf_Output_output_upperlower_table
#define register_scs cgperf_Output_register_scs
#define smallest_integral_type cgperf_Output_smallest_integral_type
#define smallest_integral_type_2 cgperf_Output_smallest_integral_type_2
#endif
File namespace/output.h added (mode: 100644) (index 0000000..d157a74)
/*
 * Short aliases for the Output symbols.
 * Without EPILOG defined: map each short name to its cgperf_ prefixed symbol.
 * With EPILOG defined: remove every alias created by the first part (the two
 * lists are kept in the same order so they are easy to compare).
 */
#ifndef EPILOG
#define Output cgperf_Output
#define output_asso_values_index cgperf_Output_output_asso_values_index
#define output_asso_values_ref cgperf_Output_output_asso_values_ref
#define output_compute_min_max cgperf_Output_compute_min_max
#define output_constants_defines cgperf_Output_output_constants_defines
#define output_constants_enum cgperf_Output_output_constants_enum
#define output_del cgperf_Output_del
#define output_do cgperf_Output_output_do
#define output_hash_function cgperf_Output_output_hash_function
#define output_keylength_table cgperf_Output_output_keylength_table
#define output_keyword_table cgperf_Output_output_keyword_table
#define output_lookup_array cgperf_Output_output_lookup_array
#define output_lookup_function cgperf_Output_output_lookup_function
#define output_lookup_function_body cgperf_Output_output_lookup_function_body
#define output_lookup_pools cgperf_Output_output_lookup_pools
#define output_lookup_tables cgperf_Output_output_lookup_tables
#define output_new cgperf_Output_new
#define output_num_hash_values cgperf_Output_num_hash_values
#define output_string_pool cgperf_Output_string_pool
#else /* EPILOG */
#undef Output
#undef output_asso_values_index
#undef output_asso_values_ref
#undef output_compute_min_max
#undef output_constants_defines
#undef output_constants_enum
#undef output_del
#undef output_do
#undef output_hash_function
#undef output_keylength_table
#undef output_keyword_table
#undef output_lookup_array
#undef output_lookup_function
#undef output_lookup_function_body
#undef output_lookup_pools
#undef output_lookup_tables
#undef output_new
#undef output_num_hash_values
#undef output_string_pool
#endif
File namespace/positions.h added (mode: 100644) (index 0000000..65af641)
/*
 * Short aliases for the Positions symbols and their iterators.
 * Without EPILOG defined: map each short name to its cgperf_ prefixed symbol.
 * With EPILOG defined: remove the aliases again, PositionReverseIterator
 * included.
 */
#ifndef EPILOG
#define pos_add cgperf_Positions_add
#define pos_contains cgperf_Positions_contains
#define pos_cpy cgperf_Positions_cpy
#define pos_del cgperf_Positions_del
#define pos_iterator cgperf_Positions_iterator
#define pos_iterator_all cgperf_Positions_iterator_all
#define pos_new cgperf_Positions_new
#define pos_new_cpy cgperf_Positions_new_cpy
#define pos_print cgperf_Positions_print
#define pos_remove cgperf_Positions_remove
#define pos_reviterator cgperf_Positions_reviterator
#define pos_set_useall cgperf_Positions_set_useall
#define pos_sort cgperf_Positions_sort
#define positer_del cgperf_PositionsIterator_del
#define positer_new cgperf_PositionIterator_new
#define positer_new_all cgperf_PositionIterator_new_all
#define positer_next cgperf_PositionIterator_next
#define positer_remaining cgperf_PositionIterator_remaining
#define Positions cgperf_Positions
#define PositionIterator cgperf_PositionIterator
#define PositionReverseIterator cgperf_PositionReverseIterator
#define posrevit_del cgperf_PositionReverseIterator_del
#define posrevit_new cgperf_PositionReverseIterator_new
#define posrevit_next cgperf_PositionReverseIterator_next
/*############################################################################*/
#else /* EPILOG */
#undef pos_add
#undef pos_contains
#undef pos_cpy
#undef pos_del
#undef pos_iterator
#undef pos_iterator_all
#undef POS_LASTCHAR
#undef POS_MAX_KEY_POS
#undef POS_MAX_SIZE
#undef pos_new
#undef pos_new_cpy
#undef pos_print
#undef pos_remove
#undef pos_reviterator
#undef pos_set_useall
#undef pos_sort
#undef positer_del
#undef POSITER_EOS
#undef positer_new
#undef positer_new_all
#undef positer_next
#undef positer_remaining
#undef Positions
#undef PositionIterator
#undef PositionReverseIterator
#undef posrevit_del
#undef POSREVIT_EOS
#undef posrevit_new
#undef posrevit_next
#endif
File namespace/search.c added (mode: 100644) (index 0000000..9bd8241)
/*
 * Short aliases for the helpers private to search.c.
 * Without EPILOG defined the aliases are created, with EPILOG defined they
 * are removed.
 */
#ifdef EPILOG
#undef EquivalenceClass
#undef Step
#undef delete_partition
#undef equals
#undef less_by_hash_value
/*############################################################################*/
#else /* prolog */
#define EquivalenceClass cgperf_Search_EquivalenceClass
#define Step cgperf_Search_Step
#define delete_partition cgperf_Search_delete_partition
#define equals cgperf_Search_equals
#define less_by_hash_value cgperf_Search_less_by_hash_value
#endif
File namespace/search.h added (mode: 100644) (index 0000000..e18bfb7)
/*
 * Short aliases for the Search symbols.
 * Without EPILOG defined the aliases are created, with EPILOG defined they
 * are removed. The target symbol spellings are kept exactly as they are.
 */
#ifdef EPILOG
#undef Search
#undef schr_compute_alpha_size
#undef schr_compute_alpha_size_with_inc
#undef schr_compute_alpha_unify
#undef schr_compute_alpha_unify_with_inc
#undef schr_compute_hash
#undef schr_compute_partition
#undef schr_count_duplicates_multiset
#undef schr_count_duplicates_tuple
#undef schr_count_duplicates_tuple_do
#undef schr_count_possible_collisions
#undef schr_del
#undef schr_delete_partition
#undef schr_delete_selchars
#undef schr_find_alpha_inc
#undef schr_find_asso_values
#undef schr_find_good_asso_values
#undef schr_find_positions
#undef schr_init_selchars_multiset
#undef schr_init_selchars_tuple
#undef schr_new
#undef schr_optimize
#undef schr_prepare
#undef schr_prepare_asso_values
#undef schr_sort
#undef schr_unchanged_partition
/*############################################################################*/
#else /* prolog */
#define Search cgperf_Search
#define schr_compute_alpha_size cgperf_Search_compute_alpha_size
#define schr_compute_alpha_size_with_inc cgperf_Search_compute_alpha_size_with_inc
#define schr_compute_alpha_unify cgperf_Search_compute_alpha_unify
#define schr_compute_alpha_unify_with_inc cgperf_Search_compute_alpha_unify_with_inc
#define schr_compute_hash cgperf_Search_compute_hash
#define schr_compute_partition cgperf_Search_compute_partition
#define schr_count_duplicates_multiset cgperf_Search_count_duplicates_multiset
#define schr_count_duplicates_tuple cgperf_Search_count_duplicates_tuple
#define schr_count_duplicates_tuple_do cgperf_Search_count_duplicates_tuple_do
#define schr_count_possible_collisions cgperf_Search_count_possible_collisions
#define schr_del cgperf_Search_del
#define schr_delete_partition cgperf_Search_delete_partition
#define schr_delete_selchars cgperf_Search_delete_selchars
#define schr_find_alpha_inc cgperf_Search_find_alpha_inc
#define schr_find_asso_values cgperf_Serach_find_asso_values
#define schr_find_good_asso_values cgperf_Search_find_good_asso_values
#define schr_find_positions cgperf_Search_find_positions
#define schr_init_selchars_multiset cgperf_Search_init_selchars_multiset
#define schr_init_selchars_tuple cgperf_Search_init_selchars_tuple
#define schr_new cgperf_Search_new
#define schr_optimize cgperf_Search_optimize
#define schr_prepare cgperf_Search_prepare
#define schr_prepare_asso_values cgperf_Search_prepare_asso_values
#define schr_sort cgperf_Search_sort
#define schr_unchanged_partition cgperf_Search_unchanged_partition
#endif
File options.c added (mode: 100644) (index 0000000..1fb9da3)
3 #include <limits.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <getopt.h>
7 #include <ctype.h>
8 #include <string.h>
10 #include "globals.h"
11 #include "options.h"
12 #include "version.h"
13 #include "positions.h"
14 /*------------------------------------------------------------------------------------------------*/
15 #include "namespace/globals.h"
16 #include "namespace/options.h"
17 #include "namespace/positions.h"
18 /*------------------------------------------------------------------------------------------------*/
19 /*{{{ defaults */
20 /* default struct initializer suffix */
22 /* default name for the key component */
23 static u8 *DEFAULT_SLOT_NAME = "name";
24 /* default delimiters that separate keywords from their attributes */
25 static u8 *DEFAULT_DELIMITERS = ",";
26 /* default name for generated hash function */
27 static u8 *DEFAULT_HASH_NAME = "hash";
28 /* default name for generated lookup function */
29 static u8 *DEFAULT_FUNCTION_NAME = "in_word_set";
30 /* default name for the generated class */
31 static u8 *DEFAULT_CLASS_NAME = "Perfect_Hash";
32 /* default name for string pool */
33 static u8 *DEFAULT_STRINGPOOL_NAME = "stringpool";
34 /* default prefix for constants */
35 static u8 *DEFAULT_CONSTANTS_PREFIX = "";
36 /* default name for generated hash table array */
37 static u8 *DEFAULT_WORDLIST_NAME = "wordlist";
38 /* default name for generated length table array */
39 static u8 *DEFAULT_LENGTHTABLE_NAME = "lengthtable";
40 /*}}} default -- END */
41 /*{{{ opts_new */
42 static struct Options *opts_new(void)
43 {
44 struct Options *t;
46 t = calloc(1, sizeof(*t));
47 t->option_word = OPTS_ANSIC;
49 t->total_switches = 1;
50 t->size_multiple = 1;
51 t->function_name = DEFAULT_FUNCTION_NAME;
52 t->slot_name = DEFAULT_SLOT_NAME;
53 t->initializer_suffix = DEFAULT_INITIALIZER_SUFFIX;
54 t->class_name = DEFAULT_CLASS_NAME;
55 t->hash_name = DEFAULT_HASH_NAME;
56 t->wordlist_name = DEFAULT_WORDLIST_NAME;
57 t->lengthtable_name = DEFAULT_LENGTHTABLE_NAME;
58 t->stringpool_name = DEFAULT_STRINGPOOL_NAME;
59 t->constants_prefix = DEFAULT_CONSTANTS_PREFIX;
60 t->delimiters = DEFAULT_DELIMITERS;
61 t->key_positions = pos_new();
62 return t;
63 }/*}}}*/
64 /*{{{ opts_del */
65 static void opts_del(struct Options *t)
66 {
67 if (OPTS(DEBUG)) {
68 struct PositionIterator *iter;
69 s32 pos;
71 fprintf(stderr, "\ndumping Options:"
72 "\nTYPE is........: %s"
73 "\nUPPERLOWER is..: %s"
74 "\nKRC is.........: %s"
75 "\nC is...........: %s"
76 "\nANSIC is.......: %s"
77 "\nCPLUSPLUS is...: %s"
78 "\nSEVENBIT is....: %s"
79 "\nLENTABLE is....: %s"
80 "\nCOMP is........: %s"
81 "\nCONST is.......: %s"
82 "\nENUM is........: %s"
83 "\nINCLUDE is.....: %s"
84 "\nGLOBAL is......: %s"
85 "\nNULLSTRINGS is.: %s"
86 "\nSHAREDLIB is...: %s"
87 "\nSWITCH is......: %s"
88 "\nNOTYPE is......: %s"
89 "\nDUP is.........: %s"
90 "\nNOLENGTH is....: %s"
91 "\nRANDOM is......: %s"
92 "\nDEBUG is.......: %s"
93 "\nlookup function name = %s"
94 "\nhash function name = %s"
95 "\nword list name = %s"
96 "\nlength table name = %s"
97 "\nstring pool name = %s"
98 "\nslot name = %s"
99 "\ninitializer suffix = %s"
100 "\nasso_values iterations = %d"
101 "\njump value = %d"
102 "\nhash table size multiplier = %g"
103 "\ninitial associated value = %d"
104 "\ndelimiters = %s"
105 "\nnumber of switch statements = %d\n",
106 OPTS(TYPE) ? "enabled" : "disabled",
107 OPTS(UPPERLOWER) ? "enabled" : "disabled",
108 OPTS(KRC) ? "enabled" : "disabled",
109 OPTS(C) ? "enabled" : "disabled",
110 OPTS(ANSIC) ? "enabled" : "disabled",
111 OPTS(CPLUSPLUS) ? "enabled" : "disabled",
112 OPTS(SEVENBIT) ? "enabled" : "disabled",
113 OPTS(LENTABLE) ? "enabled" : "disabled",
114 OPTS(COMP) ? "enabled" : "disabled",
115 OPTS(CONST) ? "enabled" : "disabled",
116 OPTS(ENUM) ? "enabled" : "disabled",
117 OPTS(INCLUDE) ? "enabled" : "disabled",
118 OPTS(GLOBAL) ? "enabled" : "disabled",
119 OPTS(NULLSTRINGS) ? "enabled" : "disabled",
120 OPTS(SHAREDLIB) ? "enabled" : "disabled",
121 OPTS(SWITCH) ? "enabled" : "disabled",
122 OPTS(NOTYPE) ? "enabled" : "disabled",
123 OPTS(DUP) ? "enabled" : "disabled",
124 OPTS(NOLENGTH) ? "enabled" : "disabled",
125 OPTS(RANDOM) ? "enabled" : "disabled",
126 OPTS(DEBUG) ? "enabled" : "disabled",
127 t->function_name, t->hash_name, t->wordlist_name, t->lengthtable_name,
128 t->stringpool_name, t->slot_name, t->initializer_suffix,
129 t->asso_iterations, t->jump, t->size_multiple, t->initial_asso_value,
130 t->delimiters, t->total_switches);
131 if (t->key_positions->useall)
132 fprintf(stderr, "all characters are used in the hash function\n");
133 else {
134 fprintf(stderr, "maximum keysig size = %d\nkey positions are: \n", t->key_positions->size);
135 iter = pos_iterator_all(t->key_positions);
136 loop {
137 pos = positer_next(iter);
138 if (pos == POSITER_EOS)
139 break;
140 if (pos == POS_LASTCHAR)
141 fprintf(stderr, "$\n");
142 else
143 fprintf(stderr, "%d\n", pos + 1);
145 }
146 }
147 fprintf (stderr, "finished dumping Options\n");
148 }
149 pos_del(t->key_positions);
150 free(t);
151 }/*}}}*/
/*{{{ opts_long_options */
/*
 * Long option table for getopt_long().
 * Every entry has a NULL flag pointer, so getopt_long() returns the value in
 * the last column: either the character of the matching short option or, for
 * options without a short form, a code above CHAR_MAX.
 * The all-zero entry terminates the table.
 */
static const struct option opts_long_options[] =
{
	/* name                              argument           flag  value */
	{ "output-file",                    required_argument, NULL, CHAR_MAX + 1 },
	{ "ignore-case",                    no_argument,       NULL, CHAR_MAX + 2 },
	{ "delimiters",                     required_argument, NULL, 'e' },
	{ "struct-type",                    no_argument,       NULL, 't' },
	{ "language",                       required_argument, NULL, 'L' },
	{ "slot-name",                      required_argument, NULL, 'K' },
	{ "initializer-suffix",             required_argument, NULL, 'F' },
	{ "hash-fn-name",                   required_argument, NULL, 'H' }, /* backward compatibility */
	{ "hash-function-name",             required_argument, NULL, 'H' },
	{ "lookup-fn-name",                 required_argument, NULL, 'N' }, /* backward compatibility */
	{ "lookup-function-name",           required_argument, NULL, 'N' },
	{ "class-name",                     required_argument, NULL, 'Z' },
	{ "seven-bit",                      no_argument,       NULL, '7' },
	{ "compare-strncmp",                no_argument,       NULL, 'c' },
	{ "readonly-tables",                no_argument,       NULL, 'C' },
	{ "enum",                           no_argument,       NULL, 'E' },
	{ "includes",                       no_argument,       NULL, 'I' },
	{ "global-table",                   no_argument,       NULL, 'G' },
	{ "constants-prefix",               required_argument, NULL, CHAR_MAX + 5 },
	{ "word-array-name",                required_argument, NULL, 'W' },
	{ "length-table-name",              required_argument, NULL, CHAR_MAX + 4 },
	{ "switch",                         required_argument, NULL, 'S' },
	{ "omit-struct-type",               no_argument,       NULL, 'T' },
	{ "key-positions",                  required_argument, NULL, 'k' },
	{ "compare-strlen",                 no_argument,       NULL, 'l' }, /* backward compatibility */
	{ "compare-lengths",                no_argument,       NULL, 'l' },
	{ "duplicates",                     no_argument,       NULL, 'D' },
	{ "fast",                           required_argument, NULL, 'f' },
	{ "initial-asso",                   required_argument, NULL, 'i' },
	{ "jump",                           required_argument, NULL, 'j' },
	{ "multiple-iterations",            required_argument, NULL, 'm' },
	{ "no-strlen",                      no_argument,       NULL, 'n' },
	{ "occurrence-sort",                no_argument,       NULL, 'o' },
	{ "optimized-collision-resolution", no_argument,       NULL, 'O' },
	{ "pic",                            no_argument,       NULL, 'P' },
	{ "string-pool-name",               required_argument, NULL, 'Q' },
	{ "null-strings",                   no_argument,       NULL, CHAR_MAX + 3 },
	{ "random",                         no_argument,       NULL, 'r' },
	{ "size-multiple",                  required_argument, NULL, 's' },
	{ "help",                           no_argument,       NULL, 'h' },
	{ "version",                        no_argument,       NULL, 'v' },
	{ "debug",                          no_argument,       NULL, 'd' },
	/* terminator */
	{ NULL,                             no_argument,       NULL, 0 }
};/*}}}*/
200 /*{{{ opts_parse_options */
201 static void opts_parse_options(struct Options *t, u32 argc, u8 **argv)
202 {
203 opts_program_name = (u8*)argv[0];
204 t->argument_count = argc;
205 t->argument_vector = argv;
207 loop {
208 int option_char;
210 option_char = getopt_long(t->argument_count, t->argument_vector,
211 "acCdDe:Ef:F:gGhH:i:Ij:k:K:lL:m:nN:oOpPQ:rs:S:tTvW:Z:7", opts_long_options,
212 NULL);
213 if (option_char == -1)
214 break;
215 switch (option_char) {
216 case 'a': /* generated code uses the ANSI prototype format */
217 break; /* This is now the default */
218 case 'c': /* generate strncmp rather than strcmp */
219 t->option_word |= OPTS_COMP;
220 break;
221 case 'C': /* make the generated tables readonly (const) */
222 t->option_word |= OPTS_CONST;
223 break;
224 case 'd': /* enable debugging option */
225 t->option_word |= OPTS_DEBUG;
226 fprintf(stderr, "Starting program %s, version %s, with debugging on.\n", opts_program_name, cgperf_version_string);
227 break;
228 case 'D': /* enable duplicate option */
229 t->option_word |= OPTS_DUP;
230 break;
231 case 'e': /* specify keyword/attribute separator */
232 t->delimiters = /*getopt*/(u8*)optarg;
233 break;
234 case 'E':
235 t->option_word |= OPTS_ENUM;
236 break;
237 case 'f': /* generate the hash table "fast" */
238 break; /* Not needed any more */
239 case 'F':
240 t->initializer_suffix = /*getopt*/(u8*)optarg;
241 break;
242 case 'g': /* use the 'inline' keyword for generated sub-routines, ifdef __GNUC__ */
243 break; /* This is now the default */
244 case 'G': /* make the keyword table a global variable */
245 t->option_word |= OPTS_GLOBAL;
246 break;
247 case 'h': /* displays a list of helpful Options to the user */
248 opts_long_usage(stdout);
249 exit(0);
250 case 'H': /* sets the name for the hash function */
251 t->hash_name = /*getopt*/(u8*)optarg;
252 break;
253 case 'i': /* sets the initial value for the associated values array */
254 t->initial_asso_value = atoi(/*getopt*/optarg);
255 if (t->initial_asso_value < 0)
256 fprintf(stderr, "Initial value %d should be non-zero, ignoring and continuing.\n", t->initial_asso_value);
257 if (OPTS(RANDOM))
258 fprintf(stderr, "warning, -r option superceeds -i, ignoring -i option and continuing\n");
259 break;
260 case 'I': /* enable #include statements */
261 t->option_word |= OPTS_INCLUDE;
262 break;
263 case 'j': /* sets the jump value, must be odd for later algorithms */
264 t->jump = atoi (/*getopt*/optarg);
265 if (t->jump < 0) {
266 fprintf(stderr, "Jump value %d must be a positive number.\n", t->jump);
267 opts_short_usage(stderr);
268 exit(1);
269 } else if ((t->jump != 0) && ((t->jump % 2) == 0))
270 fprintf (stderr, "Jump value %d should be odd, adding 1 and continuing...\n", t->jump++);
271 break;
272 case 'k': { /* sets key positions used for hash function */
273 t->option_word |= OPTS_POSITIONS;
274 s32 BAD_VALUE = -3;
275 s32 EOS = POSITER_EOS;
276 s32 value;
277 struct PositionStringParser *sparser;
279 sparser = posstrp_new(/*getopt*/(u8*)optarg, 1,
282 if (/*getopt*/optarg[0] == '*') /* use all the characters for hashing!!!! */
283 pos_set_useall(t->key_positions, true);
284 else {
285 s32 *key_positions;
286 s32 *key_pos;
287 u32 total_keysig_size;
289 pos_set_useall(t->key_positions, false);
290 key_positions = t->key_positions->positions;
292 key_pos = key_positions;
293 loop {
294 value = posstrp_nextPosition(sparser);
295 if (value == EOS)
296 break;
297 if (value == BAD_VALUE) {
298 fprintf(stderr, "Invalid position value or range, use 1,2,3-%d,'$' or '*'.\n", POS_MAX_KEY_POS);
299 opts_short_usage(stderr);
300 exit(1);
301 }
302 if ((key_pos - key_positions) == POS_MAX_SIZE) {
303 /*
304 * More than Positions_max_size key positions.
305 * Since all key positions are in the range
306 * 0..Positions_max_key_pos-1 or == Positions_lastchar,
307 * there must be duplicates.
308 */
309 fprintf(stderr, "Duplicate key positions selected\n");
310 opts_short_usage(stderr);
311 exit(1);
312 }
313 if (value != POS_LASTCHAR)
314 /* We use 0-based indices in the class Positions */
315 value = value - 1;
316 *key_pos = value;
317 ++key_pos;
318 }
319 total_keysig_size = key_pos - key_positions;
320 if (total_keysig_size == 0) {
321 fprintf(stderr, "No key positions selected.\n");
322 opts_short_usage(stderr);
323 exit(1);
324 }
325 t->key_positions->size = total_keysig_size;
326 /*
327 * Sorts the key positions *IN REVERSE ORDER!!*
328 * This makes further routines more efficient.
329 * Especially when generating code.
330 */
331 if (!pos_sort(t->key_positions)) {
332 fprintf(stderr, "Duplicate key positions selected\n");
333 opts_short_usage(stderr);
334 exit(1);
335 }
336 }
337 break;}
338 case 'K':/* make this the keyname for the keyword component field */
339 t->slot_name = /*getopt*/optarg;
340 break;
341 case 'l':/* create length table to avoid extra string compares */
342 t->option_word |= OPTS_LENTABLE;
343 break;
344 case 'L':/* deal with different generated languages */
345 t->language = 0;
346 opts_set_language(t,/*getopt*/optarg);
347 break;
348 case 'm':/* multiple iterations for finding good asso_values */
349 t->asso_iterations = atoi(/*getopt*/optarg);
350 if (t->asso_iterations < 0) {
351 fprintf(stderr, "asso_iterations value must not be negative, assuming 0\n");
352 t->asso_iterations = 0;
353 }
354 break;
355 case 'n':/* don't include the length when computing hash function */
356 t->option_word |= OPTS_NOLENGTH;
357 break;
358 case 'N':/* make generated lookup function name be optarg */
359 t->function_name = /*getopt*/optarg;
360 break;
361 case 'o':/* order input by frequency of key set occurrence */
362 break; /* not needed any more */
363 case 'O':/* optimized choice during collision resolution */
364 break; /* not needed any more */
365 case 'p':/* generated lookup function a pointer instead of int */
366 break; /* this is now the default */
367 case 'P':/* optimize for position-independent code */
368 t->option_word |= OPTS_SHAREDLIB;
369 break;
370 case 'Q':/* sets the name for the string pool */
371 t->stringpool_name = /*getopt*/optarg;
372 break;
373 case 'r':/* utilize randomness to initialize the associated values table */
374 t->option_word |= OPTS_RANDOM;
375 if (t->initial_asso_value != 0)
376 fprintf(stderr, "warning, -r option supersedes -i, disabling -i option and continuing\n");
377 break;
378 case 's':{/* range of associated values, determines size of final table */
379 f32 numerator;
380 f32 denominator;
381 bool invalid;
382 u8 *endptr;
384 denominator = 1;
385 invalid = false;
386 numerator = strtod(/*getopt*/optarg, &endptr);
387 if (endptr == /*getopt*/(u8*)optarg)
388 invalid = true;
389 else if (*endptr != '\0') {
390 if (*endptr == '/') {
391 u8 *denomptr;
393 denomptr = endptr + 1;
394 denominator = strtod(denomptr, &endptr);
395 if (endptr == denomptr || *endptr != '\0')
396 invalid = true;
397 } else
398 invalid = true;
399 }
400 if (invalid) {
401 fprintf(stderr, "Invalid value for option -s.\n");
402 opts_short_usage(stderr);
403 exit (1);
404 }
405 t->size_multiple = numerator / denominator;
406 /* backward compatibility: -3 means 1/3 */
407 if (t->size_multiple < 0)
408 t->size_multiple = 1 / (-t->size_multiple);
409 /* catch stupid users and port to C the c++ from stupid coders */
410 if (t->size_multiple == 0)
411 t->size_multiple = 1;
412 /* warnings */
413 if (t->size_multiple > 50)
414 fprintf(stderr, "Size multiple %g is excessive, did you really mean this?! (try '%s --help' for help)\n", t->size_multiple, opts_program_name);
415 else if (t->size_multiple < 0.01f)
416 fprintf(stderr, "Size multiple %g is extremely small, did you really mean this?! (try '%s --help' for help)\n", t->size_multiple, opts_program_name);
417 break;}
418 case 'S':/* generate switch statement output, rather than lookup table */
419 t->option_word |= OPTS_SWITCH;
420 t->total_switches = atoi(/*getopt*/optarg);
421 if (t->total_switches <= 0) {
422 fprintf(stderr, "number of switches %s must be a positive number\n", /*getopt*/optarg);
423 opts_short_usage (stderr);
424 exit(1);
425 }
426 break;
427 case 't':/* enable the TYPE mode, allowing arbitrary user structures */
428 t->option_word |= OPTS_TYPE;
429 break;
430 case 'T':/* don't print structure definition */
431 t->option_word |= OPTS_NOTYPE;
432 break;
433 case 'v':/* print out the version and quit */
434 fprintf(stdout, "GNU gperf %s\n", cgperf_version_string);
435 fprintf(stdout, "Copyright (C) %s Free Software Foundation, Inc.\n\
436 License GPLv3+: GNU GPL version 3 or later <>\n\
437 This is free software: you are free to change and redistribute it.\n\
438 There is NO WARRANTY, to the extent permitted by law.\n\
439 ",
440 "1989-2018");
441 fprintf(stdout, "Written by %s and %s. C89 with benign bits of C99/C11 port by Sylvain BERTRAND\n", "Douglas C. Schmidt", "Bruno Haible");
442 exit(0);
443 case 'W':/* sets the name for the hash table array */
444 t->wordlist_name = /*getopt*/optarg;
445 break;
446 case 'Z':/* set the class name */
447 t->class_name = /*getopt*/optarg;
448 break;
449 case '7':/* assume 7-bit characters */
450 t->option_word |= OPTS_SEVENBIT;
451 break;
452 case CHAR_MAX + 1:/* set the output file name */
453 t->output_file_name = /*getopt*/optarg;
454 break;
455 case CHAR_MAX + 2:/* case insignificant */
456 t->option_word |= OPTS_UPPERLOWER;
457 break;
458 case CHAR_MAX + 3:/* use NULL instead of "" */
459 t->option_word |= OPTS_NULLSTRINGS;
460 break;
461 case CHAR_MAX + 4:/* sets the name for the length table array */
462 t->lengthtable_name = /*getopt*/optarg;
463 break;
464 case CHAR_MAX + 5:/* sets the prefix for the constants */
465 t->constants_prefix = /*getopt*/optarg;
466 break;
467 default:
468 opts_short_usage(stderr);
469 exit(1);
470 }
471 }
472 if (/*getopt*/optind < argc)
473 t->input_file_name = argv[/*getopt*/optind++];
475 if (/*getopt*/optind < argc) {
476 fprintf(stderr, "Extra trailing arguments to %s.\n", opts_program_name);
477 opts_short_usage(stderr);
478 exit(1);
479 }
480 }/*}}}*/
481 /*{{{ opts_short_usage */
482 static void opts_short_usage(FILE * stream)
483 {
484 fprintf(stream, "Try '%s --help' for more information.\n", opts_program_name);
485 }/*}}}*/
486 /*{{{ opts_long_usage */
/*
 * Writes the complete help text to STREAM.
 *
 * The text is emitted as a sequence of fprintf calls: a one-line summary, the
 * usage line (which embeds opts_program_name), and then one section per group
 * of options -- output file location, input file interpretation, language for
 * the output code, details in the output code, algorithm, and informative
 * output -- followed by the bug-report line.
 */
487 static void opts_long_usage(FILE * stream)
488 {
489 fprintf(stream,
490 "GNU 'gperf' generates perfect hash functions.\n");
491 fprintf(stream, "\n");
492 fprintf(stream,
493 "Usage: %s [OPTION]... [INPUT-FILE]\n",
494 opts_program_name);
495 fprintf(stream, "\n");
496 fprintf(stream,
497 "If a long option shows an argument as mandatory, then it is mandatory\n"
498 "for the equivalent short option also.\n");
499 fprintf(stream, "\n");
500 fprintf(stream,
501 "Output file location:\n");
502 fprintf(stream,
503 " --output-file=FILE Write output to specified file.\n");
504 fprintf(stream,
505 "The results are written to standard output if no output file is specified\n"
506 "or if it is -.\n");
507 fprintf(stream, "\n");
508 fprintf(stream,
509 "Input file interpretation:\n");
510 fprintf(stream,
511 " -e, --delimiters=DELIMITER-LIST\n"
512 " Allow user to provide a string containing delimiters\n"
513 " used to separate keywords from their attributes.\n"
514 " Default is \",\".\n");
/* "%%%%" below is printed by fprintf as the two characters "%%". */
515 fprintf(stream,
516 " -t, --struct-type Allows the user to include a structured type\n"
517 " declaration for generated code. Any text before %%%%\n"
518 " is considered part of the type declaration. Key\n"
519 " words and additional fields may follow this, one\n"
520 " group of fields per line.\n");
521 fprintf(stream,
522 " --ignore-case Consider upper and lower case ASCII characters as\n"
523 " equivalent. Note that locale dependent case mappings\n"
524 " are ignored.\n");
525 fprintf(stream, "\n");
526 fprintf(stream,
527 "Language for the output code:\n");
528 fprintf(stream,
529 " -L, --language=LANGUAGE-NAME\n"
530 " Generates code in the specified language. Languages\n"
531 " handled are currently C++, ANSI-C, C, and KR-C. The\n"
532 " default is ANSI-C.\n");
533 fprintf(stream, "\n");
534 fprintf(stream,
535 "Details in the output code:\n");
536 fprintf(stream,
537 " -K, --slot-name=NAME Select name of the keyword component in the keyword\n"
538 " structure.\n");
539 fprintf(stream,
540 " -F, --initializer-suffix=INITIALIZERS\n"
541 " Initializers for additional components in the keyword\n"
542 " structure.\n");
543 fprintf(stream,
544 " -H, --hash-function-name=NAME\n"
545 " Specify name of generated hash function. Default is\n"
546 " 'hash'.\n");
547 fprintf(stream,
548 " -N, --lookup-function-name=NAME\n"
549 " Specify name of generated lookup function. Default\n"
550 " name is 'in_word_set'.\n");
551 fprintf(stream,
552 " -Z, --class-name=NAME Specify name of generated C++ class. Default name is\n"
553 " 'Perfect_Hash'.\n");
554 fprintf(stream,
555 " -7, --seven-bit Assume 7-bit characters.\n");
556 fprintf(stream,
557 " -l, --compare-lengths Compare key lengths before trying a string\n"
558 " comparison. This is necessary if the keywords\n"
559 " contain NUL bytes. It also helps cut down on the\n"
560 " number of string comparisons made during the lookup.\n");
561 fprintf(stream,
562 " -c, --compare-strncmp Generate comparison code using strncmp rather than\n"
563 " strcmp.\n");
564 fprintf(stream,
565 " -C, --readonly-tables Make the contents of generated lookup tables\n"
566 " constant, i.e., readonly.\n");
567 fprintf(stream,
568 " -E, --enum Define constant values using an enum local to the\n"
569 " lookup function rather than with defines.\n");
570 fprintf(stream,
571 " -I, --includes Include the necessary system include file <string.h>\n"
572 " at the beginning of the code.\n");
573 fprintf(stream,
574 " -G, --global-table Generate the static table of keywords as a static\n"
575 " global variable, rather than hiding it inside of the\n"
576 " lookup function (which is the default behavior).\n");
577 fprintf(stream,
578 " -P, --pic Optimize the generated table for inclusion in shared\n"
579 " libraries. This reduces the startup time of programs\n"
580 " using a shared library containing the generated code.\n");
581 fprintf(stream,
582 " -Q, --string-pool-name=NAME\n"
583 " Specify name of string pool generated by option --pic.\n"
584 " Default name is 'stringpool'.\n");
585 fprintf(stream,
586 " --null-strings Use NULL strings instead of empty strings for empty\n"
587 " keyword table entries.\n");
588 fprintf(stream,
589 " --constants-prefix=PREFIX\n"
590 " Specify prefix for the constants like TOTAL_KEYWORDS.\n");
591 fprintf(stream,
592 " -W, --word-array-name=NAME\n"
593 " Specify name of word list array. Default name is\n"
594 " 'wordlist'.\n");
595 fprintf(stream,
596 " --length-table-name=NAME\n"
597 " Specify name of length table array. Default name is\n"
598 " 'lengthtable'.\n");
599 fprintf(stream,
600 " -S, --switch=COUNT Causes the generated C code to use a switch\n"
601 " statement scheme, rather than an array lookup table.\n"
602 " This can lead to a reduction in both time and space\n"
603 " requirements for some keyfiles. The COUNT argument\n"
604 " determines how many switch statements are generated.\n"
605 " A value of 1 generates 1 switch containing all the\n"
606 " elements, a value of 2 generates 2 tables with 1/2\n"
607 " the elements in each table, etc. If COUNT is very\n"
608 " large, say 1000000, the generated C code does a\n"
609 " binary search.\n");
610 fprintf(stream,
611 " -T, --omit-struct-type\n"
612 " Prevents the transfer of the type declaration to the\n"
613 " output file. Use this option if the type is already\n"
614 " defined elsewhere.\n");
615 fprintf(stream, "\n");
616 fprintf(stream,
617 "Algorithm employed by gperf:\n");
618 fprintf(stream,
619 " -k, --key-positions=KEYS\n"
620 " Select the key positions used in the hash function.\n"
621 " The allowable choices range between 1-%d, inclusive.\n"
622 " The positions are separated by commas, ranges may be\n"
623 " used, and key positions may occur in any order.\n"
624 " Also, the meta-character '*' causes the generated\n"
625 " hash function to consider ALL key positions, and $\n"
626 " indicates the \"final character\" of a key, e.g.,\n"
627 " $,1,2,4,6-10.\n",
/*
 * NOTE(review): the format string above contains a %d conversion and ends with
 * a comma, but neither the matching argument nor the closing ");" is visible
 * in this listing before the next fprintf -- confirm against the real file.
 */
629 fprintf(stream,
630 " -D, --duplicates Handle keywords that hash to duplicate values. This\n"
631 " is useful for certain highly redundant keyword sets.\n");
632 fprintf(stream,
633 " -m, --multiple-iterations=ITERATIONS\n"
634 " Perform multiple choices of the -i and -j values,\n"
635 " and choose the best results. This increases the\n"
636 " running time by a factor of ITERATIONS but does a\n"
637 " good job minimizing the generated table size.\n");
638 fprintf(stream,
639 " -i, --initial-asso=N Provide an initial value for the associate values\n"
640 " array. Default is 0. Setting this value larger helps\n"
641 " inflate the size of the final table.\n");
642 fprintf(stream,
643 " -j, --jump=JUMP-VALUE Affects the \"jump value\", i.e., how far to advance\n"
644 " the associated character value upon collisions. Must\n"
645 " be an odd number, default is %d.\n",
/*
 * NOTE(review): same situation as for --key-positions: a %d conversion whose
 * argument and closing ");" are not visible in this listing -- confirm.
 */
647 fprintf(stream,
648 " -n, --no-strlen Do not include the length of the keyword when\n"
649 " computing the hash function.\n");
650 fprintf(stream,
651 " -r, --random Utilizes randomness to initialize the associated\n"
652 " values table.\n");
653 fprintf(stream,
654 " -s, --size-multiple=N Affects the size of the generated hash table. The\n"
655 " numeric argument N indicates \"how many times larger\n"
656 " or smaller\" the associated value range should be,\n"
657 " in relationship to the number of keys, e.g. a value\n"
658 " of 3 means \"allow the maximum associated value to\n"
659 " be about 3 times larger than the number of input\n"
660 " keys\". Conversely, a value of 1/3 means \"make the\n"
661 " maximum associated value about 3 times smaller than\n"
662 " the number of input keys\". A larger table should\n"
663 " decrease the time required for an unsuccessful\n"
664 " search, at the expense of extra table space. Default\n"
665 " value is 1.\n");
666 fprintf(stream, "\n");
667 fprintf(stream,
668 "Informative output:\n"
669 " -h, --help Print this message.\n"
670 " -v, --version Print the gperf version number.\n"
671 " -d, --debug Enables the debugging option (produces verbose\n"
672 " output to the standard error).\n");
673 fprintf(stream, "\n");
674 fprintf(stream,
675 "Report bugs to <>.\n");
676 }/*}}}*/
677 /*{{{ opts_set_language */
678 /* Sets the output language, if not already set */
679 void opts_set_language(struct Options *t, u8 *language)
680 {
681 if (t->language != 0)
682 return;
683 t->language = language;
684 t->option_word &= ~(OPTS_KRC | OPTS_C | OPTS_ANSIC | OPTS_CPLUSPLUS);
685 if (strcmp(language, "KR-C") == 0)
686 t->option_word |= OPTS_KRC;
687 else if (strcmp (language, "C") == 0)
688 t->option_word |= OPTS_C;
689 else if (strcmp (language, "ANSI-C") == 0)
690 t->option_word |= OPTS_ANSIC;
691 else if (strcmp (language, "C++") == 0)
692 t->option_word |= OPTS_CPLUSPLUS;
693 else {
694 fprintf(stderr, "unsupported language option %s, defaulting to ANSI-C\n", language);
695 t->option_word |= OPTS_ANSIC;
696 }
697 }/*}}}*/
698 /*{{{ opts_set_delimiters */
699 /* Sets the delimiters string, if not already set. */
700 static void opts_set_delimiters(struct Options *t, u8 *delimiters)
701 {
702 if (t->delimiters == DEFAULT_DELIMITERS)
703 t->delimiters = delimiters;
704 }/*}}}*/
705 /*{{{ opts_set_slot_name */
706 /* sets the keyword key name, if not already set */
707 static void opts_set_slot_name(struct Options *t, u8 *name)
708 {
709 if (t->slot_name == DEFAULT_SLOT_NAME)
710 t->slot_name = name;
711 }/*}}}*/
712 /*{{{ opts_set_initializer_suffix */
713 /* sets the struct initializer suffix, if not already set */
714 static void opts_set_initializer_suffix(struct Options *t, u8 *initializers)
715 {
716 if (t->initializer_suffix == DEFAULT_INITIALIZER_SUFFIX)
717 t->initializer_suffix = initializers;
718 }/*}}}*/
719 /*{{{ opts_set_hash_name */
720 /* sets the hash function name, if not already set */
721 static void opts_set_hash_name(struct Options *t, u8 *name)
722 {
723 if (t->hash_name == DEFAULT_HASH_NAME)
724 t->hash_name = name;
725 }/*}}}*/
726 /*{{{ opts_set_function_name */
727 /* sets the generated function name, if not already set */
728 static void opts_set_function_name(struct Options *t, u8 *name)
729 {
730 if (t->function_name == DEFAULT_FUNCTION_NAME)
731 t->function_name = name;
732 }/*}}}*/
733 /*{{{ opts_set_class_name */
734 /* sets the generated class name, if not already set */
735 static void opts_set_class_name(struct Options *t, u8 *name)
736 {
737 if (t->class_name == DEFAULT_CLASS_NAME)
738 t->class_name = name;
739 }/*}}}*/
740 /*{{{ opts_set_stringpool_name */
741 /* sets the string pool name, if not already set */
742 static void opts_set_stringpool_name(struct Options *t, u8 *name)
743 {
744 if (t->stringpool_name == DEFAULT_STRINGPOOL_NAME)
745 t->stringpool_name = name;
746 }/*}}}*/
747 /*{{{ opts_set_constants_prefix */
748 /* sets the prefix for the constants, if not already set */
749 static void opts_set_constants_prefix(struct Options *t, u8 *prefix)
750 {
751 if (t->constants_prefix == DEFAULT_CONSTANTS_PREFIX)
752 t->constants_prefix = prefix;
753 }/*}}}*/
754 /*{{{ opts_set_wordlist_name */
755 /* sets the hash table array name, if not already set */
756 static void opts_set_wordlist_name(struct Options *t, u8 *name)
757 {
758 if (t->wordlist_name == DEFAULT_WORDLIST_NAME)
759 t->wordlist_name = name;
760 }/*}}}*/
761 /*{{{ opts_set_lengthtable_name */
762 /* sets the length table array name, if not already set */
763 static void opts_set_lengthtable_name(struct Options *t, u8 *name)
764 {
765 if (t->lengthtable_name == DEFAULT_LENGTHTABLE_NAME)
766 t->lengthtable_name = name;
767 }/*}}}*/
768 /*{{{ opts_set_total_switches */
769 /* sets the total number of switch statements, if not already set */
770 static void opts_set_total_switches(struct Options *t, s32 total_switches)
771 {
772 if (!OPTS(SWITCH)) {
773 t->option_word |= OPTS_SWITCH;
774 t->total_switches = total_switches;
775 }
776 }/*}}}*/
777 /*{{{ posstrp_new */
778 static struct PositionStringParser *posstrp_new(u8 *str, s32 low_bound,
779 s32 high_bound, s32 end_word_marker, s32 error_value, s32 end_marker)
780 {
781 struct PositionStringParser *t;
783 t = calloc(1, sizeof(*t));
784 t->str = str;
785 t->low_bound = low_bound;
786 t->high_bound = high_bound;
787 t->end_word_marker = end_word_marker;
788 t->error_value = error_value;
789 t->end_marker = end_marker;
790 t->in_range = false;
791 return t;
792 }
793 /*}}}*/
/*{{{ posstrp_del */
/*
 * Releases the parser T with free().  The string the parser was reading is
 * not touched.
 */
static void posstrp_del(struct PositionStringParser *t)
{
	free(t);
}/*}}}*/
799 /*{{{ posstrp_nextPosition */
800 /* Returns the next key position from the given string */
801 static s32 posstrp_nextPosition(struct PositionStringParser *t)
802 {
803 if (t->in_range) {
804 /* We are inside a range. Return the next value from the range */
805 if (++t->range_curr_value >= t->range_upper_bound)
806 t->in_range = false;
807 return t->range_curr_value;
808 }
809 /* we are not inside a range */
810 /* Continue parsing the given string */
811 loop {
812 if (t->str[0] == 0)
813 break;
814 switch (t->str[0]) {
815 case ',':
816 /* Skip the comma */
817 ++(t->str);
818 break;
819 case '$':
820 /* Valid key position */
821 ++(t->str);
822 return t->end_word_marker;
823 case '0': case '1': case '2': case '3': case '4':
824 case '5': case '6': case '7': case '8': case '9': {
825 /* Valid key position */
826 s32 curr_value;
828 curr_value = 0;
829 loop {
830 if (!isdigit((int)(t->str[0])))
831 break;
832 curr_value = curr_value * 10 + (t->str[0] - '0');
833 ++(t->str);
834 }
835 if (t->str[0] == '-') {
836 ++(t->str);
837 /* starting a range of key positions */
838 t->in_range = true;
840 t->range_upper_bound = 0;
841 loop {
842 if (!isdigit((int)(t->str[0])))
843 break;
844 t->range_upper_bound = t->range_upper_bound * 10
845 + (t->str[0] - '0');
846 ++(t->str);
847 }
848 /* Verify range's upper bound */
849 if (!(t->range_upper_bound > curr_value && t->range_upper_bound
850 <= t->high_bound))
851 return t->error_value;
852 t->range_curr_value = curr_value;
853 }
854 /* Verify range's lower bound */
855 if (!(curr_value >= t->low_bound && curr_value <= t->high_bound))
856 return t->error_value;
857 return curr_value;
858 }
859 default:
860 /* Invalid syntax. */
861 return t->error_value;
862 }
863 }
864 return t->end_marker;
865 }/*}}}*/
866 /*{{{ opts_print */
867 static void opts_print(struct Options *t)
868 {
869 s32 i;
871 printf("/* Command-line: ");
872 i = 0;
873 loop {
874 u8 *arg;
876 if (i >= t->argument_count)
877 break;
878 arg = t->argument_vector[i];
879 /* escape arg if it contains shell metacharacters */
880 if (*arg == '-') {
881 putchar(*arg);
882 ++arg;
883 if ((*arg >= 'A' && *arg <= 'Z') || (*arg >= 'a' && *arg <= 'z')) {
884 putchar(*arg);
885 ++arg;
886 } else if (*arg == '-') {
887 loop {
888 putchar(*arg);
889 ++arg;
890 if (!((*arg >= 'A' && *arg <= 'Z') || (*arg >= 'a'
891 && *arg <= 'z') || *arg == '-'))
892 break;
893 }
894 if (*arg == '=') {
895 putchar(*arg);
896 ++arg;
897 }
898 }
899 }
900 if (strpbrk(arg, "\t\n !\"#$&'()*;<>?[\\]`{|}~") != 0) {
901 if (strchr(arg, '\'') != 0) {
902 putchar('"');
903 loop {
904 if (*arg == 0)
905 break;
906 if (*arg == '\"' || *arg == '\\' || *arg == '$'
907 || *arg == '`')
908 putchar('\\');
909 putchar(*arg);
910 ++arg;
911 }
912 putchar('"');
913 } else {
914 putchar('\'');
915 loop {
916 if (*arg == 0)
917 break;
918 if (*arg == '\\')
919 putchar('\\');
920 putchar(*arg);
921 ++arg;
922 }
923 putchar('\'');
924 }
925 } else
926 printf("%s", arg);
927 printf(" ");
928 ++i;
929 }
930 printf(" */");
931 }
932 /*}}}*/
933 /*------------------------------------------------------------------------------------------------*/
934 #define EPILOG
935 #include "namespace/globals.h"
936 #include "namespace/options.h"
937 #include "namespace/positions.h"
938 #undef EPILOG
939 /*------------------------------------------------------------------------------------------------*/
940 #endif
File options.h added (mode: 100644) (index 0000000..b7853ef)
3 #include <stdbool.h>
4 #include <stdio.h>
5 #include "c_fixing.h"
6 /*------------------------------------------------------------------------------------------------*/
7 #include "namespace/positions.h"
8 #include "namespace/options.h"
9 /*------------------------------------------------------------------------------------------------*/
10 /*{{{ Options */
11 /*{{{ globals */
12 /* records the program name */
/*
 * Printed in the usage hint, the usage line and the size-multiple warnings.
 * NOTE(review): this is a static object defined in a header, so every
 * translation unit including it gets its own copy -- presumably harmless if
 * the project is built as a single unit; confirm.
 */
13 static u8 *opts_program_name;
14 /*}}} globals -- END */
15 /*{{{ constants */
/*
 * Option constants.  The OPTS_* enumerators below are single-bit flags
 * (bits 0 through 21) that are OR-ed into Options.option_word.
 */
16 enum {
17 /* size to jump on a collision */
/*
 * NOTE(review): no enumerator follows the comment above in this listing --
 * confirm whether a line is missing here.
 */
19 /* enumeration of the possible boolean options */
20 /* --- input file interpretation --- */
21 /* handle user-defined type structured keyword input */
22 OPTS_TYPE = 1 << 0,
23 /* ignore case of ASCII characters */
24 OPTS_UPPERLOWER = 1 << 1,
25 /* --- language for the output code --- */
26 /* generate K&R C code: no prototypes, no const */
27 OPTS_KRC = 1 << 2,
28 /* generate C code: no prototypes, but const (user can #define it away) */
29 OPTS_C = 1 << 3,
30 /* generate ISO/ANSI C code: prototypes and const, but no class */
31 OPTS_ANSIC = 1 << 4,
32 /* generate C++ code: prototypes, const, class, inline, enum */
33 OPTS_CPLUSPLUS = 1 << 5,
34 /* --- details in the output code --- */
35 /* assume 7-bit, not 8-bit, characters */
36 OPTS_SEVENBIT = 1 << 6,
37 /* generate a length table for string comparison */
38 OPTS_LENTABLE = 1 << 7,
39 /* generate strncmp rather than strcmp */
40 OPTS_COMP = 1 << 8,
41 /* make the generated tables readonly (const) */
42 OPTS_CONST = 1 << 9,
43 /* use enum for constants */
44 OPTS_ENUM = 1 << 10,
45 /* generate #include statements */
46 OPTS_INCLUDE = 1 << 11,
47 /* make the keyword table a global variable */
48 OPTS_GLOBAL = 1 << 12,
49 /* use NULL strings instead of empty strings for empty table entries */
50 OPTS_NULLSTRINGS = 1 << 13,
51 /* optimize for position-independent code */
52 OPTS_SHAREDLIB = 1 << 14,
53 /* generate switch output to save space */
54 OPTS_SWITCH = 1 << 15,
55 /* don't include user-defined type definition in output -- it's already defined elsewhere */
56 OPTS_NOTYPE = 1 << 16,
57 /* --- algorithm employed by gperf --- */
58 /* use the given key positions */
59 OPTS_POSITIONS = 1 << 17,
60 /* handle duplicate hash values for keywords */
61 OPTS_DUP = 1 << 18,
62 /* don't include keyword length in hash computations */
63 OPTS_NOLENGTH = 1 << 19,
64 /* randomly initialize the associated values table */
65 OPTS_RANDOM = 1 << 20,
66 /* --- informative output --- */
67 /* enable debugging (prints diagnostics to stderr) */
68 OPTS_DEBUG = 1 << 21,
69 };
70 /*}}} constants -- END */
71 /*{{{ types */
/*
 * Everything the command-line parser records.  The boolean options live as
 * OPTS_* bits in option_word; the remaining members hold option arguments.
 * The parser stores the getopt argument pointers directly in the string
 * members (slot_name, function_name, ...); no copy is made there.
 */
72 struct Options {
73 /* records count of command-line arguments */
74 u32 argument_count;
75 /* stores a pointer to command-line argument vector */
76 u8 **argument_vector;
77 /* holds the boolean options */
78 u32 option_word;
79 /* separates keywords from other attributes */
80 u8 *delimiters;
81 /* suffix for empty struct initializers */
82 u8 *initializer_suffix;
83 /* name used for generated hash function */
84 u8 *hash_name;
85 /* initial value for asso_values table */
86 s32 initial_asso_value;
87 /* jump length when trying alternative values */
88 s32 jump;
89 /* contains user-specified key choices */
90 struct Positions *key_positions;
91 /* Name used for keyword key */
92 u8 *slot_name;
93 /* the output language */
/* null means "not chosen yet": opts_set_language only acts while it is 0 */
94 u8 *language;
95 /* number of attempts at finding good asso_values */
96 s32 asso_iterations;
97 /* names used for generated lookup function */
98 u8 *function_name;
99 /* name used for the string pool */
100 u8 *stringpool_name;
101 /* factor by which to multiply the generated table's size */
102 f32 size_multiple;
103 /* number of switch statements to generate */
104 s32 total_switches;
105 /* name used for hash table array */
106 u8 *wordlist_name;
107 /* name used for generated C++ class */
108 u8 *class_name;
109 /* name of output file */
110 u8 *output_file_name;
111 /* name used for length table array */
112 u8 *lengthtable_name;
113 /* prefix for the constants */
114 u8 *constants_prefix;
115 /* name of input file */
/* stays null when no input file argument was given; output_line_directive prints nothing then */
116 u8 *input_file_name;
117 };
118 /*}}} types -- END */
119 /*{{{ public static methods */
/*
 * Prototypes for the Options functions.  All are declared static, i.e. with
 * internal linkage.
 */
/* NOTE(review): opts_new and opts_del are defined outside this excerpt. */
120 static struct Options *opts_new(void);
121 static void opts_del(struct Options *t);
/* parses argc/argv into *t; invalid options print a message and exit(1) */
122 static void opts_parse_options(struct Options *t, u32 argc, u8 **argv);
/* full --help text */
123 static void opts_long_usage(FILE *stream);
/* one-line "Try '... --help'" hint */
124 static void opts_short_usage(FILE *stream);
/* prints the recorded command line to stdout as a C comment */
125 static void opts_print(struct Options *t);
126 /*{{{ accessors */
/*
 * Each setter below only takes effect while the corresponding option is still
 * unset (field equal to its DEFAULT_* value, language null, or the switch
 * option off).
 */
127 static void opts_set_delimiters(struct Options *t, u8 *delimiters);
128 static void opts_set_language(struct Options *t, u8 *language);
129 static void opts_set_slot_name(struct Options *t, u8 *name);
130 static void opts_set_initializer_suffix(struct Options *t, u8 *initializers);
131 static void opts_set_hash_name(struct Options *t, u8 *name);
132 static void opts_set_function_name(struct Options *t, u8 *name);
133 static void opts_set_class_name(struct Options *t, u8 *name);
134 static void opts_set_stringpool_name(struct Options *t, u8 *name);
135 static void opts_set_constants_prefix(struct Options *t, u8 *prefix);
136 static void opts_set_wordlist_name(struct Options *t, u8 *name);
137 static void opts_set_lengthtable_name(struct Options *t, u8 *name);
138 static void opts_set_total_switches(struct Options *t, s32 total_switches);
139 /*}}}*/
140 /*}}} public static methods -- END */
141 /*}}} Options -- END */
142 /*{{{ PositionStringParser */
143 /*{{{ constants and types */
/*
 * State for walking a key-position list such as "$,1,2,4,6-10" one position
 * at a time with posstrp_nextPosition().
 */
144 struct PositionStringParser {
145 /*{{{ private */
146 /* A pointer to the string provided by the user */
/* advanced past each character as it is consumed */
147 u8 *str;
148 /* Smallest possible value, inclusive */
149 s32 low_bound;
150 /* Greatest possible value, inclusive */
151 s32 high_bound;
152 /* A value marking the abstract "end of word" ( usually '$') */
153 s32 end_word_marker;
154 /* Error value returned when input is syntactically erroneous */
155 s32 error_value;
156 /* Value returned after last key is processed */
157 s32 end_marker;
158 /* Intermediate state for producing a range of positions */
/*
 * While in_range is set, each call increments range_curr_value and returns
 * it; in_range is cleared once that value reaches range_upper_bound.
 */
159 bool in_range; /* True while producing a range of positions */
160 s32 range_upper_bound; /* Upper bound (inclusive) of the range */
161 s32 range_curr_value; /* Last value returned */
162 /*}}} private -- END */
163 };
164 /*}}} constants and types -- END */
165 /*{{{ public static methods */
/* allocates a parser over str; the result is released with posstrp_del() */
166 static struct PositionStringParser *posstrp_new(u8 *str, s32 low_bound, s32 high_bound,
167 s32 end_word_marker, s32 error_value, s32 end_marker);
/* frees the parser */
168 static void posstrp_del(struct PositionStringParser *t);
/* returns the next position, or end_word_marker / error_value / end_marker */
169 static s32 posstrp_nextPosition(struct PositionStringParser *t);
170 /*}}} public static methods -- END */
171 /*}}} PositionStringParser -- END */
172 /*------------------------------------------------------------------------------------------------*/
173 #define EPILOG
174 #include "namespace/positions.h"
175 #include "namespace/options.h"
176 #undef EPILOG
177 /*------------------------------------------------------------------------------------------------*/
178 #endif
File output.c added (mode: 100644) (index 0000000..ce76117)
3 #include <stdbool.h>
4 #include "c_fixing.h"
5 #include "globals.h"
6 #include "options.h"
7 #include "output.h"
8 #include "keyword.h"
9 #include "keyword_list.h"
10 #include "positions.h"
11 /*------------------------------------------------------------------------------------------------*/
12 #include "namespace/globals.h"
13 #include "namespace/options.h"
14 #include "namespace/output.h"
15 #include "namespace/output.c"
16 #include "namespace/keyword.h"
17 #include "namespace/keyword_list.h"
18 #include "namespace/positions.h"
19 /*------------------------------------------------------------------------------------------------*/
20 /* We use a downcase table because when called repeatedly, the code gperf_downcase[c]
21 is faster than
22 if (c >= 'A' && c <= 'Z')
23 c += 'a' - 'A';
24 */
26 /*------------------------------------------------------------------------------------------------*/
27 /*{{{ local */
28 /*{{{ types */
29 /*
30 * because of the way output_keyword_table works, every duplicate set is
31 * stored contiguously in the wordlist array
32 */
/*
 * Describes one set of keywords that share a hash value: the shared hash, the
 * index of the set's first entry and how many consecutive entries belong to it.
 */
33 struct Duplicate_Entry
34 {
35 s32 hash_value; /* hash value for this particular duplicate set */
36 s32 index; /* index into the main keyword storage array */
37 s32 count; /* number of consecutive duplicates at this index */
38 };
39 /*}}}*/
40 /*{{{ variables */
/*
 * Text fragments spliced into the generated code through %s conversions.
 * NOTE(review): they are assigned outside this excerpt; presumably each holds
 * either the keyword plus a trailing space or an empty string -- confirm.
 */
41 /* the "register " storage-class specifier */
/* placed before each parameter declaration of the generated gperf_case_memcmp */
42 static u8 *register_scs;
43 /* the "const " qualifier */
44 static u8 *const_always;
45 /* the "const " qualifier, for read-only arrays */
/* placed before "unsigned char gperf_downcase[256]" in the generated table */
46 static u8 *const_readonly_array;
47 /* the "const " qualifier, for the array type */
48 static u8 *const_for_struct;
49 /*}}} variables -- END */
50 /*{{{ code */
51 /*{{{ output_string */
52 /*
53 * Outputs a keyword, as a string: enclosed in double quotes, escaping backslashes, double quote and
54 * unprintable characters
55 */
56 static void output_string(u8 *key, s32 len)
57 {
58 putchar('"');
59 loop {
60 u8 c;
62 if (len <= 0)
63 break;
64 c = (u8)(*key++);
65 if (isprint(c)) {
66 if (c == '"' || c == '\\')
67 putchar('\\');
68 putchar(c);
69 } else {
70 /*
71 * Use octal escapes, not hexadecimal escapes, because some old C compilers
72 * didn't understand hexadecimal escapes, and because hexadecimal escapes
73 * are not limited to 2 digits, thus needing special care if the following
74 * character happens to be a digit.
75 */
76 putchar('\\');
77 putchar('0' + ((c >> 6) & 7));
78 putchar('0' + ((c >> 3) & 7));
79 putchar('0' + (c & 7));
80 }
81 len--;
82 }
83 putchar('"');
84 }/*}}}*/
85 /*{{{ output_line_directive */
86 /* outputs a #line directive, referring to the given line number */
87 static void output_line_directive(u32 lineno)
88 {
89 u8 *file_name;
91 file_name = options->input_file_name;
92 if (file_name != 0) {
93 printf("#line %u ", lineno);
94 output_string(file_name, (s32)strlen(file_name));
95 printf("\n");
96 }
97 }/*}}}*/
98 /*{{{ output_constant_define */
99 static void output_constant_define(u8 *name, s32 value)
100 {
101 u8 *prefix;
102 u8 *combined_name;
104 prefix = options->constants_prefix;
105 combined_name = calloc(strlen(prefix) + strlen(name) + 1, sizeof(u8));
106 strcpy(combined_name, prefix);
107 strcpy(combined_name + strlen(prefix), name);
108 printf("#define %s %d\n", combined_name, value);
109 free(combined_name);
110 }/*}}}*/
111 /*{{{ output_constant_enum */
112 static void output_constant_enum(u8 *name, s32 value, u8 *indentation, bool *pending_comma)
113 {
114 u8 *prefix;
115 u8 *combined_name;
117 prefix = options->constants_prefix;
118 combined_name = calloc(strlen(prefix) + strlen(name) + 1, sizeof(u8));
119 strcpy(combined_name, prefix);
120 strcpy(combined_name + strlen(prefix), name);
121 if (*pending_comma)
122 printf (",\n");
123 printf("%s %s = %d", indentation, combined_name, value);
124 *pending_comma = true;
125 free(combined_name);
126 }/*}}}*/
127 /*{{{ output_upperlower_table */
129 static void output_upperlower_table(void)
130 {
131 u32 c;
133 printf(
134 "#ifndef GPERF_DOWNCASE\n"
135 "#define GPERF_DOWNCASE 1\n"
136 "static %sunsigned char gperf_downcase[256] =\n"
137 " {",
138 const_readonly_array);
139 c = 0;
140 loop {
141 if (c >= 256)
142 break;
143 if ((c % 15) == 0)
144 printf("\n ");
145 printf(" %3d", c >= 'A' && c <= 'Z' ? c + 'a' - 'A' : c);
146 if (c < 255)
147 printf (",");
148 ++c;
149 }
150 printf("\n"
151 " };\n"
152 "#endif\n\n");
153 }
154 #endif
155 /*}}}*/
156 /*{{{ output_upperlower_memcmp */
157 /* output gperf's ASCII-case insensitive memcmp replacement */
158 static void output_upperlower_memcmp(void)
159 {
160 printf(
161 "#ifndef GPERF_CASE_MEMCMP\n"
162 "#define GPERF_CASE_MEMCMP 1\n"
163 "static int\n"
164 "gperf_case_memcmp ");
165 printf(OPTS(KRC) ? "(s1, s2, n)\n"
166 " %schar *s1;\n"
167 " %schar *s2;\n"
168 " %ssize_t n;\n" :
169 OPTS(C) ? "(s1, s2, n)\n"
170 " %sconst char *s1;\n"
171 " %sconst char *s2;\n"
172 " %ssize_t n;\n" :
173 OPTS(ANSIC) || OPTS(CPLUSPLUS) ? "(%sconst char *s1, %sconst char *s2, %ssize_t n)\n" :
174 "", register_scs, register_scs, register_scs);
176 printf(
177 "{\n"
178 " for (; n > 0;)\n"
179 " {\n"
180 " unsiGNED char c1 = gperf_downcase[(unsigned char)*s1++];\n"
181 " unsigned char c2 = gperf_downcase[(unsigned char)*s2++];\n"
182 " if (c1 == c2)\n"
183 " {\n"
184 " n--;\n"
185 " continue;\n"
186 " }\n"
187 " return (int)c1 - (int)c2;\n"
188 " }\n"
189 " return 0;\n"
190 "}\n");
191 #else
192 printf(
193 "{\n"
194 " for (; n > 0;)\n"
195 " {\n"
196 " unsigned char c1 = *s1++;\n"
197 " unsigned char c2 = *s2++;\n"
198 " if (c1 >= 'A' && c1 <= 'Z')\n"
199 " c1 += 'a' - 'A';\n"
200 " if (c2 >= 'A' && c2 <= 'Z')\n"
201 " c2 += 'a' - 'A';\n"
202 " if (c1 == c2)\n"
203 " {\n"
204 " n--;\n"
205 " continue;\n"
206 " }\n"
207 " return (int)c1 - (int)c2;\n"
208 " }\n"
209 " return 0;\n"
210 "}\n");
211 #endif
212 printf(
213 "#endif\n\n");
214 }/*}}}*/
215 /*{{{ output_upperlower_strncmp */
216 /* output gperf's ASCII-case insensitive strncmp replacement */
217 static void output_upperlower_strncmp(void)
218 {
219 printf(
220 "#ifndef GPERF_CASE_STRNCMP\n"
221 "#define GPERF_CASE_STRNCMP 1\n"
222 "static int\n"
223 "gperf_case_strncmp ");
224 printf(OPTS(KRC) ? "(s1, s2, n)\n"
225 " %schar *s1;\n"
226 " %schar *s2;\n"
227 " %ssize_t n;\n" :
228 OPTS(C) ? "(s1, s2, n)\n"
229 " %sconst char *s1;\n"
230 " %sconst char *s2;\n"
231 " %ssize_t n;\n" :
232 OPTS(ANSIC) || OPTS(CPLUSPLUS) ? "(%sconst char *s1, %sconst char *s2, %ssize_t n)\n" :
233 "", register_scs, register_scs, register_scs);
235 printf(
236 "{\n"
237 " for (; n > 0;)\n"
238 " {\n"
239 " unsigned char c1 = gperf_downcase[(unsigned char)*s1++];\n"
240 " unsigned char c2 = gperf_downcase[(unsigned char)*s2++];\n"
241 " if (c1 != 0 && c1 == c2)\n"
242 " {\n"
243 " n--;\n"
244 " continue;\n"
245 " }\n"
246 " return (int)c1 - (int)c2;\n"
247 " }\n"
248 " return 0;\n"
249 "}\n");
250 #else
251 printf(
252 "{\n"
253 " for (; n > 0;)\n"
254 " {\n"
255 " unsigned char c1 = *s1++;\n"
256 " unsigned char c2 = *s2++;\n"
257 " if (c1 >= 'A' && c1 <= 'Z')\n"
258 " c1 += 'a' - 'A';\n"
259 " if (c2 >= 'A' && c2 <= 'Z')\n"
260 " c2 += 'a' - 'A';\n"
261 " if (c1 != 0 && c1 == c2)\n"
262 " {\n"
263 " n--;\n"
264 " continue;\n"
265 " }\n"
266 " return (int)c1 - (int)c2;\n"
267 " }\n"
268 " return 0;\n"
269 "}\n");
270 #endif
271 printf(
272 "#endif\n\n");
273 }/*}}}*/
274 /*{{{ output_upperlower_strcmp */
275 /* output gperf's ASCII-case insensitive strcmp replacement */
276 static void output_upperlower_strcmp(void)
277 {
278 printf(
279 "#ifndef GPERF_CASE_STRCMP\n"
280 "#define GPERF_CASE_STRCMP 1\n"
281 "static int\n"
282 "gperf_case_strcmp ");
283 printf(OPTS(KRC) ? "(s1, s2)\n"
284 " %schar *s1;\n"
285 " %schar *s2;\n" :
286 OPTS(C) ? "(s1, s2)\n"
287 " %sconst char *s1;\n"
288 " %sconst char *s2;\n" :
289 OPTS(ANSIC) || OPTS(CPLUSPLUS) ? "(%sconst char *s1, %sconst char *s2)\n" :
290 "", register_scs, register_scs);
292 printf(
293 "{\n"
294 " for (;;)\n"
295 " {\n"
296 " unsigned char c1 = gperf_downcase[(unsigned char)*s1++];\n"
297 " unsigned char c2 = gperf_downcase[(unsigned char)*s2++];\n"
298 " if (c1 != 0 && c1 == c2)\n"
299 " continue;\n"
300 " return (int)c1 - (int)c2;\n"
301 " }\n"
302 "}\n");
303 #else
304 printf(
305 "{\n"
306 " for (;;)\n"
307 " {\n"
308 " unsigned char c1 = *s1++;\n"
309 " unsigned char c2 = *s2++;\n"
310 " if (c1 >= 'A' && c1 <= 'Z')\n"
311 " c1 += 'a' - 'A';\n"
312 " if (c2 >= 'A' && c2 <= 'Z')\n"
313 " c2 += 'a' - 'A';\n"
314 " if (c1 != 0 && c1 == c2)\n"
315 " continue;\n"
316 " return (int)c1 - (int)c2;\n"
317 " }\n"
318 "}\n");
319 #endif
320 printf
321 ("#endif\n\n");
322 }/*}}}*/
323 /*{{{ smallest_integral_type */
324 /* returns the smallest unsigned C type capable of holding integers up to N */
325 static u8 *smallest_integral_type(s32 n)
326 {
327 if (n <= UCHAR_MAX) return "unsigned char";
328 if (n <= USHRT_MAX) return "unsigned short";
329 return "unsigned int";
330 }/*}}}*/
331 /*{{{ smallest_integral_type_2 */
332 /* returns the smallest signed C type capable of holding integers from MIN to MAX */
333 static u8 *smallest_integral_type_2(s32 min, s32 max)
334 {
336 if (min >= SCHAR_MIN && max <= SCHAR_MAX) return "signed char";
337 if (min >= SHRT_MIN && max <= SHRT_MAX) return "short";
338 return "int";
339 }/*}}}*/
340 /*{{{ output_const_type */
341 /*
342 * Outputs a type and a const specifier (i.e. "const " or "").
343 * The output is terminated with a space.
344 */
345 static void output_const_type(u8 *const_string, u8 *type_string)
346 {
347 if (type_string[strlen(type_string) - 1] == '*')
348 /* for pointer types, put the 'const' after the type */
349 printf(
350 "%s %s", type_string, const_string);
351 else
352 /* for scalar or struct types, put the 'const' before the type */
353 printf(
354 "%s%s ", const_string, type_string);
355 }/*}}}*/
356 /*{{{ output_keyword_blank_entries */
357 static void output_keyword_blank_entries(s32 count, u8 *indent)
358 {
359 s32 columns;
360 s32 column;
361 s32 i;
363 if (OPTS(TYPE)) {
364 columns = 58 / (4 + (OPTS(SHAREDLIB) ? 2 : OPTS(NULLSTRINGS) ? 8 : 2)
365 + strlen(options->initializer_suffix));
366 if (columns == 0)
367 columns = 1;
368 } else
369 columns = (OPTS(SHAREDLIB) ? 9 : OPTS(NULLSTRINGS) ? 4 : 9);
370 column = 0;
371 i = 0;
372 loop {
373 if (i >= count)
374 break;
375 if ((column % columns) == 0) {
376 if (i > 0)
377 printf(
378 ",\n");
379 printf(
380 "%s ", indent);
381 } else if (i > 0)
382 printf(", ");
383 if (OPTS(TYPE))
384 printf("{");
386 printf("-1");
387 else {
389 printf("(char*)0");
390 else
391 printf("\"\"");
392 }
393 if (OPTS(TYPE))
394 printf(
395 "%s}", options->initializer_suffix);
396 ++column;
397 ++i;
398 }
399 }/*}}}*/
400 /*{{{ output_keyword_entry */
401 static void output_keyword_entry(struct Keyword *tmp, s32 stringpool_index, u8 *indent,
402 bool is_duplicate)
403 {
404 if (OPTS(TYPE))
405 output_line_directive(tmp->lineno);
406 printf(
407 "%s ", indent);
408 if (OPTS(TYPE))
409 printf("{");
411 /*
412 * How to determine a certain offset in stringpool at compile time?
413 * - The standard way would be to use the 'offsetof' macro. But it is only
414 * defined in <stddef.h>, and <stddef.h> is not among the prerequisite
415 * header files that the user must #include.
416 * - The next best way would be to take the address and cast to 'intptr_t'
417 * or 'uintptr_t'. But these types are only defined in <stdint.h>, and
418 * <stdint.h> is not among the prerequisite header files that the user
419 * must #include.
420 * - The next best approximation of 'uintptr_t' is 'size_t'. It is defined
421 * in the prerequisite header <string.h>.
422 * - The types 'long' and 'unsigned long' do work as well, but on 64-bit
423 * native Windows platforms, they don't have the same size as pointers
424 * and therefore generate warnings.
425 */
426 printf("(int)(size_t)&((struct %s_t *)0)->%s_str%d",
427 options->stringpool_name, options->stringpool_name, stringpool_index);
428 else
429 output_string(tmp->allchars, tmp->allchars_length);
430 if (OPTS(TYPE)) {
431 if (strlen(tmp->rest) > 0)
432 printf(",%s", tmp->rest);
433 printf("}");
434 }
435 if (OPTS(DEBUG)) {
436 printf(" /* ");
437 if (is_duplicate)
438 printf("hash value duplicate, ");
439 else
440 printf("hash value = %d, ", tmp->hash_value);
441 printf("index = %d */", tmp->final_index);
442 }
444 }/*}}}*/
445 /*{{{ output_switch_case */
446 /* Output a single switch case (including duplicates). Advance list. */
447 static struct Keyword_List *output_switch_case(struct Keyword_List *list, s32 indent,
448 s32 *jumps_away)
449 {
450 if (OPTS(DEBUG))
451 printf(
452 "%*s/* hash value = %4d, keyword = \"%.*s\" */\n", indent, "", list->kw->hash_value,
453 list->kw->allchars_length, list->kw->allchars);
454 if (OPTS(DUP) && list->kw->duplicate_link) {
455 s32 count;
456 struct Keyword *links;
459 printf(
460 "%*slengthptr = &%s[%d];\n", indent, "", options->lengthtable_name, list->kw->final_index);
461 printf(
462 "%*swordptr = &%s[%d];\n", indent, "", options->wordlist_name, list->kw->final_index);
463 count = 0;
464 links = list->kw;
465 loop {
466 if (links == 0)
467 break;
468 ++count;
469 links = links->duplicate_link;
470 }
471 printf(
472 "%*swordendptr = wordptr + %d;\n"
473 "%*sgoto multicompare;\n", indent, "", count, indent, "");
474 *jumps_away = 1;
475 } else {
476 if (OPTS(LENTABLE)) {
477 printf(
478 "%*sif (len == %d)\n"
479 "%*s {\n", indent, "", list->kw->allchars_length, indent, "");
480 indent += 4;
481 }
482 printf("%*sresword = ", indent, "");
483 if (OPTS(TYPE))
484 printf("&%s[%d]", options->wordlist_name, list->kw->final_index);
485 else
486 output_string(list->kw->allchars, list->kw->allchars_length);
487 printf(";\n");
488 printf(
489 "%*sgoto compare;\n", indent, "");
490 if (OPTS(LENTABLE)) {
491 indent -= 4;
492 printf(
493 "%*s }\n", indent, "");
494 } else
495 *jumps_away = 1;
496 }
497 return list->next;
498 }/*}}}*/
499 /*{{{ output_switches */
500 /*
501 * output a total of size cases, grouped into num_switches switch statements, where 0 <
502 * num_switches <= size
503 */
504 static void output_switches(struct Keyword_List *list, s32 num_switches, s32 size,
505 s32 min_hash_value, s32 max_hash_value, s32 indent)
506 {
507 if (OPTS(DEBUG))
508 printf(
509 "%*s/* know %d <= key <= %d, contains %d cases */\n", indent, "", min_hash_value, max_hash_value,
510 size);
511 if (num_switches > 1) {
512 s32 part1;
513 s32 part2;
514 s32 size1;
515 s32 size2;
516 struct Keyword_List *tmp;
517 s32 count;
519 part1 = num_switches / 2;
520 part2 = num_switches - part1;
521 size1 = (s32)((f64)(size) / (f64)(num_switches) * (f64)(part1) + 0.5);
522 size2 = size - size1;
524 tmp = list;
525 count = size1;
526 loop {
527 if (count <= 0)
528 break;
529 tmp = tmp->next;
530 count--;
531 }
532 printf(
533 "%*sif (key < %d)\n"
534 "%*s {\n", indent, "", tmp->kw->hash_value, indent, "");
535 output_switches(list, part1, size1, min_hash_value, tmp->kw->hash_value - 1,
536 indent + 4);
537 printf(
538 "%*s }\n"
539 "%*selse\n"
540 "%*s {\n", indent, "", indent, "", indent, "");
541 output_switches(tmp, part2, size2, tmp->kw->hash_value, max_hash_value, indent + 4);
542 printf(
543 "%*s }\n", indent, "");
544 } else {
545 s32 lowest_case_value;
547 lowest_case_value = list->kw->hash_value;
548 if (size == 1) {
549 s32 jumps_away;
551 jumps_away = 0;
552 if (min_hash_value == max_hash_value)
553 output_switch_case(list, indent, &jumps_away);
554 else {
555 printf(
556 "%*sif (key == %d)\n"
557 "%*s {\n", indent, "", lowest_case_value, indent, "");
558 output_switch_case(list, indent + 4, &jumps_away);
559 printf(
560 "%*s }\n", indent, "");
561 }
562 } else {
563 if (lowest_case_value == 0)
564 printf(
565 "%*sswitch (key)\n", indent, "");
566 else
567 printf(
568 "%*sswitch (key - %d)\n", indent, "", lowest_case_value);
569 printf(
570 "%*s {\n", indent, "");
571 loop {
572 s32 jumps_away;
574 if (size <= 0)
575 break;
576 jumps_away = 0;
577 printf(
578 "%*s case %d:\n", indent, "", list->kw->hash_value - lowest_case_value);
579 list = output_switch_case(list, indent + 6, &jumps_away);
580 if (!jumps_away)
581 printf(
582 "%*s break;\n", indent, "");
583 size--;
584 }
585 printf(
586 "%*s }\n", indent, "");
587 }
588 }
589 }/*}}}*/
590 /*{{{ output_firstchar_comparison */
591 /*
592 * Outputs the comparison expression for the first byte. Returns true if the this comparison is
593 * complete.
594 */
595 static bool output_firstchar_comparison(u8 *expr1, u8 *expr2)
596 {
597 /*
598 * First, we emit a comparison of the first byte of the two strings. This catches most
599 * cases where the string being looked up is not in the hash table but happens to have the
600 * same hash code as an element of the hash table.
601 */
603 /* incomplete comparison, just for speedup */
604 printf("(((unsigned char)*");
605 printf("%s", expr1);
606 printf(" ^ (unsigned char)*");
607 printf("%s", expr2);
608 printf(") & ~32) == 0");
609 return false;
610 }
611 /* Complete comparison. */
612 printf("*");
613 printf("%s", expr1);
614 printf(" == *");
615 printf("%s", expr2);
616 return true;
617 }/*}}}*/
618 /*------------------------------------------------------------------------------------------------*/
619 /*{{{ output_comparison_X */
620 /*
621 * Outputs the comparison expression.
622 * expr1 outputs a simple expression of type 'const char *' referring to the string being looked up.
623 * expr2 outputs a simple expression of type 'const char *' referring to the constant string stored
624 * in the gperf generated hash table.
625 */
626 /*{{{ output_comparison_memcmp */
627 static void output_comparison_memcmp(u8 *expr1, u8 *expr2)
628 {
629 bool firstchar_done;
631 firstchar_done = output_firstchar_comparison(expr1, expr2);
632 printf(" && !");
634 printf("gperf_case_");
635 printf("memcmp (");
636 if (firstchar_done) {
637 printf("%s", expr1);
638 printf(" + 1, ");
639 printf("%s", expr2);
640 printf(" + 1, len - 1");
641 } else {
642 printf("%s", expr1);
643 printf(", ");
644 printf("%s", expr2);
645 printf(", len");
646 }
647 printf(")");
648 }/*}}}*/
649 /*{{{ output_comparison_strncmp */
650 static void output_comparison_strncmp(u8 *expr1, u8 *expr2)
651 {
652 bool firstchar_done;
654 firstchar_done = output_firstchar_comparison(expr1, expr2);
655 printf(" && !");
657 printf("gperf_case_");
658 printf("strncmp (");
659 if (firstchar_done) {
660 printf("%s", expr1);
661 printf(" + 1, ");
662 printf("%s", expr2);
663 printf(" + 1, len - 1");
664 } else {
665 printf("%s", expr1);
666 printf(", ");
667 printf("%s", expr2);
668 printf(", len");
669 }
670 printf(") && ");
671 printf("%s", expr2);
672 printf("[len] == '\\0'");
673 }/*}}}*/
674 /*{{{ output_comparison_strcmp */
675 static void output_comparison_strcmp(u8 *expr1, u8 *expr2)
676 {
677 bool firstchar_done;
679 firstchar_done = output_firstchar_comparison(expr1, expr2);
680 printf(" && !");
682 printf("gperf_case_");
683 printf("strcmp (");
684 if (firstchar_done) {
685 printf("%s", expr1);
686 printf(" + 1, ");
687 printf("%s", expr2);
688 printf(" + 1");
689 } else {
690 printf("%s", expr1);
691 printf(", ");
692 printf("%s", expr2);
693 }
694 printf(")");
695 }/*}}}*/
696 /*}}} output_comparison_X -- END */
697 /*------------------------------------------------------------------------------------------------*/
698 /*}}} code -- END */
699 /*}}} local -- END */
700 /*------------------------------------------------------------------------------------------------*/
701 /*{{{ output_new */
702 /* Constructor.
703 Note about the keyword list starting at head:
704 - The list is ordered by increasing _hash_value. This has been achieved
705 by Search::sort().
706 - Duplicates, i.e. keywords with the same _selchars set, are chained
707 through the _duplicate_link pointer. Only one representative per
708 duplicate equivalence class remains on the linear keyword list.
709 - Accidental duplicates, i.e. keywords for which the _asso_values[] search
710 couldn't achieve different hash values, cannot occur on the linear
711 keyword list. Search::optimize would catch this mistake.
712 */
713 static struct Output *output_new(struct Keyword_List *head, u8 *struct_decl,
714 u32 struct_decl_lineno, u8 *return_type,
715 u8 *struct_tag, u8 *verbatim_declarations,
716 u8 *verbatim_declarations_end,
717 u32 verbatim_declarations_lineno,
718 u8 *verbatim_code, u8 *verbatim_code_end,
719 u32 verbatim_code_lineno, bool charset_dependent,
720 s32 total_keys, s32 max_key_len, s32 min_key_len,
721 bool hash_includes_len, struct Positions *positions,
722 u32 *alpha_inc, s32 total_duplicates,
723 u32 alpha_size, s32 *asso_values)
724 {
725 struct Output *t;
727 t = calloc(1, sizeof(*t));
728 t->head = head;
729 t->struct_decl = struct_decl;
730 t->struct_decl_lineno = struct_decl_lineno;
731 t->return_type = return_type;
732 t->struct_tag = struct_tag;
733 t->verbatim_declarations = verbatim_declarations;
734 t->verbatim_declarations_end = verbatim_declarations_end;
735 t->verbatim_declarations_lineno = verbatim_declarations_lineno;
736 t->verbatim_code = verbatim_code;
737 t->verbatim_code_end = verbatim_code_end;
738 t->verbatim_code_lineno = verbatim_code_lineno;
739 t->charset_dependent = charset_dependent;
740 t->total_keys = total_keys;
741 t->max_key_len = max_key_len;
742 t->min_key_len = min_key_len;
743 t->hash_includes_len = hash_includes_len;
744 t->key_positions = pos_new_cpy(positions);
745 t->alpha_inc = alpha_inc;
746 t->total_duplicates = total_duplicates;
747 t->alpha_size = alpha_size;
748 t->asso_values = asso_values;
749 return t;
750 }/*}}}*/
751 /*{{{ output_del */
752 static void output_del(struct Output *t)
753 {
754 pos_del(t->key_positions);
755 free(t);
756 }
757 /*}}}*/
758 /*{{{ output_do */
759 /* generates the hash function and the key word recognizer function based upon the user's Options */
760 static void output_do(struct Output *t)
761 {
762 output_compute_min_max(t);
763 if (OPTS(CPLUSPLUS)) /* yeah, we know nowadays that c++ is never a good idea anyway */
764 /*
765 * The 'register' keyword is removed from C++17. See
766 *
767 */
768 register_scs = "";
769 else
770 register_scs = "register ";
772 const_always = "const ";
773 const_readonly_array = (OPTS(CONST) ? "const " : "");
774 const_for_struct = ((OPTS(CONST) && OPTS(TYPE)) ? "const " : "" );
775 } else {
776 const_always = "";
777 const_readonly_array = "";
778 const_for_struct = "";
779 }
780 if (!OPTS(TYPE)) {
781 t->return_type = (const_always[0] != 0 ? "const char *" : "char *");
782 t->struct_tag = (const_always[0] != 0 ? "const char *" : "char *");
783 }
784 t->wordlist_eltype = (OPTS(SHAREDLIB) && !OPTS(TYPE) ? (u8*)"int" : t->struct_tag);
785 printf ("/* ");
786 if (OPTS(KRC))
787 printf("KR-C");
788 else if (OPTS(C))
789 printf("C");
790 else if (OPTS(ANSIC))
791 printf("ANSI-C");
792 else if (OPTS(CPLUSPLUS))
793 printf("C++");
794 printf(" code produced by gperf version %s */\n", cgperf_version_string);
795 opts_print(options);
796 printf("\n");
797 if (!OPTS(POSITIONS)) {
798 printf ("/* Computed positions: -k'");
799 pos_print(t->key_positions);
800 printf("' */\n");
801 }
802 printf("\n");
803 if (t->charset_dependent && (t->key_positions->size > 0 || OPTS(UPPERLOWER))) {
804 printf("#if !((' ' == 32) && ('!' == 33) && ('\"' == 34) && ('#' == 35) \\\n"
805 " && ('%%' == 37) && ('&' == 38) && ('\\'' == 39) && ('(' == 40) \\\n"
806 " && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \\\n"
807 " && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \\\n"
808 " && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \\\n"
809 " && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \\\n"
810 " && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \\\n"
811 " && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \\\n"
812 " && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \\\n"
813 " && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \\\n"
814 " && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \\\n"
815 " && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \\\n"
816 " && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \\\n"
817 " && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \\\n"
818 " && ('Z' == 90) && ('[' == 91) && ('\\\\' == 92) && (']' == 93) \\\n"
819 " && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \\\n"
820 " && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \\\n"
821 " && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \\\n"
822 " && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \\\n"
823 " && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \\\n"
824 " && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \\\n"
825 " && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \\\n"
826 " && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))\n"
827 "/* The character set is not based on ISO-646. */\n");
828 printf("%s \"gperf generated tables don't work with this execution character set. Please report a bug to <>.\"\n", OPTS(KRC) || OPTS(C) ? "error" : "#error");
829 printf ("#endif\n\n");
830 }
831 if (t->verbatim_declarations < t->verbatim_declarations_end) {
832 output_line_directive(t->verbatim_declarations_lineno);
833 fwrite(t->verbatim_declarations, 1, t->verbatim_declarations_end -
834 t->verbatim_declarations, stdout);
835 }
836 if (OPTS(TYPE) && !OPTS(NOTYPE)) {
837 /* output type declaration now, reference it later on.... */
838 output_line_directive(t->struct_decl_lineno);
839 printf("%s\n", t->struct_decl);
840 }
841 if (OPTS(INCLUDE))
842 printf("#include <string.h>\n"); /* declare strlen(), strcmp(), strncmp() */
843 if (!OPTS(ENUM)) /* refactored: overzealous code factorization */
844 output_constants_defines(t);
845 else if (OPTS(GLOBAL))
846 output_constants_enum(t, "");
847 printf("/* maximum key range = %d, duplicates = %d */\n\n", t->max_hash_value - t->min_hash_value + 1, t->total_duplicates);
850 output_upperlower_table();
851 #endif
853 output_upperlower_memcmp();
854 else {
855 if (OPTS(COMP))
856 output_upperlower_strncmp();
857 else
858 output_upperlower_strcmp();
859 }
860 }
862 printf(
863 "class %s\n"
864 "{\n"
865 "private:\n"
866 " static inline unsigned int %s (const char *str, size_t len);\n"
867 "public:\n"
868 " static %s%s%s (const char *str, size_t len);\n"
869 "};\n"
870 "\n", options->class_name, options->hash_name, const_for_struct, t->return_type,
871 options->function_name);
872 output_hash_function(t);
874 output_lookup_pools(t);
875 if (OPTS(GLOBAL))
876 output_lookup_tables(t);
877 output_lookup_function(t);
878 if (t->verbatim_code < t->verbatim_code_end) {
879 output_line_directive(t->verbatim_code_lineno);
880 fwrite(t->verbatim_code, 1, t->verbatim_code_end - t->verbatim_code, stdout);
881 }
882 fflush(stdout);
883 }/*}}}*/
884 /*{{{ output_compute_min_max */
885 static void output_compute_min_max(struct Output *t)
886 {
887 struct Keyword_List *tmp;
888 /*
889 * since the list is already sorted by hash value all we need to do is to look at the first
890 * and the last element of the list
891 */
892 t->min_hash_value = t->head->kw->hash_value;
893 tmp = t->head;
894 loop {
895 if (tmp->next == 0)
896 break;
897 tmp = tmp->next;
898 }
899 t->max_hash_value = tmp->kw->hash_value;
900 }/*}}}*/
901 /*{{{ output_constants_defines */
902 static void output_constants_defines(struct Output *t)
903 {
904 printf("\n");
905 output_constant_define("TOTAL_KEYWORDS", t->total_keys);
906 output_constant_define("MIN_WORD_LENGTH", t->min_key_len);
907 output_constant_define("MAX_WORD_LENGTH", t->max_key_len);
908 output_constant_define("MIN_HASH_VALUE", t->min_hash_value);
909 output_constant_define("MAX_HASH_VALUE", t->max_hash_value);
910 }/*}}}*/
911 /*{{{ output_constants_enum */
912 static void output_constants_enum(struct Output *t, u8 *indentation)
913 {
914 bool pending_comma;
916 printf("%senum\n"
917 "%s {\n", indentation, indentation);
918 pending_comma = false;
919 output_constant_enum("TOTAL_KEYWORDS", t->total_keys, indentation, &pending_comma);
920 output_constant_enum("MIN_WORD_LENGTH", t->min_key_len, indentation, &pending_comma);
921 output_constant_enum("MAX_WORD_LENGTH", t->max_key_len, indentation, &pending_comma);
922 output_constant_enum("MIN_HASH_VALUE", t->min_hash_value, indentation, &pending_comma);
923 output_constant_enum("MAX_HASH_VALUE", t->max_hash_value, indentation, &pending_comma);
924 if (pending_comma)
925 printf("\n");
926 printf("%s };\n\n", indentation);
927 }/*}}}*/
928 /*{{{ output_hash_function */
929 /* Generates C code for the hash function that returns the
930 proper encoding for each keyword.
931 The hash function has the signature
932 unsigned int <hash> (const char *str, size_t len). */
933 static void output_hash_function(struct Output *t)
934 {
935 /* output the function's head */
937 printf("inline ");
938 else if (OPTS(KRC) || OPTS(C) || OPTS(ANSIC))
939 printf(
940 "#ifdef __GNUC__\n"
941 "__inline\n"
942 "#else\n"
943 "#ifdef __cplusplus\n"
944 "inline\n"
945 "#endif\n"
946 "#endif\n");
947 if (/* the function does not use the 'str' argument? */
948 (t->key_positions->size == 0)
949 || /* the function uses 'str', but not the 'len' argument? */
950 (!t->hash_includes_len
951 && t->key_positions->positions[0] < t->min_key_len)
952 && t->key_positions->positions[t->key_positions->size - 1] != POS_LASTCHAR)
953 /* pacify lint */
954 printf("/*ARGSUSED*/\n");
955 if (OPTS(KRC) || OPTS(C) || OPTS(ANSIC))
956 printf("static ");
957 printf("unsigned int\n");
959 printf("%s::", options->class_name);
960 printf("%s ", options->hash_name);
961 printf(OPTS(KRC) ?
962 "(str, len)\n"
963 " %schar *str;\n"
964 " %ssize_t len;\n" :
965 OPTS(C) ?
966 "(str, len)\n"
967 " %sconst char *str;\n"
968 " %ssize_t len;\n" :
970 "(%sconst char *str, %ssize_t len)\n" :
971 "", register_scs, register_scs);
973 /*
974 * note that when the hash function is called, it has already been verified that
975 * min_key_len <= len <= max_key_len
976 */
977 /* output the function's body */
978 printf(
979 "{\n");
980 /* first the asso_values array */
981 if (t->key_positions->size > 0) {
982 s32 columns;
983 s32 field_width;
984 s32 trunc;
985 u32 count;
986 /*
987 * the values in the asso_values array are all unsigned integers <= MAX_HASH_VALUE +
988 * 1
989 */
990 printf(
991 " static %s%s asso_values[] =\n"
992 " {", const_readonly_array, smallest_integral_type(t->max_hash_value + 1));
993 columns = 10;
994 /* calculate maximum number of digits required for MAX_HASH_VALUE + 1 */
995 field_width = 2;
996 trunc = t->max_hash_value + 1;
997 loop {
998 trunc /= 10;
999 if (trunc <= 0)
1000 break;
1001 ++field_width;
1002 }
1003 count = 0;
1004 loop {
1005 if (count >= t->alpha_size)
1006 break;
1007 if (count > 0)
1008 printf(",");
1009 if ((count % columns) == 0)
1010 printf("\n ");
1011 printf("%*d", field_width, t->asso_values[count]);
1012 ++count;
1013 }
1014 printf(
1015 "\n"
1016 " };\n");
1017 }
1018 if (t->key_positions->size == 0) {
1019 /* trivial case: No key positions at all */
1020 printf(
1021 " return %s;\n", t->hash_includes_len ? "len" : "0");
1022 } else {
1023 struct PositionIterator *iter;
1024 s32 key_pos;
1025 /*
1026 * Iterate through the key positions. Remember that Positions::sort() has sorted
1027 * them in decreasing order, with Positions::LASTCHAR coming last.
1028 */
1029 iter = pos_iterator(t->key_positions, t->max_key_len);
1030 /* get the highest key position */
1031 key_pos = positer_next(iter);
1032 if (key_pos == POS_LASTCHAR || key_pos < t->min_key_len) {
1033 /*
1034 * We can perform additional optimizations here: Write it out as a single
1035 * expression. Note that the values are added as 'int's even though the
1036 * asso_values array may contain 'unsigned char's or 'unsigned short's.
1037 */
1038 printf(
1039 " return %s", t->hash_includes_len ? "len + " : "");
1040 if (t->key_positions->size == 2
1041 && t->key_positions->positions[0] == 0
1042 && t->key_positions->positions[1] == POS_LASTCHAR) {
1043 /* optimize special case of "-k 1,$" */
1044 output_asso_values_ref(t, POS_LASTCHAR);
1045 printf(" + ");
1046 output_asso_values_ref(t, 0);
1047 } else {
1048 loop {
1049 if (key_pos == POS_LASTCHAR)
1050 break;
1051 output_asso_values_ref(t, key_pos);
1052 key_pos = positer_next(iter);
1053 if (key_pos != POSITER_EOS)
1054 printf(" + ");
1055 else
1056 break;
1057 }
1058 if (key_pos == POS_LASTCHAR)
1059 output_asso_values_ref(t, POS_LASTCHAR);
1060 }
1061 printf(";\n");
1062 } else {
1063 u8 *fallthrough_marker;
1064 /* we've got to use the correct, but brute force, technique */
1065 /*
1066 * pseudo-statement or comment that avoids a compiler warning or lint
1067 * warning
1068 */
1069 fallthrough_marker =
1070 "#if defined __cplusplus && (__cplusplus >= 201703L || (__cplusplus >= 201103L && defined __clang_major__ && defined __clang_minor__ && __clang_major__ + (__clang_minor__ >= 9) > 3))\n"
1071 " [[fallthrough]];\n"
1072 "#elif defined __GNUC__ && __GNUC__ >= 7\n"
1073 " __attribute__ ((__fallthrough__));\n"
1074 "#endif\n"
1075 " /*FALLTHROUGH*/\n";
1076 /*
1077 * it doesn't really matter whether hval is an 'int' or 'unsigned int', but
1078 * 'unsigned int' gives fewer warnings
1079 */
1080 printf(
1081 " %sunsigned int hval = %s;\n\n"
1082 " switch (%s)\n"
1083 " {\n"
1084 " default:\n", register_scs, t->hash_includes_len ? "len" : "0",
1085 t->hash_includes_len ? "hval" : "len");
1086 loop {
1087 if (key_pos == POS_LASTCHAR || key_pos < t->max_key_len)
1088 break;
1089 key_pos = positer_next(iter);
1090 if (key_pos == POSITER_EOS)
1091 break;
1092 }
1093 if (key_pos != POSITER_EOS && key_pos != POS_LASTCHAR) {
1094 s32 i;
1096 i = key_pos;
1097 loop {
1098 if (i > key_pos)
1099 printf("%s", fallthrough_marker);
1100 loop {
1101 if (i <= key_pos)
1102 break;
1103 printf(" case %d:\n", i);
1104 i--;
1105 }
1106 printf(" hval += ");
1107 output_asso_values_ref(t, key_pos);
1108 printf(";\n");
1109 key_pos = positer_next(iter);
1110 if (key_pos == POSITER_EOS || key_pos == POS_LASTCHAR)
1111 break;
1112 }
1113 if (i >= t->min_key_len)
1114 printf("%s", fallthrough_marker);
1115 loop {
1116 if (i < t->min_key_len)
1117 break;
1118 printf(" case %d:\n", i);
1119 i--;
1120 }
1121 }
1122 printf(
1123 " break;\n"
1124 " }\n"
1125 " return hval");
1126 if (key_pos == POS_LASTCHAR) {
1127 printf(" + ");
1128 output_asso_values_ref(t, POS_LASTCHAR);
1129 }
1130 printf (";\n");
1131 }
1132 positer_del(iter);
1133 }
1134 printf ("}\n\n");
1135 }/*}}}*/
1136 /*{{{ output_asso_values_ref */
1137 /* Generates a C expression for an asso_values[] reference. */
1138 static void output_asso_values_ref(struct Output *t, s32 pos)
1139 {
1140 printf("asso_values[");
1141 /*
1142 * Always cast to unsigned char. This is necessary when the alpha_inc is nonzero, and also
1143 * avoids a gcc warning "subscript has type 'char'".
1144 */
1145 if (OPTS(CPLUSPLUS)) {
1146 /*
1147 * In C++, a C style cast may lead to a 'warning: use of old-style cast'.
1148 * Therefore prefer the C++ style cast syntax.
1149 */
1150 printf("static_cast<unsigned char>(");
1151 output_asso_values_index(t, pos);
1152 printf(")");
1153 } else {
1154 printf("(unsigned char)");
1155 output_asso_values_index(t, pos);
1156 }
1157 printf("]");
1158 }/*}}}*/
1159 /*{{{ output_asso_values_index */
1160 /* generates a C expression for an asso_values[] index */
1161 static void output_asso_values_index(struct Output *t, s32 pos)
1162 {
1163 if (pos == POS_LASTCHAR)
1164 printf("str[len - 1]");
1165 else {
1166 printf("str[%d]", pos);
1167 if (t->alpha_inc[pos])
1168 printf("+%u", t->alpha_inc[pos]);
1169 }
1170 }/*}}}*/
1171 /*{{{ output_lookup_pools */
1172 /* generate all pools needed for the lookup function */
1173 static void output_lookup_pools(struct Output *t)
1174 {
1175 if (OPTS(SWITCH)) {
1176 if (OPTS(TYPE) || (OPTS(DUP) && t->total_duplicates > 0))
1177 output_string_pool(t);
1178 } else
1179 output_string_pool(t);
1180 }/*}}}*/
1181 /*{{{ output_string_pool */
1182 /*
1183 * Prints out the string pool, containing the strings of the keyword table.
1184 * Only called if option[SHAREDLIB]
1185 */
1186 static void output_string_pool(struct Output *t)
1187 {
1188 u8 *indent;
1189 s32 index;
1190 struct Keyword_List *tmp;
1192 indent = OPTS(TYPE) || OPTS(GLOBAL) ? "" : " ";
1194 printf(
1195 "%sstruct %s_t\n"
1196 "%s {\n", indent, options->stringpool_name, indent);
1197 tmp = t->head;
1198 index = 0;
1199 loop {
1200 struct Keyword *kw;
1202 if (tmp == 0)
1203 break;
1204 kw = tmp->kw;
1205 /*
1206 * If generating a switch statement, and there is no user defined type, we generate
1207 * non-duplicates directly in the code. Only duplicates go into the table.
1208 */
1209 if (OPTS(SWITCH) && !OPTS(TYPE) && kw->duplicate_link == 0)
1210 continue;
1211 if (!OPTS(SWITCH) && !OPTS(DUP))
1212 index = kw->hash_value;
1213 printf("%s char %s_str%d[sizeof(", indent, options->stringpool_name, index);
1214 output_string(kw->allchars, kw->allchars_length);
1215 printf(")];\n");
1216 /* deal with duplicates specially */
1217 if (kw->duplicate_link) {/* implies option[DUP] */
1218 struct Keyword *links;
1220 links = kw->duplicate_link;
1221 loop {
1222 if (links == 0)
1223 break;
1224 if (!(links->allchars_length == kw->allchars_length
1225 && memcmp(links->allchars, kw->allchars,
1226 kw->allchars_length) == 0)) {
1227 ++index;
1228 printf("%s char %s_str%d[sizeof(", indent,
1229 options->stringpool_name, index);
1230 output_string(links->allchars, links->allchars_length);
1231 printf(")];\n");
1232 }
1233 links = links->duplicate_link;
1234 }
1235 }
1236 ++index;
1237 tmp = tmp->next;
1238 }
1239 printf(
1240 "%s };\n", indent);
1241 printf(
1242 "%sstatic %sstruct %s_t %s_contents =\n"
1243 "%s {\n", indent, const_readonly_array, options->stringpool_name, options->stringpool_name,
1244 indent);
1245 tmp = t->head;
1246 index = 0;
1247 loop {
1248 struct Keyword *kw;
1250 if (tmp == 0)
1251 break;
1252 kw = tmp->kw;
1253 /*
1254 * If generating a switch statement, and there is no user defined type, we generate
1255 * non-duplicates directly in the code. Only duplicates go into the table.
1256 */
1257 if (OPTS(SWITCH) && !OPTS(TYPE) && kw->duplicate_link == 0)
1258 continue;
1259 if (index > 0)
1260 printf(",\n");
1262 if (!OPTS(SWITCH) && !OPTS(DUP))
1263 index = kw->hash_value;
1264 printf(
1265 "%s ", indent);
1266 output_string(kw->allchars, kw->allchars_length);
1267 /* deal with duplicates specially */
1268 if (kw->duplicate_link != 0) {/* implies option[DUP] */
1269 struct Keyword *links;
1271 links = kw->duplicate_link;
1272 loop {
1273 if (links == 0)
1274 break;
1275 if (!(links->allchars_length == kw->allchars_length
1276 && memcmp(links->allchars, kw->allchars,
1277 kw->allchars_length) == 0)) {
1278 ++index;
1279 printf(",\n");
1280 printf(
1281 "%s ", indent);
1282 output_string(links->allchars, links->allchars_length);
1283 }
1284 links = links->duplicate_link;
1285 }
1286 }
1287 ++index;
1288 tmp = tmp->next;
1289 }
1290 if (index > 0)
1291 printf("\n");
1292 printf(
1293 "%s };\n", indent);
1294 printf(
1295 "%s#define %s ((%schar *) &%s_contents)\n", indent, options->stringpool_name, const_always,
1296 options->stringpool_name);
1297 if (OPTS(GLOBAL))
1298 printf(
1299 "\n");
1300 }/*}}}*/
1301 /*{{{ output_lookup_tables */
1302 /* generate all the tables needed for the lookup function */
1303 static void output_lookup_tables(struct Output *t)
1304 {
1305 if (OPTS(SWITCH)) {
1306 /* use the switch in place of lookup table */
1307 if (OPTS(LENTABLE) && (OPTS(DUP) && t->total_duplicates > 0))
1308 output_keylength_table(t);
1309 if (OPTS(TYPE) || (OPTS(DUP) && t->total_duplicates > 0))
1310 output_keyword_table(t);
1311 } else {
1312 /* use the lookup table, in place of switch */
1313 if (OPTS(LENTABLE))
1314 output_keylength_table(t);
1315 output_keyword_table(t);
1316 output_lookup_array(t);
1317 }
1318 }/*}}}*/
1319 /*{{{ output_keylength_table */
1320 /*
1321 * Prints out a table of keyword lengths, for use with the comparison code in generated function
1322 * 'in_word_set'. Only called if option[LENTABLE].
1323 */
1324 static void output_keylength_table(struct Output *t)
1325 {
1326 s32 columns;
1327 u8 *indent;
1328 s32 index;
1329 s32 column;
1330 struct Keyword_List *tmp;
1332 columns = 14;
1333 indent = OPTS(GLOBAL) ? "" : " ";
1335 printf(
1336 "%sstatic %s%s %s[] =\n"
1337 "%s {", indent, const_readonly_array, smallest_integral_type(t->max_key_len),
1338 options->lengthtable_name, indent);
1339 column = 0;
1340 tmp = t->head;
1341 index = 0;
1342 loop {
1343 struct Keyword *kw;
1345 if (tmp == 0)
1346 break;
1347 kw = tmp->kw;
1348 /*
1349 * If generating a switch statement, and there is no user defined type, we generate
1350 * non-duplicates directly in the code. Only duplicates go into the table.
1351 */
1352 if (OPTS(SWITCH) && !OPTS(TYPE) && kw->duplicate_link == 0)
1353 continue;
1354 if (index < kw->hash_value && !OPTS(SWITCH) && !OPTS(DUP)) {
1355 /* some blank entries */
1356 loop {
1357 if (index >= kw->hash_value)
1358 break;
1359 if (index > 0)
1360 printf(",");
1361 if ((column % columns) == 0)
1362 printf(
1363 "\n%s ", indent);
1364 ++column;
1365 printf("%3d", 0);
1366 ++index;
1367 }
1368 }
1369 if (index > 0)
1370 printf(",");
1371 if ((column % columns) == 0)
1372 printf(
1373 "\n%s ", indent);
1374 ++column;
1375 printf("%3d", kw->allchars_length);
1376 ++index;
1377 /* deal with duplicates specially */
1378 if (kw->duplicate_link != 0) {
1379 struct Keyword *links;
1381 links = kw->duplicate_link;
1382 loop {
1383 if (links == 0)
1384 break;
1385 printf(",");
1386 if ((column % columns) == 0)
1387 printf(
1388 "\n%s ", indent);
1389 ++column;
1390 printf("%3d", links->allchars_length);
1391 ++index;
1392 links = links->duplicate_link;
1393 }
1394 }
1395 tmp = tmp->next;
1396 }
1397 printf(
1398 "\n%s };\n", indent);
1399 if (OPTS(GLOBAL))
1400 printf(
1401 "\n");
1402 }/*}}}*/
1403 /*{{{ output_keyword_table */
1404 /* prints out the array containing the keywords for the hash function */
1405 static void output_keyword_table(struct Output *t)
1406 {
1407 u8 *indent;
1408 s32 index;
1409 struct Keyword_List *tmp;
1411 indent = OPTS(GLOBAL) ? "" : " ";
1412 printf(
1413 "%sstatic ", indent);
1414 output_const_type(const_readonly_array, t->wordlist_eltype);
1415 printf("%s[] =\n"
1416 "%s {\n", options->wordlist_name, indent);
1417 /* generate an array of reserved words at appropriate locations */
1418 tmp = t->head;
1419 index = 0;
1420 loop {
1421 struct Keyword *kw;
1423 if (tmp == 0)
1424 break;
1425 kw = tmp->kw;
1426 /*
1427 * If generating a switch statement, and there is no user defined type, we generate
1428 * non-duplicates directly in the code. Only duplicates go into the table.
1429 */
1430 if (OPTS(SWITCH) && !OPTS(TYPE) && kw->duplicate_link == 0)
1431 continue;
1432 if (index > 0)
1433 printf(",\n");
1434 if (index < kw->hash_value && !OPTS(SWITCH) && !OPTS(DUP)) {
1435 /* some blank entries */
1436 output_keyword_blank_entries(kw->hash_value - index, indent);
1437 printf(",\n");
1438 index = kw->hash_value;
1439 }
1440 kw->final_index = index;
1441 output_keyword_entry(kw, index, indent, false);
1442 /* deal with duplicates specially */
1443 if (kw->duplicate_link != 0) { /* implies option[DUP] */
1444 struct Keyword *links;
1446 links = kw->duplicate_link;
1447 loop {
1448 s32 stringpool_index;
1450 if (links == 0)
1451 break;
1452 ++index;
1453 links->final_index = index;
1454 printf(",\n");
1455 stringpool_index =
1456 (links->allchars_length == kw->allchars_length
1457 && memcmp(links->allchars, kw->allchars,
1458 kw->allchars_length) == 0
1459 ? kw->final_index : links->final_index);
1460 output_keyword_entry(links, stringpool_index, indent, true);
1461 links = links->duplicate_link;
1462 }
1463 }
1464 ++index;
1465 tmp = tmp->next;
1466 }
1467 if (index > 0)
1468 printf("\n");
1469 printf(
1470 "%s };\n\n", indent);
1471 }/*}}}*/
1472 /*{{{ output_lookup_array */
1473 /*
1474 * generates the large, sparse table that maps hash values into the smaller, contiguous range of the
1475 * keyword table
1476 */
1477 static void output_lookup_array(struct Output *t)
1478 {
1480 struct Duplicate_Entry *duplicates;
1481 s32 *lookup_array;
1482 s32 lookup_array_size;
1483 struct Duplicate_Entry *dup_ptr;
1484 s32 *lookup_ptr;
1485 struct Keyword_List *tmp;
1486 s32 min;
1487 s32 max;
1488 u8 *indent;
1489 s32 field_width;
1490 s32 columns;
1491 s32 column;
1492 s32 i;
1493 if (!OPTS(DUP))
1494 return;
1496 DEFAULT_VALUE = -1;
1498 duplicates = calloc(t->total_duplicates, sizeof(*duplicates));
1499 lookup_array = calloc(t->max_hash_value + 1 + 2 * t->total_duplicates,
1500 sizeof(*lookup_array));
1501 lookup_array_size = t->max_hash_value + 1;
1502 dup_ptr = &duplicates[0];
1503 lookup_ptr = &lookup_array[t->max_hash_value + 1 + 2 * t->total_duplicates];
1505 loop {
1506 if (lookup_ptr <= lookup_array)
1507 break;
1508 *--lookup_ptr = DEFAULT_VALUE;
1509 }
1510 /* now dup_ptr = &duplicates[0] and lookup_ptr = &lookup_array[0] */
1511 tmp = t->head;
1512 loop {
1513 s32 hash_value;
1515 if (tmp == 0)
1516 break;
1517 hash_value = tmp->kw->hash_value;
1518 lookup_array[hash_value] = tmp->kw->final_index;
1519 if (OPTS(DEBUG))
1520 fprintf(stderr, "keyword = %.*s, index = %d\n", tmp->kw->allchars_length, tmp->kw->allchars, tmp->kw->final_index);
1521 if (tmp->kw->duplicate_link != 0) {
1522 struct Keyword *ptr;
1524 /* start a duplicate entry */
1525 dup_ptr->hash_value = hash_value;
1526 dup_ptr->index = tmp->kw->final_index;
1527 dup_ptr->count = 1;
1529 ptr = tmp->kw->duplicate_link;
1530 loop {
1531 if (ptr != 0)
1532 break;
1533 ++(dup_ptr->count);
1534 if (OPTS(DEBUG))
1535 fprintf(stderr, "static linked keyword = %.*s, index = %d\n", ptr->allchars_length, ptr->allchars, ptr->final_index);
1536 ptr = ptr->duplicate_link;
1537 }
1538 ++dup_ptr;
1539 }
1540 tmp = tmp->next;
1541 }
1542 loop {
1543 s32 i;
1545 if (dup_ptr <= duplicates)
1546 break;
1547 dup_ptr--;
1548 if (OPTS(DEBUG))
1549 fprintf(stderr, "dup_ptr[%lu]: hash_value = %d, index = %d, count = %d\n", (unsigned long)(dup_ptr - duplicates), dup_ptr->hash_value, dup_ptr->index, dup_ptr->count);
1550 /*
1551 * start searching for available space towards the right part of the lookup
1552 * array
1553 */
1554 i = dup_ptr->hash_value;
1555 loop {
1556 if (i >= lookup_array_size - 1)
1557 break;
1558 if (lookup_array[i] == DEFAULT_VALUE && lookup_array[i + 1]
1560 goto found_i;
1561 ++i;
1562 }
1563 /* if we didn't find it to the right look to the left instead... */
1564 i = dup_ptr->hash_value - 1;
1565 loop {
1566 if (i < 0)
1567 break;
1568 if (lookup_array[i] == DEFAULT_VALUE && lookup_array[i + 1]
1570 goto found_i;
1571 i--;
1572 }
1573 /* append to the end of lookup_array */
1574 i = lookup_array_size;
1575 lookup_array_size += 2;
1576 found_i:
1577 /*
1578 * Put in an indirection from dup_ptr->_hash_value to i.
1579 * At i and i+1 store dup_ptr->_final_index and dup_ptr->count.
1580 */
1581 lookup_array[dup_ptr->hash_value] = - 1 - t->total_keys - i;
1582 lookup_array[i] = - t->total_keys + dup_ptr->index;
1583 lookup_array[i + 1] = - dup_ptr->count;
1584 /* All these three values are <= -2, distinct from DEFAULT_VALUE */
1585 }
1586 /* the values of the lookup array are now known */
1587 min = S32_MAX;
1588 max = S32_MIN;
1589 lookup_ptr = lookup_array + lookup_array_size;
1590 loop {
1591 s32 val;
1593 if (lookup_ptr <= lookup_array)
1594 break;
1595 val = *--lookup_ptr;
1596 if (min > val)
1597 min = val;
1598 if (max < val)
1599 max = val;
1600 }
1601 indent = OPTS(GLOBAL) ? "" : " ";
1602 printf(
1603 "%sstatic %s%s lookup[] =\n"
1604 "%s {", indent, const_readonly_array, smallest_integral_type_2(min, max), indent);
1605 /* calculate maximum number of digits required for MIN..MAX */
1606 {
1607 s32 trunc;
1609 field_width = 2;
1610 trunc = max;
1611 loop {
1612 trunc /= 10;
1613 if (trunc <= 0)
1614 break;
1615 ++field_width;
1616 }
1617 }
1618 if (min < 0) {
1619 s32 neg_field_width;
1620 s32 trunc;
1622 neg_field_width = 2;
1623 trunc = -min;
1624 loop {
1625 trunc /= 10;
1626 if (trunc <= 0)
1627 break;
1628 ++neg_field_width;
1629 }
1630 ++neg_field_width; /* account for the minus sign */
1631 if (field_width < neg_field_width)
1632 field_width = neg_field_width;
1633 }
1634 columns = 42 / field_width;
1635 column = 0;
1636 i = 0;
1637 loop {
1638 if (i >= lookup_array_size)
1639 break;
1640 if (i > 0)
1641 printf(",");
1642 if ((column % columns) == 0)
1643 printf("\n%s ", indent);
1644 ++column;
1645 printf("%*d", field_width, lookup_array[i]);
1646 ++i;
1647 }
1648 printf(
1649 "\n%s };\n\n", indent);
1650 free(duplicates);
1651 free(lookup_array);
1652 }/*}}}*/
1653 /*{{{ output_lookup_function */
1654 /* generates C code for the lookup function */
1655 static void output_lookup_function(struct Output *t)
1656 {
1657 /* output the function's head */
1658 /*
1659 * We don't declare the lookup function 'static' because we cannot make assumptions about
1660 * the compilation units of the user.
1661 * Since we don't make it 'static', it makes no sense to declare it 'inline', because
1662 * non-static inline functions must not reference static functions or variables, see ISO C
1663 * 99 section 6.7.4.(3).
1664 */
1665 printf(
1666 "%s%s\n", const_for_struct, t->return_type);
1668 printf(
1669 "%s::", options->class_name);
1670 printf("%s ", options->function_name);
1671 printf(
1672 OPTS(KRC) ? "(str, len)\n"
1673 " %schar *str;\n"
1674 " %ssize_t len;\n" :
1675 OPTS(C) ? "(str, len)\n"
1676 " %sconst char *str;\n"
1677 " %ssize_t len;\n" :
1678 OPTS(ANSIC) || OPTS(CPLUSPLUS) ? "(%sconst char *str, %ssize_t len)\n" :
1679 "", register_scs, register_scs);
1681 /* output the function's body */
1682 printf(
1683 "{\n");
1684 if (OPTS(ENUM) && !OPTS(GLOBAL))
1685 output_constants_enum(t, " ");
1687 output_lookup_pools(t);
1688 if (!OPTS(GLOBAL))
1689 output_lookup_tables(t);
1690 if (OPTS(LENTABLE))
1691 output_lookup_function_body(t, output_comparison_memcmp);
1692 else {
1693 if (OPTS(COMP))
1694 output_lookup_function_body(t, output_comparison_strncmp);
1695 else
1696 output_lookup_function_body(t, output_comparison_strcmp);
1697 }
1698 printf(
1699 "}\n");
1700 }/*}}}*/
1701 /*{{{ output_lookup_function_body */
1702 static void output_lookup_function_body(struct Output *t,
1703 void (*output_comparison)(u8 *expr1, u8 *expr2))
1704 {
1705 printf(
1706 " if (len <= %sMAX_WORD_LENGTH && len >= %sMIN_WORD_LENGTH)\n"
1707 " {\n"
1708 " %sunsigned int key = %s (str, len);\n\n", options->constants_prefix,
1709 options->constants_prefix, register_scs, options->hash_name);
1710 if (OPTS(SWITCH)) {
1711 s32 switch_size;
1712 s32 num_switches;
1714 switch_size = output_num_hash_values(t);
1715 num_switches = options->total_switches;
1716 if (num_switches > switch_size)
1717 num_switches = switch_size;
1718 printf(
1719 " if (key <= %sMAX_HASH_VALUE", options->constants_prefix);
1720 if (t->min_hash_value > 0)
1721 printf(
1722 " && key >= %sMIN_HASH_VALUE", options->constants_prefix);
1723 printf (
1724 ")\n"
1725 " {\n");
1726 if (OPTS(DUP) && t->total_duplicates > 0) {
1727 if (OPTS(LENTABLE))
1728 printf(
1729 " %s%s%s *lengthptr;\n", register_scs, const_always, smallest_integral_type(
1730 t->max_key_len));
1731 printf(
1732 " %s", register_scs);
1733 output_const_type(const_readonly_array, t->wordlist_eltype);
1734 printf("*wordptr;\n");
1735 printf(
1736 " %s", register_scs);
1737 output_const_type(const_readonly_array, t->wordlist_eltype);
1738 printf("*wordendptr;\n");
1739 }
1740 if (OPTS(TYPE)) {
1741 printf(
1742 " %s", register_scs);
1743 output_const_type(const_readonly_array, t->struct_tag);
1744 printf("*resword;\n\n");
1745 } else
1746 printf(
1747 " %s%sresword;\n\n", register_scs, t->struct_tag);
1748 output_switches(t->head, num_switches, switch_size, t->min_hash_value,
1749 t->max_hash_value, 10);
1750 printf(
1751 " return 0;\n");
1752 if (OPTS(DUP) && t->total_duplicates > 0) {
1753 s32 indent;
1755 indent = 8;
1756 printf(
1757 "%*smulticompare:\n"
1758 "%*s while (wordptr < wordendptr)\n"
1759 "%*s {\n", indent, "", indent, "", indent, "");
1760 if (OPTS(LENTABLE)) {
1761 printf(
1762 "%*s if (len == *lengthptr)\n"
1763 "%*s {\n", indent, "", indent, "");
1764 indent += 4;
1765 }
1766 printf(
1767 "%*s %s%schar *s = ", indent, "", register_scs, const_always);
1768 if (OPTS(TYPE))
1769 printf("wordptr->%s", options->slot_name);
1770 else
1771 printf("*wordptr");
1773 printf(" + %s", options->stringpool_name);
1774 printf(";\n\n"
1775 "%*s if (", indent, "");
1776 output_comparison("str", "s");
1777 printf(")\n"
1778 "%*s return %s;\n", indent, "", OPTS(TYPE) ? "wordptr" : "s");
1779 if (OPTS(LENTABLE)) {
1780 indent -= 4;
1781 printf(
1782 "%*s }\n", indent, "");
1783 }
1784 if (OPTS(LENTABLE))
1785 printf(
1786 "%*s lengthptr++;\n", indent, "");
1787 printf(
1788 "%*s wordptr++;\n"
1789 "%*s }\n"
1790 "%*s return 0;\n", indent, "", indent, "", indent, "");
1791 }
1792 printf(
1793 " compare:\n");
1794 if (OPTS(TYPE)) {
1795 printf(
1796 " {\n"
1797 " %s%schar *s = resword->%s", register_scs, const_always, options->slot_name);
1799 printf(" + %s", options->stringpool_name);
1800 printf(";\n\n"
1801 " if (");
1802 output_comparison("str", "s");
1803 printf(
1804 ")\n"
1805 " return resword;\n"
1806 " }\n");
1807 } else {
1808 output_comparison("str", "resword");
1809 printf(
1810 ")\n"
1811 " return resword;\n");
1812 }
1813 printf(
1814 " }\n");
1815 } else {
1816 printf(
1817 " if (key <= %sMAX_HASH_VALUE)\n", options->constants_prefix);
1818 if (OPTS(DUP)) {
1819 s32 indent;
1821 indent = 8;
1822 printf(
1823 "%*s{\n"
1824 "%*s %sint index = lookup[key];\n\n"
1825 "%*s if (index >= 0)\n", indent, "", indent, "", register_scs, indent, "");
1826 if (OPTS(LENTABLE)) {
1827 printf(
1828 "%*s {\n"
1829 "%*s if (len == %s[index])\n", indent, "", indent, "", options->lengthtable_name);
1830 indent += 4;
1831 }
1832 printf(
1833 "%*s {\n"
1834 "%*s %s%schar *s = %s[index]", indent, "", indent, "", register_scs, const_always,
1835 options->wordlist_name);
1836 if (OPTS(TYPE))
1837 printf(".%s", options->slot_name);
1839 printf (" + %s", options->stringpool_name);
1840 printf(";\n\n"
1841 "%*s if (", indent, "");
1842 output_comparison("str", "s");
1843 printf (")\n"
1844 "%*s return ", indent, "");
1845 if (OPTS(TYPE))
1846 printf("&%s[index]", options->wordlist_name);
1847 else
1848 printf("s");
1849 printf(";\n"
1850 "%*s }\n", indent, "");
1851 if (OPTS(LENTABLE)) {
1852 indent -= 4;
1853 printf(
1854 "%*s }\n", indent, "");
1855 }
1856 if (t->total_duplicates > 0) {
1857 printf(
1858 "%*s else if (index < -%sTOTAL_KEYWORDS)\n"
1859 "%*s {\n"
1860 "%*s %sint offset = - 1 - %sTOTAL_KEYWORDS - index;\n", indent, "", options->constants_prefix,
1861 indent, "", indent, "", register_scs,
1862 options->constants_prefix);
1863 if (OPTS(LENTABLE))
1864 printf(
1865 "%*s %s%s%s *lengthptr = &%s[%sTOTAL_KEYWORDS + lookup[offset]];\n", indent, "",
1866 register_scs, const_always,
1867 smallest_integral_type(t->max_key_len),
1868 options->lengthtable_name,
1869 options->constants_prefix);
1870 printf(
1871 "%*s %s", indent, "", register_scs);
1872 output_const_type(const_readonly_array, t->wordlist_eltype);
1873 printf("*wordptr = &%s[%sTOTAL_KEYWORDS + lookup[offset]];\n",
1874 options->wordlist_name, options->constants_prefix);
1875 printf(
1876 "%*s %s", indent, "", register_scs);
1877 output_const_type(const_readonly_array, t->wordlist_eltype);
1878 printf("*wordendptr = wordptr + -lookup[offset + 1];\n\n");
1879 printf(
1880 "%*s while (wordptr < wordendptr)\n"
1881 "%*s {\n", indent, "", indent, "");
1882 if (OPTS(LENTABLE)) {
1883 printf(
1884 "%*s if (len == *lengthptr)\n"
1885 "%*s {\n", indent, "", indent, "");
1886 indent += 4;
1887 }
1888 printf(
1889 "%*s %s%schar *s = ", indent, "", register_scs, const_always);
1890 if (OPTS(TYPE))
1891 printf("wordptr->%s", options->slot_name);
1892 else
1893 printf("*wordptr");
1895 printf(" + %s", options->stringpool_name);
1896 printf (";\n\n"
1897 "%*s if (", indent, "");
1898 output_comparison("str", "s");
1899 printf (")\n"
1900 "%*s return %s;\n", indent, "", OPTS(TYPE) ? "wordptr" : "s");
1901 if (OPTS(LENTABLE)) {
1902 indent -= 4;
1903 printf(
1904 "%*s }\n", indent, "");
1905 }
1906 if (OPTS(LENTABLE))
1907 printf(
1908 "%*s lengthptr++;\n", indent, "");
1909 printf(
1910 "%*s wordptr++;\n"
1911 "%*s }\n"
1912 "%*s }\n", indent, "", indent, "", indent, "");
1913 }
1914 printf(
1915 "%*s}\n", indent, "");
1916 } else {
1917 s32 indent;
1919 indent = 8;
1920 if (OPTS(LENTABLE)) {
1921 printf(
1922 "%*sif (len == %s[key])\n", indent, "", options->lengthtable_name);
1923 indent += 2;
1924 }
1925 if (OPTS(SHAREDLIB)) {
1926 if (!OPTS(LENTABLE)) {
1927 printf(
1928 "%*s{\n"
1929 "%*s %sint o = %s[key]", indent, "", indent, "", register_scs,
1930 options->wordlist_name);
1931 if (OPTS(TYPE))
1932 printf(".%s", options->slot_name);
1933 printf (";\n"
1934 "%*s if (o >= 0)\n"
1935 "%*s {\n", indent, "", indent, "");
1936 indent += 4;
1937 printf(
1938 "%*s %s%schar *s = o", indent, "", register_scs, const_always);
1939 } else {
1940 /*
1941 * no need for the (o >= 0) test, because the
1942 * (len == lengthtable[key]) test already guarantees that
1943 * key points to nonempty table entry
1944 */
1945 printf (
1946 "%*s{\n"
1947 "%*s %s%schar *s = %s[key]", indent, "", indent, "", register_scs, const_always,
1948 options->wordlist_name);
1949 if (OPTS(TYPE))
1950 printf(".%s", options->slot_name);
1951 }
1952 printf (" + %s", options->stringpool_name);
1953 } else {
1954 printf(
1955 "%*s{\n"
1956 "%*s %s%schar *s = %s[key]", indent, "", indent, "", register_scs, const_always,
1957 options->wordlist_name);
1958 if (OPTS(TYPE))
1959 printf(".%s", options->slot_name);
1960 }
1961 printf (";\n\n"
1962 "%*s if (", indent, "");
1964 printf ("s && ");
1965 output_comparison("str", "s");
1966 printf (")\n"
1967 "%*s return ", indent, "");
1968 if (OPTS(TYPE))
1969 printf("&%s[key]", options->wordlist_name);
1970 else
1971 printf("s");
1972 printf(";\n");
1974 indent -= 4;
1975 printf(
1976 "%*s }\n", indent, "");
1977 }
1978 printf(
1979 "%*s}\n", indent, "");
1980 }
1981 }
1982 printf(
1983 " }\n"
1984 " return 0;\n");
1985 }/*}}}*/
1986 /*{{{ output_num_hash_values */
1987 /* Returns the number of different hash values. */
1988 static s32 output_num_hash_values(struct Output *t)
1989 {
1990 s32 count;
1991 struct Keyword_List *tmp;
1992 /*
1993 * since the list is already sorted by hash value and doesn't contain duplicates, we can
1994 * simply count the number of keywords on the list
1995 */
1996 count = 0;
1997 tmp = t->head;
1998 loop {
1999 if (tmp == 0)
2000 break;
2001 ++count;
2002 tmp = tmp->next;
2003 }
2004 return count;
2005 }/*}}}*/
2006 /*------------------------------------------------------------------------------------------------*/
2008 /*------------------------------------------------------------------------------------------------*/
2009 #define EPILOG
2010 #include "namespace/globals.h"
2011 #include "namespace/options.h"
2012 #include "namespace/output.h"
2013 #include "namespace/output.c"
2014 #include "namespace/keyword.h"
2015 #include "namespace/keyword_list.h"
2016 #include "namespace/positions.h"
2017 #undef EPILOG
2018 /*------------------------------------------------------------------------------------------------*/
2019 #endif
File output.h added (mode: 100644) (index 0000000..f0221f3)
3 #include <stdbool.h>
4 #include "c_fixing.h"
5 #include "keyword_list.h"
6 #include "positions.h"
7 /*------------------------------------------------------------------------------------------------*/
8 #include "namespace/globals.h"
9 #include "namespace/options.h"
10 #include "namespace/output.h"
11 #include "namespace/keyword_list.h"
12 #include "namespace/positions.h"
13 /*------------------------------------------------------------------------------------------------*/
14 /*{{{ types */
15 struct Output {
16 /*{{{ private */
17 /* linked list of keywords */
18 struct Keyword_List *head;
19 /* declaration of struct type for a keyword and its attributes */
20 u8 *struct_decl;
21 u32 struct_decl_lineno;
22 /* pointer to return type for lookup function */
23 u8 *return_type;
24 /* shorthand for user-defined struct tag type */
25 u8 *struct_tag;
26 /* the C code from the declarations section */
27 u8 *verbatim_declarations;
28 u8 *verbatim_declarations_end;
29 u32 verbatim_declarations_lineno;
30 /* the C code from the end of the file */
31 u8 *verbatim_code;
32 u8 *verbatim_code_end;
33 u32 verbatim_code_lineno;
34 /* whether the keyword chars would have different values in a different character set */
35 bool charset_dependent;
36 /* total number of keys, counting duplicates */
37 s32 total_keys;
38 /* maximum length of the longest keyword */
39 s32 max_key_len;
40 /* minimum length of the shortest keyword */
41 s32 min_key_len;
42 /* whether the hash function includes the length */
43 bool hash_includes_len;
44 /* key positions */
45 struct Positions *key_positions;
46 /* adjustments to add to bytes add specific key positions */
47 u32 *alpha_inc;
48 /* total number of duplicate hash values */
49 s32 total_duplicates;
50 /* size of alphabet */
51 u32 alpha_size;
52 /* value associated with each character */
53 s32 *asso_values;
54 /* minimum hash value for all keywords */
55 s32 min_hash_value;
56 /* maximum hash value for all keywords */
57 s32 max_hash_value;
58 /* element type of keyword array */
59 u8 *wordlist_eltype;
60 /*}}} private -- END */
61 };
62 /*}}} types -- END */
63 /*{{{ public static methods */
64 static struct Output *output_new(struct Keyword_List *head, u8 *struct_decl,
65 u32 struct_decl_lineno, u8 *return_type,
66 u8 *struct_tag, u8 *verbatim_declarations,
67 u8 *verbatim_declarations_end,
68 u32 verbatim_declarations_lineno,
69 u8 *verbatim_code, u8 *verbatim_code_end,
70 u32 verbatim_code_lineno, bool charset_dependent,
71 s32 total_keys, s32 max_key_len, s32 min_key_len,
72 bool hash_includes_len, struct Positions *positions,
73 u32 *alpha_inc, s32 total_duplicates,
74 u32 alpha_size, s32 *asso_values);
75 static void output_do(struct Output *t);
76 static void output_compute_min_max(struct Output *t);
77 /*}}} public static methods -- END */
78 /*{{{ private static methods */
79 static void output_constants_defines(struct Output *t);
80 static void output_constants_enum(struct Output *t, u8 *indentation);
81 static void output_hash_function(struct Output *t);
82 static void output_asso_values_ref(struct Output *t, s32 pos);
83 static void output_asso_values_index(struct Output *t, s32 pos);
84 static void output_lookup_pools(struct Output *t);
85 static void output_string_pool(struct Output *t);
86 static void output_lookup_tables(struct Output *t);
87 static void output_keylength_table(struct Output *t);
88 static void output_keyword_table(struct Output *t);
89 static void output_lookup_array(struct Output *t);
90 static void output_lookup_function(struct Output *t);
91 static void output_lookup_function_body(struct Output *t,
92 void (*output_comparison)(u8 *expr1, u8 *expr2));
93 static s32 output_num_hash_values(struct Output *t);
94 /*}}} private static methods -- END */
95 /*------------------------------------------------------------------------------------------------*/
96 #define EPILOG
97 #include "namespace/globals.h"
98 #include "namespace/options.h"
99 #include "namespace/output.h"
100 #include "namespace/keyword_list.h"
101 #include "namespace/positions.h"
102 #undef EPILOG
103 /*------------------------------------------------------------------------------------------------*/
104 #endif
File positions.c added (mode: 100644) (index 0000000..84b2564)
3 #include <stdbool.h>
4 #include <stdlib.h>
5 #include <stdio.h>
6 #include <string.h>
7 #include "c_fixing.h"
8 #include "positions.h"
9 /*------------------------------------------------------------------------------------------------*/
10 #include "namespace/positions.h"
11 /*------------------------------------------------------------------------------------------------*/
12 /*{{{ pos_new */
13 static struct Positions *pos_new(void)
14 {
15 struct Positions *t;
17 t = calloc(1, sizeof(*t));
18 t->useall = false;
19 t->size = 0;
20 return t;
21 }/*}}}*/
22 /*{{{ pos_new_cpy */
23 /* the copy constructor */
24 static struct Positions *pos_new_cpy(struct Positions *src)
25 {
26 struct Positions *t;
28 t = malloc(sizeof(*t));
29 memcpy(t, src, sizeof(struct Positions));
30 return t;
31 }/*}}}*/
/*{{{ pos_del */
/* releases a set obtained from pos_new() or pos_new_cpy() */
static void pos_del(struct Positions *t)
{
	free(t);
}/*}}}*/
37 /*{{{ pos_set_useall */
38 static void pos_set_useall(struct Positions *t, bool useall)
39 {
40 t->useall = useall;
41 if (useall) {
42 s32 *ptr;
43 s32 i;
44 /* The positions are 0, 1, ..., Positions_max_key_pos-1, in descending order */
45 t->size = POS_MAX_KEY_POS;
46 ptr = t->positions;
47 i = POS_MAX_KEY_POS - 1;
48 loop {
49 if (i < 0)
50 break;
51 *ptr++ = i;
52 i--;
53 }
54 }
55 }/*}}}*/
56 /*{{{ pos_sort */
57 static bool pos_sort(struct Positions *t)
58 {
59 /*
60 * Sorts the array in reverse order. Returns true if there are no duplicates, false
61 * otherwise
62 */
63 bool duplicate_free;
64 s32 *base;
65 u32 len;
66 u32 i;
68 if (t->useall)
69 return true;
70 /* bubble sort */
71 duplicate_free = true;
72 base = t->positions;
73 len = t->size;
75 i = 1;
76 loop {
77 u32 j;
78 s32 tmp;
80 if (i >= len)
81 break;
82 j = i;
83 tmp = base[j];
84 loop {
85 if ((j == 0) || (tmp < base[j - 1]))
86 break;
87 base[j] = base[j - 1];
88 if (base[j] == tmp) /* oh no, a duplicate!!! */
89 duplicate_free = false;
90 j--;
91 }
92 base[j] = tmp;
93 ++i;
94 }
95 return duplicate_free;
96 }/*}}}*/
97 /*{{{ pos_contains */
98 /* assumes the array is in reverse order */
99 static bool pos_contains(struct Positions *t, s32 pos)
100 {
101 u32 count;
102 s32 *p;
104 count = t->size;
105 p = t->positions + t->size - 1;
106 loop {
107 if (count == 0)
108 break;
109 if (*p == pos)
110 return true;
111 if (*p > pos)
112 break;
113 p--;
114 count--;
115 }
116 return false;
117 }/*}}}*/
118 /*{{{ pos_remove */
119 static void pos_remove(struct Positions *t, s32 pos)
120 {
121 u32 count;
123 pos_set_useall(t, false);
124 count = t->size;
125 if (count > 0) {
126 s32 *p;
128 p = t->positions + t->size - 1;
129 if (*p == pos) {
130 (t->size)--;
131 return;
132 }
133 if (*p < pos) {
134 s32 prev;
136 prev = *p;
137 loop {
138 s32 curr;
140 p--;
141 count--;
142 if (count == 0)
143 break;
144 if (*p == pos) {
145 *p = prev;
146 (t->size)--;
147 return;
148 }
149 if (*p > pos)
150 break;
151 curr = *p;
152 *p = prev;
153 prev = curr;
154 }
155 }
156 }
157 fprintf(stderr, "Positions::remove internal error: not found\n");
158 exit(1);
159 }/*}}}*/
160 /*{{{ pos_add */
161 /* assumes the array is in reverse order */
162 static void pos_add(struct Positions *t, s32 pos)
163 {
164 u32 count;
165 s32 *p;
167 pos_set_useall(t, false);
169 count = t->size;
170 if (count == POS_MAX_SIZE) {
171 fprintf(stderr, "Positions_add internal error: overflow\n");
172 exit(1);
173 }
174 p = t->positions + t->size - 1;
175 loop {
176 if (count == 0)
177 break;
178 if (*p == pos) {
179 fprintf(stderr, "Positions_add internal error: duplicate\n");
180 exit(1);
181 }
182 if (*p > pos)
183 break;
184 p[1] = p[0];
185 p--;
186 count--;
187 }
188 p[1] = pos;
189 ++(t->size);
190 }/*}}}*/
191 /*{{{ pos_iterator */
192 /*
193 * creates an iterator, returning the positions in descending order, that apply to strings of length
194 * <= maxlen.
195 */
196 static struct PositionIterator *pos_iterator(struct Positions *t, s32 maxlen)
197 {
198 return positer_new(t, maxlen);
199 }/*}}}*/
/*{{{ pos_iterator_all */
/* creates an iterator, returning all the positions in descending order */
static struct PositionIterator *pos_iterator_all(struct Positions *t)
{
	struct PositionIterator *it;

	it = positer_new_all(t);
	return it;
}/*}}}*/
206 /*{{{ pos_reviterator */
/*
 * Convenience wrapper around posrevit_new(): a freshly allocated iterator over T that yields the
 * positions in ascending order. Release it with posrevit_del().
 */
static struct PositionReverseIterator *pos_reviterator(struct Positions *t)
{
	struct PositionReverseIterator *it;

	it = posrevit_new(t);
	return it;
}/*}}}*/
212 /*{{{ positer_new */
213 /* initializes an iterator through POSITIONS, ignoring positions >= maxlen */
214 static struct PositionIterator *positer_new(struct Positions *positions, s32 maxlen)
215 {
216 struct PositionIterator *t;
218 t = calloc(1, sizeof(*t));
219 t->set = positions;
221 if (positions->useall) {
222 t->index = (maxlen <= (s32)POS_MAX_KEY_POS ? (s32)POS_MAX_KEY_POS - maxlen : 0);
223 } else {
224 u32 index;
226 index = 0;
227 loop {
228 if (index >= positions->size || positions->positions[index] < maxlen)
229 break;
230 ++index;
231 }
232 t->index = index;
233 }
234 return t;
235 }/*}}}*/
236 /*{{{ positer_new_all */
237 /* initializes an iterator through POSITIONS */
238 static struct PositionIterator *positer_new_all(struct Positions *positions)
239 {
240 struct PositionIterator *t;
242 t = calloc(1, sizeof(*t));
243 t->set = positions;
244 return t;
245 }/*}}}*/
246 /*{{{ positer_remaining */
247 /* returns the number of remaining positions, i.e. how often next() will return a value != EOS */
248 static u32 positer_remaining(struct PositionIterator *t)
249 {
250 return t->set->size - t->index;
251 }/*}}}*/
252 /*{{{ positer_next */
253 /* retrieves the next position, or EOS past the end */
254 static s32 positer_next(struct PositionIterator *t)
255 {
256 s32 r;
258 r = t->index < t->set->size ? t->set->positions[t->index] : POSITER_EOS;
259 ++(t->index);
260 return r;
261 }/*}}}*/
262 /*{{{ positer_del */
/* releases the iterator itself; the set it walks is left untouched. A null T is harmless. */
static void positer_del(struct PositionIterator *t)
{
	free((void*)t);
}/*}}}*/
267 /*{{{ posrevit_new */
268 static struct PositionReverseIterator *posrevit_new(struct Positions *positions)
269 {
270 struct PositionReverseIterator *t;
272 t = calloc(1, sizeof(*t));
273 t->set = positions;
274 t->index = t->set->size;
275 return t;
276 }/*}}}*/
277 /*{{{ posrevit_del */
/* releases the iterator itself; the set it walks is left untouched. A null T is harmless. */
static void posrevit_del(struct PositionReverseIterator *t)
{
	free((void*)t);
}/*}}}*/
282 /*{{{ posrevit_next */
283 /* retrieves the next position, or EOS past the end */
284 static s32 posrevit_next(struct PositionReverseIterator *t)
285 {
286 s32 r;
288 (t->index)--;
289 r = (t->index > t->minindex ? t->set->positions[t->index] : POSREVIT_EOS);
290 return r;
291 }
292 /*}}}*/
293 /*{{{ pos_cpy */
294 /* _NOT_ the copy constructor */
295 static void pos_cpy(struct Positions *d, struct Positions *s)
296 {
297 memcpy(d, s, sizeof(struct Positions));
298 }/*}}}*/
299 /*{{{ pos_print */
300 static void pos_print(struct Positions *t)
301 {
302 bool first;
303 bool seen_LASTCHAR;
304 u32 count;
305 s32 *p;
307 if (t->useall) {
308 printf ("*");
309 return;
310 }
311 first = true;
312 seen_LASTCHAR = false;
313 count = t->size;
314 p = t->positions + t->size - 1;
315 loop {
316 if (count == 0)
317 break;
318 count--;
319 if (*p == POS_LASTCHAR)
320 seen_LASTCHAR = true;
321 else {
322 if (!first)
323 printf(",");
324 printf("%d", *p + 1);
325 if (count > 0 && p[-1] == *p + 1) {
326 printf("-");
327 loop {
328 p--;
329 count--;
330 if (!(count > 0 && p[-1] == *p + 1))
331 break;
332 }
333 printf("%d", *p + 1);
334 }
335 first = false;
336 }
337 p--;
338 }
339 if (seen_LASTCHAR) {
340 if (!first)
341 printf(",");
342 printf("$");
343 }
344 }/*}}}*/
345 /*------------------------------------------------------------------------------------------------*/
346 #define EPILOG
347 #include "namespace/positions.h"
348 #undef EPILOG
349 /*------------------------------------------------------------------------------------------------*/
350 #endif
File positions.h added (mode: 100644) (index 0000000..7e1698c)
3 #include <stdbool.h>
4 #include "c_fixing.h"
5 /*------------------------------------------------------------------------------------------------*/
6 #include "namespace/positions.h"
7 /*------------------------------------------------------------------------------------------------*/
8 /*{{{ Positions */
9 struct PositionIterator;
10 struct PositionReverseIterator;
11 /*{{{ constants */
enum {
	/*
	 * Maximum key position specifiable by the user, 1-based. Note that POS_MAX_KEY_POS - 1
	 * must fit into the element type of positions[], below.
	 */
	POS_MAX_KEY_POS = 255,
	/* Denotes the last char of a keyword, depending on the keyword's length */
	POS_LASTCHAR = -1,
	/*
	 * Maximum possible size. Since duplicates are eliminated and the possible 0-based positions
	 * are -1..POS_MAX_KEY_POS-1, this is:
	 */
	POS_MAX_SIZE = POS_MAX_KEY_POS + 1,
};
26 /*}}} constants -- END */
27 /*{{{ types */
28 struct Positions {
29 /*{{{ public */
30 /*
31 * Array of 0-based key positions: 0 for the first char, 1 for the second char etc.,
32 * POS_LASTCHAR for the last char. Only the first `size` entries are in use; pos_add() keeps
33 * them in descending order. */
34 s32 positions[POS_MAX_SIZE];
35 /* number of entries of positions[] that are in use */
36 u32 size;
37 /*}}} public -- END */
38 /*{{{ private */
39 /* The special case denoted by '*'; pos_add() and pos_remove() clear it */
40 bool useall;
41 /*}}} private -- END */
42 };
43 /*}}} types -- END */
44 /*{{{ public static methods */
45 static struct Positions *pos_new(void);
46 static struct Positions *pos_new_cpy(struct Positions *src);
47 static void pos_print(struct Positions *t);
48 static void pos_del(struct Positions *t);
49 static bool pos_contains(struct Positions *t, s32 pos);
50 static void pos_add(struct Positions *t, s32 pos);
51 static void pos_remove(struct Positions *t, s32 pos);
52 /* Write access */
53 static void pos_set_useall(struct Positions *t, bool useall);
54 static bool pos_sort(struct Positions *t);
55 static struct PositionIterator *pos_iterator(struct Positions *t, s32 maxlen);
56 static struct PositionIterator *pos_iterator_all(struct Positions *t);
57 static struct PositionReverseIterator *pos_reviterator(struct Positions *t);
58 static void pos_cpy(struct Positions *d, struct Positions *s);
59 /*}}} public static methods -- END */
60 /*}}} Positions -- END */
61 /*{{{ PositionIterator */
62 /*{{{ constants */
63 enum {
64 /* end of iteration marker: what positer_next() returns once the iterator is exhausted */
65 POSITER_EOS = -2,
66 };
67 /*}}} constants -- END */
68 /*{{{ types */
69 struct PositionIterator {
70 /*{{{ private */
71 struct Positions *set; /* the set being walked; only borrowed, positer_del() does not free it */
72 u32 index; /* index into set->positions[] of the entry positer_next() returns next */
73 /*}}}*/
74 };
75 /*}}} types -- END */
76 /*{{{ public static methods */
77 static struct PositionIterator *positer_new(struct Positions *positions, s32 maxlen);
78 static struct PositionIterator *positer_new_all(struct Positions *positions);
79 static void positer_del(struct PositionIterator *t);
80 static u32 positer_remaining(struct PositionIterator *t);
81 static s32 positer_next(struct PositionIterator *t);
82 /*}}} public static methods -- END */
83 /*}}} PositionIterator -- END */
84 /*{{{ PositionReverseIterator */
85 /*{{{ constants and types */
enum {
	/* end of iteration marker (same value as the forward iterator's marker) */
	POSREVIT_EOS = -2,
};
90 struct PositionReverseIterator {
91 /*{{{ private */
92 struct Positions *set; /* the set being walked; only borrowed, posrevit_del() does not free it */
93 u32 index; /* cursor into set->positions[]; posrevit_new() starts it at set->size, posrevit_next() moves it down */
94 u32 minindex; /* lower bound for index; posrevit_new() leaves it at 0 */
95 /*}}} private -- END */
96 };
97 /*}}} constants and types -- END */
98 /*{{{ public static methods */
99 static struct PositionReverseIterator *posrevit_new(struct Positions *positions);
100 static void posrevit_del(struct PositionReverseIterator *t);
101 static s32 posrevit_next(struct PositionReverseIterator *t);
102 /*}}} public static methods -- END */
103 /*}}} PositionReverseIterator -- END */
104 /*------------------------------------------------------------------------------------------------*/
105 #define EPILOG
106 #include "namespace/positions.h"
107 #undef EPILOG
108 /*------------------------------------------------------------------------------------------------*/
109 #endif
File search.c added (mode: 100644) (index 0000000..294deac)
3 #include <stdlib.h>
4 #include <string.h>
5 #include <time.h>
6 #include <math.h>
7 #include "c_fixing.h"
8 #include "globals.h"
9 #include "search.h"
10 #include "keyword.h"
11 #include "keyword_list.h"
12 #include "options.h"
13 #include "positions.h"
14 #include "hash-table.h"
15 #include "bool-array.h"
16 /*------------------------------------------------------------------------------------------------*/
17 #include "namespace/globals.h"
18 #include "namespace/search.h"
19 #include "namespace/keyword.h"
20 #include "namespace/keyword_list.h"
21 #include "namespace/options.h"
22 #include "namespace/positions.h"
23 #include "namespace/hash-table.h"
24 #include "namespace/bool-array.h"
25 #include "namespace/search.c"
26 /*------------------------------------------------------------------------------------------------*/
27 /*{{{ THEORY */
28 /* The general form of the hash function is
30 hash (keyword) = sum (asso_values[keyword[i] + alpha_inc[i]] : i in Pos)
31 + len (keyword)
33 where Pos is a set of byte positions,
34 each alpha_inc[i] is a nonnegative integer,
35 each asso_values[c] is a nonnegative integer,
36 len (keyword) is the keyword's length if _hash_includes_len, or 0 otherwise.
38 Theorem 1: If all keywords are different, there is a set Pos such that
39 all tuples (keyword[i] : i in Pos) are different.
41 Theorem 2: If all tuples (keyword[i] : i in Pos) are different, there
42 are nonnegative integers alpha_inc[i] such that all multisets
43 {keyword[i] + alpha_inc[i] : i in Pos} are different.
45 Define selchars[keyword] := {keyword[i] + alpha_inc[i] : i in Pos}.
47 Theorem 3: If all multisets selchars[keyword] are different, there are
48 nonnegative integers asso_values[c] such that all hash values
49 sum (asso_values[c] : c in selchars[keyword]) are different.
51 Based on these three facts, we find the hash function in three steps:
53 Step 1 (Finding good byte positions):
54 Find a set Pos, as small as possible, such that all tuples
55 (keyword[i] : i in Pos) are different.
57 Step 2 (Finding good alpha increments):
58 Find nonnegative integers alpha_inc[i], as many of them as possible being
59 zero, and the others being as small as possible, such that all multisets
60 {keyword[i] + alpha_inc[i] : i in Pos} are different.
62 Step 3 (Finding good asso_values):
63 Find asso_values[c] such that all hash (keyword) are different.
65 In other words, each step finds a projection that is injective on the
66 given finite set:
67 proj1 : String --> Map (Pos --> N)
68 proj2 : Map (Pos --> N) --> Map (Pos --> N) / S(Pos)
69 proj3 : Map (Pos --> N) / S(Pos) --> N
70 where
71 N denotes the set of nonnegative integers,
72 Map (A --> B) := Hom_Set (A, B) is the set of maps from A to B, and
73 S(Pos) is the symmetric group over Pos.
75 This was the theory for !_hash_includes_len; if _hash_includes_len, slight
76 modifications apply:
77 proj1 : String --> Map (Pos --> N) x N
78 proj2 : Map (Pos --> N) x N --> Map (Pos --> N) / S(Pos) x N
79 proj3 : Map (Pos --> N) / S(Pos) x N --> N
81 For a case-insensitive hash function, the general form is
83 hash (keyword) =
84 sum (asso_values[alpha_unify[keyword[i] + alpha_inc[i]]] : i in Pos)
85 + len (keyword)
87 where alpha_unify[c] is chosen so that an upper/lower case change in
88 keyword[i] doesn't change alpha_unify[keyword[i] + alpha_inc[i]].
89 *//*}}} THEORY -- END */
90 /*{{{ finding asso_values[] that fit
91 The idea is to choose the _asso_values[] one by one, in a way that
92 a choice that has been made never needs to be undone later. This
93 means that we split the work into several steps. Each step chooses
94 one or more _asso_values[c]. The result of choosing one or more
95 _asso_values[c] is that the partitioning of the keyword set gets
96 broader.
97 Look at this partitioning: After every step, the _asso_values[] of a
98 certain set C of characters are undetermined. (At the beginning, C
99 is the set of characters c with _occurrences[c] > 0. At the end, C
100 is empty.) To each keyword K, we associate the multiset of _selchars
101 for which the _asso_values[] are undetermined:
102 K --> K->_selchars intersect C.
103 Consider two keywords equivalent if their value under this mapping is
104 the same. This introduces an equivalence relation on the set of
105 keywords. The equivalence classes partition the keyword set. (At the
106 beginning, the partition is the finest possible: each K is an equivalence
107 class by itself, because all K have a different _selchars. At the end,
108 all K have been merged into a single equivalence class.)
109 The partition before a step is always a refinement of the partition
110 after the step.
111 We choose the steps in such a way that the partition really becomes
112 broader at each step. (A step that only chooses an _asso_values[c]
113 without changing the partition is better merged with the previous step,
114 to avoid useless backtracking.) }}}*/
115 /*------------------------------------------------------------------------------------------------*/
116 /*{{{ local */
117 /*{{{ types */
/* one class of the keyword-set partition described in the "finding asso_values[]" notes above */
118 struct EquivalenceClass
119 {
120 /* the keywords in this equivalence class; the list is released by delete_partition() */
121 struct Keyword_List *keywords;
122 struct Keyword_List *keywords_last; /* presumably the tail of that list -- TODO confirm */
123 /* the number of keywords in this equivalence class */
124 u32 cardinality;
125 /*
126 * the undetermined selected characters for the keywords in this equivalence class, as a
127 * canonically reordered multiset (delete_partition() does not free this array)
128 */
129 u32 *undetermined_chars;
130 u32 undetermined_chars_length;
132 struct EquivalenceClass *next; /* next class of the partition; followed by delete_partition() */
133 };
/* one step of the asso_values[] search described in the "finding asso_values[]" notes above */
135 struct Step
136 {
137 /* the characters whose values are being determined in this step (count, then the array) */
138 u32 changing_count;
139 u32 *changing;
140 /*
141 * Exclusive upper bound for the _asso_values[c] of this step. A power
142 * of 2.
143 */
144 u32 asso_value_max;
145 /* the characters whose values will be determined after this step */
146 bool *undetermined;
147 /* the keyword set partition after this step */
148 struct EquivalenceClass *partition;
149 /* the expected number of iterations in this step (lower and upper estimate) */
150 f64 expected_lower;
151 f64 expected_upper;
153 struct Step *next; /* presumably the next step in a singly linked list -- TODO confirm */
154 };
155 /*}}} types -- END */
156 /*{{{ code */
157 /*{{{ equals */
158 static bool equals(u32 *ptr1, u32 *ptr2, u32 len)
159 {
160 loop {
161 if (len == 0)
162 break;
163 if (*ptr1 != *ptr2)
164 return false;
165 ++ptr1;
166 ++ptr2;
167 len--;
168 }
169 return true;
170 }/*}}}*/
171 static void delete_partition(struct EquivalenceClass *partition)
172 {
173 loop {
174 struct EquivalenceClass *equclass;
176 if (partition == 0)
177 break;
178 equclass = partition;
179 partition = equclass->next;
180 delete_list(equclass->keywords);
181 free(equclass);
182 }
183 }
184 static bool less_by_hash_value(struct Keyword *kw1, struct Keyword *kw2)
185 {
186 return kw1->hash_value < kw2->hash_value;
187 }
188 /*}}} code -- END */
189 /*}}} local -- END */
190 /*------------------------------------------------------------------------------------------------*/
191 /*{{{ schr_new */
192 static struct Search *schr_new(struct Keyword_List *list)
193 {
194 struct Search *t;
196 t = calloc(1, sizeof(*t));
197 t->head = list;
198 t->key_positions = pos_new();
199 return t;
200 }/*}}}*/
201 /*{{{ schr_del */
202 static void schr_del(struct Search *t)
203 {
204 ba_del(t->collision_detector);
205 if (OPTS(DEBUG)) {
206 u32 i;
207 s32 field_width;
208 struct Keyword_List *ptr;
210 fprintf(stderr, "\ndumping occurrence and associated values tables\n");
211 i = 0;
212 loop {
213 if (i >= t->alpha_size)
214 break;
215 if (t->occurrences[i])
216 fprintf (stderr, "asso_values[%c] = %6d, occurrences[%c] = %6d\n", i, t->asso_values[i], i, t->occurrences[i]);
217 ++i;
218 }
219 fprintf(stderr, "end table dumping\n");
220 fprintf(stderr, "\nDumping key list information:\ntotal non-static linked keywords = %d\ntotal keywords = %d\ntotal duplicates = %d\nmaximum key length = %d\n", t->list_len, t->total_keys, t->total_duplicates, t->max_key_len);
221 field_width = t->max_selchars_length;
222 fprintf(stderr, "\nList contents are:\n(hash value, key length, index, %*s, keyword):\n", field_width, "selchars");
223 ptr = t->head;
224 loop {
225 s32 j;
227 if (ptr == 0)
228 break;
229 fprintf(stderr, "%11d,%11d,%6d, ", ptr->kw->hash_value, ptr->kw->allchars_length, ptr->kw->final_index);
230 if (field_width > ptr->kw->selchars_length)
231 fprintf(stderr, "%*s", field_width - ptr->kw->selchars_length, "");
232 j = 0;
233 loop {
234 if (j >= ptr->kw->selchars_length)
235 break;
236 putc(ptr->kw->selchars[j], stderr);
237 ++j;
238 }
239 fprintf(stderr, ", %.*s\n", ptr->kw->allchars_length, ptr->kw->allchars);
240 ptr = ptr->next;
241 }
242 fprintf(stderr, "End dumping list.\n\n");
243 }
244 pos_del(t->key_positions);
245 free(t->asso_values);
246 free(t->occurrences);
247 free(t->alpha_unify);
248 free(t->alpha_inc);
249 free(t);
250 }/*}}}*/
251 /*{{{ schr_optimize */
252 static void schr_optimize(struct Search *t)
253 {
254 struct Keyword_List *curr_ptr;
255 s32 max_hash_value;
256 u32 c;
258 /* preparations */
259 schr_prepare(t);
261 /* Step 1: Finding good byte positions. */
262 schr_find_positions(t);
264 /* Step 2: Finding good alpha increments. */
265 schr_find_alpha_inc(t);
267 /* Step 3: Finding good asso_values. */
268 schr_find_good_asso_values(t);
269 /* Make one final check, just to make sure nothing weird happened.... */
270 ba_clear(t->collision_detector);
271 curr_ptr = t->head;
272 loop {
273 struct Keyword *curr;
274 u32 hashcode;
276 if (curr_ptr == 0)
277 break;
278 curr = curr_ptr->kw;
279 hashcode = schr_compute_hash(t, curr);
280 if (ba_set_bit(t->collision_detector, hashcode)) {
281 /*
282 * This shouldn't happen. proj1, proj2, proj3 must have been computed to be
283 * injective on the given keyword set.
284 */
285 fprintf(stderr, "\nInternal error, unexpected duplicate hash code\n");
287 fprintf(stderr, "try options -m or -r, or use new key positions.\n\n");
288 else
289 fprintf(stderr, "try options -m or -r.\n\n");
290 exit(1);
291 }
292 curr_ptr = curr_ptr->next;
293 }
294 /* sorts the keyword list by hash value */
295 schr_sort(t);
296 /*
297 * Set unused asso_values[c] to max_hash_value + 1. This is not absolutely necessary, but
298 * speeds up the lookup function in many cases of lookup failure: no string comparison is
299 * needed once the hash value of a string is larger than the hash value of any keyword.
300 */
301 {
302 struct Keyword_List *tmp;
304 tmp = t->head;
305 loop {
306 if (tmp->next == 0)
307 break;
308 tmp = tmp->next;
309 }
310 max_hash_value = tmp->kw->hash_value;
311 }
312 c = 0;
313 loop {
314 if (c >= t->alpha_size)
315 break;
316 if (t->occurrences[c] == 0)
317 t->asso_values[c] = max_hash_value + 1;
318 ++c;
319 }
320 /* propagate unified asso_values */
321 if (t->alpha_unify) {
322 u32 c;
324 c = 0;
325 loop {
326 if (c >= t->alpha_size)
327 break;
328 if (t->alpha_unify[c] != c)
329 t->asso_values[c] = t->asso_values[t->alpha_unify[c]];
330 ++c;
331 }
332 }
333 }/*}}}*/
334 /*{{{ schr_prepare */
335 static void schr_prepare(struct Search *t)
336 {
337 struct Keyword_List *tmp;
339 t->total_keys = 0;
340 tmp = t->head;
341 loop {
342 if (tmp == 0)
343 break;
344 ++(t->total_keys);
345 tmp = tmp->next;
346 }
347 /* compute the minimum and maximum keyword length */
348 t->max_key_len = S32_MIN;
349 t->min_key_len = S32_MAX;
350 tmp = t->head;
351 loop {
352 struct Keyword *kw;
354 if (tmp == 0)
355 break;
356 kw = tmp->kw;
357 if (t->max_key_len < kw->allchars_length)
358 t->max_key_len = kw->allchars_length;
359 if (t->min_key_len > kw->allchars_length)
360 t->min_key_len = kw->allchars_length;
361 tmp = tmp->next;
362 }
363 /*
364 * exit program if an empty string is used as keyword, since the comparison expressions
365 * don't work correctly for looking up an empty string
366 */
367 if (t->min_key_len == 0) {
368 fprintf (stderr, "Empty input keyword is not allowed.\nTo recognize an empty input keyword, your code should check for\nlen == 0 before calling the gperf generated lookup function.\n");
369 exit(1);
370 }
371 /* exit program if the characters in the keywords are not in the required range */
372 if (OPTS(SEVENBIT)) {
373 tmp = t->head;
374 loop {
375 struct Keyword *kw;
376 u8 *k;
377 s32 i;
379 if (tmp == 0)
380 break;
381 kw = tmp->kw;
382 k = kw->allchars;
383 i = kw->allchars_length;
384 loop {
385 if (i <= 0)
386 break;
387 if (!(*k < 128)) {
388 fprintf(stderr, "Option --seven-bit has been specified,\nbut keyword \"%.*s\" contains non-ASCII characters.\nTry removing option --seven-bit.\n", kw->allchars_length, kw->allchars);
389 exit(1);
390 }
391 i--;
392 ++k;
393 }
394 tmp = tmp->next;
395 }
396 }
397 /* determine whether the hash function shall include the length */
398 t->hash_includes_len = !(OPTS(NOLENGTH) || (t->min_key_len == t->max_key_len));
399 }/*}}}*/
400 /*{{{ schr_find_positions */
401 /* find good key positions */
402 static void schr_find_positions(struct Search *t)
403 {
404 u32 *alpha_unify;
405 s32 imax;
406 struct Positions *mandatory;
407 struct Positions *current;
408 u32 current_duplicates_count;
409 /* if the user gave the key positions, we use them */
410 if (OPTS(POSITIONS)) {
411 pos_cpy(t->key_positions, options->key_positions);
412 return;
413 }
414 /* compute preliminary alpha_unify table */
415 alpha_unify = schr_compute_alpha_unify(t);
417 /* 1. find positions that must occur in order to distinguish duplicates */
418 mandatory = pos_new();
419 if (!OPTS(DUP)) {
420 struct Keyword_List *l1;
422 l1 = t->head;
423 loop {
424 struct Keyword *kw1;
425 struct Keyword_List *l2;
427 if (l1 == 0 || l1->next == 0)
428 break;
429 kw1 = l1->kw;
430 l2 = l1->next;
431 loop {
432 struct Keyword *kw2;
434 if (l2 == 0)
435 break;
436 kw2 = l2->kw;
437 /*
438 * if keyword1 and keyword2 have the same length and differ
439 * in just one position, and it is not the last character,
440 * this position is mandatory
441 */
442 if (kw1->allchars_length == kw2->allchars_length) {
443 s32 n;
444 s32 i;
446 n = kw1->allchars_length;
447 i = 0;
448 loop {
449 u32 c1;
450 u32 c2;
452 if (i >= (n - 1))
453 break;
454 c1 = kw1->allchars[i];
455 c2 = kw2->allchars[i];
457 if (c1 >= 'A' && c1 <= 'Z')
458 c1 += 'a' - 'A';
459 if (c2 >= 'A' && c2 <= 'Z')
460 c2 += 'a' - 'A';
461 }
462 if (c1 != c2)
463 break;
464 ++i;
465 }
466 if (i < (n - 1)) {
467 s32 j;
469 j = i + 1;
470 loop {
471 u32 c1;
472 u32 c2;
474 if (j >= n)
475 break;
476 c1 = kw1->allchars[j];
477 c2 = kw2->allchars[j];
479 if (c1 >= 'A' && c1 <= 'Z')
480 c1 += 'a' - 'A';
481 if (c2 >= 'A' && c2 <= 'Z')
482 c2 += 'a' - 'A';
483 }
484 if (c1 != c2)
485 break;
486 ++j;
487 }
488 if (j >= n) {
489 /* position i is mandatory */
490 if (!pos_contains(mandatory, i))
491 pos_add(mandatory, i);
492 }
493 }
494 }
495 l2 = l2->next;
496 }
497 l1 = l1->next;
498 }
499 }
500 /* 2. add positions, as long as this decreases the duplicates count */
501 imax = (t->max_key_len - 1 < (s32)POS_MAX_KEY_POS - 1 ? t->max_key_len - 1
502 : (s32)POS_MAX_KEY_POS - 1);
503 current = pos_new();
504 pos_cpy(current, mandatory);
505 current_duplicates_count = schr_count_duplicates_tuple_do(t, current, alpha_unify);
506 loop {
507 struct Positions *best;
508 u32 best_duplicates_count;
509 s32 i;
511 best = pos_new();
512 best_duplicates_count = U32_MAX;
513 i = imax;
514 loop {
515 if (i < -1)
516 break;
517 if (!pos_contains(current, i)) {
518 struct Positions *tryal;
519 u32 try_duplicates_count;
521 tryal = pos_new();
522 pos_cpy(tryal, current);
523 pos_add(tryal, i);
524 try_duplicates_count = schr_count_duplicates_tuple_do(t, tryal,
525 alpha_unify);
526 /*
527 * We prefer 'try' to 'best' if it produces less duplicates, or if
528 * it produces the same number of duplicates but with a more
529 * efficient hash function.
530 */
531 if (try_duplicates_count < best_duplicates_count
532 || (try_duplicates_count == best_duplicates_count
533 && i >=0)) {
534 pos_cpy(best, tryal);
535 best_duplicates_count = try_duplicates_count;
536 }
537 pos_del(tryal);
538 }
539 i--;
540 }
541 /* stop adding positions when it gives no improvement */
542 if (best_duplicates_count >= current_duplicates_count)
543 break;
544 pos_cpy(current, best);
545 pos_del(best);
546 current_duplicates_count = best_duplicates_count;
547 }
548 /* 3. remove positions, as long as this doesn't increase the duplicates count */
549 loop {
550 struct Positions *best;
551 u32 best_duplicates_count;
552 s32 i;
554 best = pos_new();