File input.c added (mode: 100644) (index 0000000..a6784a0) |
|
1 |
|
#ifndef CGPERF_INPUT_C |
|
2 |
|
#define CGPERF_INPUT_C |
|
3 |
|
#include <stdbool.h> |
|
4 |
|
#include <stdio.h> |
|
5 |
|
#include <stdlib.h> |
|
6 |
|
#include <string.h> |
|
7 |
|
#include "globals.h" |
|
8 |
|
#include "keyword.h" |
|
9 |
|
#include "input.h" |
|
10 |
|
#include "getline.h" |
|
11 |
|
#include "options.h" |
|
12 |
|
#include "keyword.h" |
|
13 |
|
#include "keyword_list.h" |
|
14 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
15 |
|
#include "namespace/globals.h" |
|
16 |
|
#include "namespace/input.h" |
|
17 |
|
#include "namespace/input.c" |
|
18 |
|
#include "namespace/keyword.h" |
|
19 |
|
#include "namespace/getline.h" |
|
20 |
|
#include "namespace/options.h" |
|
21 |
|
#include "namespace/keyword.h" |
|
22 |
|
#include "namespace/keyword_list.h" |
|
23 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
24 |
|
/*{{{ local */ |
|
25 |
|
/*{{{ pretty_input_file_name */ |
|
26 |
|
/* returns a pretty representation of the input file name, for error and warning messages */ |
|
27 |
|
static u8 *pretty_input_file_name(void) |
|
28 |
|
{ |
|
29 |
|
u8 *fn; |
|
30 |
|
|
|
31 |
|
fn = options->input_file_name; |
|
32 |
|
if (fn != 0) |
|
33 |
|
return fn; |
|
34 |
|
else |
|
35 |
|
return "(standard input)"; |
|
36 |
|
}/*}}}*/ |
|
37 |
|
/*{{{ is_define_declaration */ |
|
38 |
|
/* |
|
39 |
|
* Tests if the given line contains a "%define DECL ARG" declaration. If yes, it sets *ARGP to the |
|
40 |
|
* argument, and returns true. Otherwise, it returns false. |
|
41 |
|
*/ |
|
42 |
|
static bool is_define_declaration(u8 *line, u8 *line_end, u32 lineno, u8 *decl, u8 **argp) |
|
43 |
|
{ |
|
44 |
|
u8 *d; |
|
45 |
|
u8 *arg; |
|
46 |
|
u8 *p; |
|
47 |
|
/* skip '%' */ |
|
48 |
|
++line; |
|
49 |
|
/* skip "define" */ |
|
50 |
|
{ |
|
51 |
|
u8 *d; |
|
52 |
|
d = "define"; |
|
53 |
|
loop { |
|
54 |
|
if (*d == 0) |
|
55 |
|
break; |
|
56 |
|
if (!(line < line_end)) |
|
57 |
|
return false; |
|
58 |
|
if (!(*line == *d)) |
|
59 |
|
return false; |
|
60 |
|
++line; |
|
61 |
|
++d; |
|
62 |
|
} |
|
63 |
|
if (!(line < line_end && (*line == ' ' || *line == '\t'))) |
|
64 |
|
return false; |
|
65 |
|
} |
|
66 |
|
/* skip whitespace */ |
|
67 |
|
loop { |
|
68 |
|
if (line >= line_end || !(*line == ' ' || *line == '\t')) |
|
69 |
|
break; |
|
70 |
|
++line; |
|
71 |
|
} |
|
72 |
|
/* skip DECL */ |
|
73 |
|
d = decl; |
|
74 |
|
loop { |
|
75 |
|
if (*d == 0) |
|
76 |
|
break; |
|
77 |
|
if (!(line < line_end)) |
|
78 |
|
return false; |
|
79 |
|
if (!(*line == *d || (*d == '-' && *line == '_'))) |
|
80 |
|
return false; |
|
81 |
|
++line; |
|
82 |
|
++d; |
|
83 |
|
} |
|
84 |
|
if (line < line_end |
|
85 |
|
&& ((*line >= 'A' && *line <= 'Z') |
|
86 |
|
|| (*line >= 'a' && *line <= 'z') |
|
87 |
|
|| *line == '-' || *line == '_')) |
|
88 |
|
return false; |
|
89 |
|
/* OK, found DECL */ |
|
90 |
|
/* skip whitespace */ |
|
91 |
|
if (!(line < line_end && (*line == ' ' || *line == '\t'))) { |
|
92 |
|
fprintf (stderr, "%s:%u: missing argument in %%define %s ARG declaration.\n", pretty_input_file_name(), lineno, decl); |
|
93 |
|
exit(1); |
|
94 |
|
} |
|
95 |
|
loop { |
|
96 |
|
++line; |
|
97 |
|
if (line >= line_end || !(*line == ' ' || *line == '\t')) |
|
98 |
|
break; |
|
99 |
|
} |
|
100 |
|
/* The next word is the argument */ |
|
101 |
|
arg = calloc(line_end - line + 1, sizeof(u8)); |
|
102 |
|
p = arg; |
|
103 |
|
loop { |
|
104 |
|
if (line >= line_end || (*line == ' ' || *line == '\t' || *line == '\n')) |
|
105 |
|
break; |
|
106 |
|
*p++ = *line++; |
|
107 |
|
} |
|
108 |
|
*p = '\0'; |
|
109 |
|
/* skip whitespace */ |
|
110 |
|
loop { |
|
111 |
|
if (line >= line_end || !(*line == ' ' || *line == '\t')) |
|
112 |
|
break; |
|
113 |
|
++line; |
|
114 |
|
} |
|
115 |
|
/* expect end of line */ |
|
116 |
|
if (line < line_end && *line != '\n') { |
|
117 |
|
fprintf(stderr, "%s:%u: junk after declaration\n", pretty_input_file_name(), lineno); |
|
118 |
|
exit(1); |
|
119 |
|
} |
|
120 |
|
*argp = arg; |
|
121 |
|
return true; |
|
122 |
|
}/*}}}*/ |
|
123 |
|
/*{{{ is_declaration */ |
|
124 |
|
/* returns true if the given line contains a "%DECL" declaration */ |
|
125 |
|
static bool is_declaration(u8 *line, u8 *line_end, u32 lineno, u8 *decl) |
|
126 |
|
{ |
|
127 |
|
u8 *d; |
|
128 |
|
/* skip '%' */ |
|
129 |
|
++line; |
|
130 |
|
/* skip DECL */ |
|
131 |
|
d = decl; |
|
132 |
|
loop { |
|
133 |
|
if (*d == 0) |
|
134 |
|
break; |
|
135 |
|
if (!(line < line_end)) |
|
136 |
|
return false; |
|
137 |
|
if (!(*line == *d || (*d == '-' && *line == '_'))) |
|
138 |
|
return false; |
|
139 |
|
++line; |
|
140 |
|
++d; |
|
141 |
|
} |
|
142 |
|
if (line < line_end |
|
143 |
|
&& ((*line >= 'A' && *line <= 'Z') |
|
144 |
|
|| (*line >= 'a' && *line <= 'z') |
|
145 |
|
|| *line == '-' || *line == '_')) |
|
146 |
|
return false; |
|
147 |
|
/* OK, found DECL. */ |
|
148 |
|
/* skip whitespace */ |
|
149 |
|
loop { |
|
150 |
|
if (line >= line_end || !(*line == ' ' || *line == '\t')) |
|
151 |
|
break; |
|
152 |
|
++line; |
|
153 |
|
} |
|
154 |
|
/* expect end of line */ |
|
155 |
|
if (line < line_end && *line != '\n') { |
|
156 |
|
fprintf(stderr, "%s:%u: junk after declaration\n", pretty_input_file_name(), lineno); |
|
157 |
|
exit(1); |
|
158 |
|
} |
|
159 |
|
return true; |
|
160 |
|
}/*}}}*/ |
|
161 |
|
/*{{{ is_declaration_with_arg */ |
|
162 |
|
/* |
|
163 |
|
* Tests if the given line contains a "%DECL=ARG" declaration. If yes, it sets *ARGP to the |
|
164 |
|
* argument, and returns true. Otherwise, it returns false |
|
165 |
|
*/ |
|
166 |
|
static bool is_declaration_with_arg(u8 *line, u8 *line_end, u32 lineno, u8 *decl, u8 **argp) |
|
167 |
|
{ |
|
168 |
|
u8 *d; |
|
169 |
|
u8 *arg; |
|
170 |
|
u8 *p; |
|
171 |
|
/* skip '%' */ |
|
172 |
|
++line; |
|
173 |
|
|
|
174 |
|
/* skip DECL */ |
|
175 |
|
d = decl; |
|
176 |
|
loop { |
|
177 |
|
if (*d == 0) |
|
178 |
|
break; |
|
179 |
|
if (!(line < line_end)) |
|
180 |
|
return false; |
|
181 |
|
if (!(*line == *d || (*d == '-' && *line == '_'))) |
|
182 |
|
return false; |
|
183 |
|
++line; |
|
184 |
|
++d; |
|
185 |
|
} |
|
186 |
|
if (line < line_end |
|
187 |
|
&& ((*line >= 'A' && *line <= 'Z') |
|
188 |
|
|| (*line >= 'a' && *line <= 'z') |
|
189 |
|
|| *line == '-' || *line == '_')) |
|
190 |
|
return false; |
|
191 |
|
/* OK, found DECL */ |
|
192 |
|
/* skip '=' */ |
|
193 |
|
if (!(line < line_end && *line == '=')) { |
|
194 |
|
fprintf(stderr, "%s:%u: missing argument in %%%s=ARG declaration.\n", pretty_input_file_name(), lineno, decl); |
|
195 |
|
exit(1); |
|
196 |
|
} |
|
197 |
|
++line; |
|
198 |
|
/* the next word is the argument */ |
|
199 |
|
arg = calloc(line_end - line + 1, sizeof(u8)); |
|
200 |
|
p = arg; |
|
201 |
|
loop { |
|
202 |
|
if (line >= line_end || (*line == ' ' || *line == '\t' || *line == '\n')) |
|
203 |
|
break; |
|
204 |
|
*p++ = *line++; |
|
205 |
|
} |
|
206 |
|
*p = '\0'; |
|
207 |
|
/* skip whitespace */ |
|
208 |
|
loop { |
|
209 |
|
if (line >= line_end || !(*line == ' ' || *line == '\t')) |
|
210 |
|
break; |
|
211 |
|
++line; |
|
212 |
|
} |
|
213 |
|
/* expect end of line */ |
|
214 |
|
if (line < line_end && *line != '\n') { |
|
215 |
|
fprintf(stderr, "%s:%u: junk after declaration\n", pretty_input_file_name(), lineno); |
|
216 |
|
exit(1); |
|
217 |
|
} |
|
218 |
|
*argp = arg; |
|
219 |
|
return true; |
|
220 |
|
}/*}}}*/ |
|
221 |
|
/*}}} local -- END */ |
|
222 |
|
/*{{{ input_new */ |
|
223 |
|
static struct Input *input_new(FILE *stream) |
|
224 |
|
{ |
|
225 |
|
struct Input *t; |
|
226 |
|
|
|
227 |
|
t = calloc(1, sizeof(*t)); |
|
228 |
|
t->stream = stream; |
|
229 |
|
return t; |
|
230 |
|
}/*}}}*/ |
|
231 |
|
/*{{{ input_del */ |
|
232 |
|
static void input_del(struct Input *t) |
|
233 |
|
{ |
|
234 |
|
free(t->return_type); |
|
235 |
|
free(t->struct_tag); |
|
236 |
|
free(t->struct_decl); |
|
237 |
|
free(t); |
|
238 |
|
}/*}}}*/ |
|
239 |
|
/*{{{ input_read_input */ |
|
240 |
|
static void input_read(struct Input *t) |
|
241 |
|
{ |
|
242 |
|
/*{{{ documentation |
|
243 |
|
The input file has the following structure: |
|
244 |
|
DECLARATIONS |
|
245 |
|
%% |
|
246 |
|
KEYWORDS |
|
247 |
|
%% |
|
248 |
|
ADDITIONAL_CODE |
|
249 |
|
Since the DECLARATIONS and the ADDITIONAL_CODE sections are optional, |
|
250 |
|
we have to read the entire file in the case there is only one %% |
|
251 |
|
separator line, in order to determine whether the structure is |
|
252 |
|
DECLARATIONS |
|
253 |
|
%% |
|
254 |
|
KEYWORDS |
|
255 |
|
or |
|
256 |
|
KEYWORDS |
|
257 |
|
%% |
|
258 |
|
ADDITIONAL_CODE |
|
259 |
|
When the option -t is given or when the first section contains |
|
260 |
|
declaration lines starting with %, we go for the first interpretation, |
|
261 |
|
otherwise for the second interpretation. }}}*/ |
|
262 |
|
u8 *input; |
|
263 |
|
u32 input_size; |
|
264 |
|
s32 input_length; |
|
265 |
|
u8 *input_end; |
|
266 |
|
|
|
267 |
|
u8 *declarations; |
|
268 |
|
u8 *declarations_end; |
|
269 |
|
u8 *keywords; |
|
270 |
|
u8 *keywords_end; |
|
271 |
|
u32 keywords_lineno; |
|
272 |
|
|
|
273 |
|
input = 0; |
|
274 |
|
input_size = 0; |
|
275 |
|
input_length = get_delim(&input, &input_size, EOF, t->stream); |
|
276 |
|
if (input_length < 0) { |
|
277 |
|
if (ferror(t->stream)) |
|
278 |
|
fprintf(stderr, "%s: error while reading input file\n", pretty_input_file_name()); |
|
279 |
|
else |
|
280 |
|
fprintf(stderr, "%s: The input file is empty!\n", pretty_input_file_name()); |
|
281 |
|
exit(1); |
|
282 |
|
} |
|
283 |
|
/* |
|
284 |
|
* Convert CR/LF line terminators (Windows) to LF line terminators (Unix). GCC 3.3 and |
|
285 |
|
* newer support CR/LF line terminators in C sources on Unix, so we do the same. |
|
286 |
|
* The so-called "text mode" in stdio on Windows translates CR/LF to \n automatically, but |
|
287 |
|
* here we also need this conversion on Unix. As a side effect, on Windows we also parse |
|
288 |
|
* CR/CR/LF into a single \n, but this is not a problem |
|
289 |
|
*/ |
|
290 |
|
{ |
|
291 |
|
u8 *p; |
|
292 |
|
u8 *p_end; |
|
293 |
|
u8 *q; |
|
294 |
|
|
|
295 |
|
p = input; |
|
296 |
|
p_end = input + input_length; |
|
297 |
|
/* converting the initial segment without CRs is a no-op */ |
|
298 |
|
loop { |
|
299 |
|
if (p >= p_end || *p == '\r') |
|
300 |
|
break; |
|
301 |
|
++p; |
|
302 |
|
} |
|
303 |
|
/* then start the conversion for real */ |
|
304 |
|
q = p; |
|
305 |
|
loop { |
|
306 |
|
if (p >= p_end) |
|
307 |
|
break; |
|
308 |
|
if (p[0] == '\r' && p + 1 < p_end && p[1] == '\n') |
|
309 |
|
++p; |
|
310 |
|
*q++ = *p++; |
|
311 |
|
} |
|
312 |
|
input_length = (s32)(q - input); |
|
313 |
|
} |
|
314 |
|
/* |
|
315 |
|
* We use input_end as a limit, in order to cope with NUL bytes in the input. But note that |
|
316 |
|
* one trailing NUL byte has been added after input_end, for convenience |
|
317 |
|
*/ |
|
318 |
|
input_end = input + input_length; |
|
319 |
|
/* break up the input into the three sections */ |
|
320 |
|
{ |
|
321 |
|
u8 *separator[2]; |
|
322 |
|
u32 separator_lineno[2]; |
|
323 |
|
s32 separators; |
|
324 |
|
bool has_declarations; |
|
325 |
|
|
|
326 |
|
separator[0] = 0; |
|
327 |
|
separator[1] = 0; |
|
328 |
|
separator_lineno[0] = 0; |
|
329 |
|
separator_lineno[1] = 0; |
|
330 |
|
separators = 0; |
|
331 |
|
{ |
|
332 |
|
u32 lineno; |
|
333 |
|
u8 *p; |
|
334 |
|
|
|
335 |
|
lineno = 1; |
|
336 |
|
p = input; |
|
337 |
|
loop { |
|
338 |
|
if (p >= input_end) |
|
339 |
|
break; |
|
340 |
|
if (p[0] == '%' && p[1] == '%') { |
|
341 |
|
separator[separators] = p; |
|
342 |
|
separator_lineno[separators] = lineno; |
|
343 |
|
++separators; |
|
344 |
|
if (separators == 2) |
|
345 |
|
break; |
|
346 |
|
} |
|
347 |
|
++lineno; |
|
348 |
|
p = (u8*)memchr(p, '\n', input_end - p); |
|
349 |
|
if (p != 0) |
|
350 |
|
++p; |
|
351 |
|
else |
|
352 |
|
p = input_end; |
|
353 |
|
} |
|
354 |
|
} |
|
355 |
|
if (separators == 1) { |
|
356 |
|
if (OPTS(TYPE)) |
|
357 |
|
has_declarations = true; |
|
358 |
|
else { |
|
359 |
|
u8 *p; |
|
360 |
|
|
|
361 |
|
has_declarations = false; |
|
362 |
|
p = input; |
|
363 |
|
loop { |
|
364 |
|
if (p >= separator[0]) |
|
365 |
|
break; |
|
366 |
|
if (p[0] == '%') { |
|
367 |
|
has_declarations = true; |
|
368 |
|
break; |
|
369 |
|
} |
|
370 |
|
p = (u8*)memchr(p, '\n', |
|
371 |
|
separator[0] - p); |
|
372 |
|
if (p != 0) |
|
373 |
|
++p; |
|
374 |
|
else |
|
375 |
|
p = separator[0]; |
|
376 |
|
} |
|
377 |
|
} |
|
378 |
|
} else |
|
379 |
|
has_declarations = (separators > 0); |
|
380 |
|
if (has_declarations) { |
|
381 |
|
bool nonempty_line; |
|
382 |
|
u8 *p; |
|
383 |
|
|
|
384 |
|
declarations = input; |
|
385 |
|
declarations_end = separator[0]; |
|
386 |
|
/* give a warning if the separator line is nonempty */ |
|
387 |
|
nonempty_line = false; |
|
388 |
|
p = declarations_end + 2; |
|
389 |
|
loop { |
|
390 |
|
if (p >= input_end) |
|
391 |
|
break; |
|
392 |
|
if (*p == '\n') { |
|
393 |
|
++p; |
|
394 |
|
break; |
|
395 |
|
} |
|
396 |
|
if (!(*p == ' ' || *p == '\t')) |
|
397 |
|
nonempty_line = true; |
|
398 |
|
++p; |
|
399 |
|
} |
|
400 |
|
if (nonempty_line) |
|
401 |
|
fprintf(stderr, "%s:%u: warning: junk after %%%% is ignored\n", pretty_input_file_name(), separator_lineno[0]); |
|
402 |
|
keywords = p; |
|
403 |
|
keywords_lineno = separator_lineno[0] + 1; |
|
404 |
|
} else { |
|
405 |
|
declarations = 0; |
|
406 |
|
declarations_end = 0; |
|
407 |
|
keywords = input; |
|
408 |
|
keywords_lineno = 1; |
|
409 |
|
} |
|
410 |
|
if (separators > (has_declarations ? 1 : 0)) { |
|
411 |
|
keywords_end = separator[separators - 1]; |
|
412 |
|
t->verbatim_code = separator[separators - 1] + 2; |
|
413 |
|
t->verbatim_code_end = input_end; |
|
414 |
|
t->verbatim_code_lineno = separator_lineno[separators - 1]; |
|
415 |
|
} else { |
|
416 |
|
keywords_end = input_end; |
|
417 |
|
t->verbatim_code = 0; |
|
418 |
|
t->verbatim_code_end = 0; |
|
419 |
|
t->verbatim_code_lineno = 0; |
|
420 |
|
} |
|
421 |
|
} |
|
422 |
|
/* parse the declarations section */ |
|
423 |
|
t->verbatim_declarations = 0; |
|
424 |
|
t->verbatim_declarations_end = 0; |
|
425 |
|
t->verbatim_declarations_lineno = 0; |
|
426 |
|
t->struct_decl = 0; |
|
427 |
|
t->struct_decl_lineno = 0; |
|
428 |
|
t->return_type = 0; |
|
429 |
|
t->struct_tag = 0; |
|
430 |
|
{ |
|
431 |
|
u32 lineno; |
|
432 |
|
u8 *struct_decl; |
|
433 |
|
u32 *struct_decl_linenos; |
|
434 |
|
u32 struct_decl_linecount; |
|
435 |
|
u8 *line; |
|
436 |
|
|
|
437 |
|
lineno = 1; |
|
438 |
|
struct_decl = NULL; |
|
439 |
|
struct_decl_linenos = NULL; |
|
440 |
|
struct_decl_linecount = 0; |
|
441 |
|
|
|
442 |
|
line = declarations; |
|
443 |
|
loop { |
|
444 |
|
u8 *line_end; |
|
445 |
|
|
|
446 |
|
if (line >= declarations_end) |
|
447 |
|
break; |
|
448 |
|
line_end = (u8*)memchr(line, '\n', declarations_end - line); |
|
449 |
|
if (line_end != 0) |
|
450 |
|
++line_end; |
|
451 |
|
else |
|
452 |
|
line_end = declarations_end; |
|
453 |
|
|
|
454 |
|
if (*line == '%') { |
|
455 |
|
if (line[1] == '{') { |
|
456 |
|
/* handle %{ */ |
|
457 |
|
if (t->verbatim_declarations != 0) { |
|
458 |
|
fprintf(stderr, "%s:%u:\n%s:%u:only one %%{...%%} section is allowed\n", pretty_input_file_name(), t->verbatim_declarations_lineno, pretty_input_file_name(), lineno); |
|
459 |
|
exit(1); |
|
460 |
|
} |
|
461 |
|
t->verbatim_declarations = line + 2; |
|
462 |
|
t->verbatim_declarations_lineno = lineno; |
|
463 |
|
} else if (line[1] == '}') { |
|
464 |
|
/* handle %} */ |
|
465 |
|
bool nonempty_line; |
|
466 |
|
u8 *q; |
|
467 |
|
if (t->verbatim_declarations == 0) { |
|
468 |
|
fprintf(stderr, "%s:%u: %%} outside of %%{...%%} section\n", pretty_input_file_name(), lineno); |
|
469 |
|
exit(1); |
|
470 |
|
} |
|
471 |
|
if (t->verbatim_declarations_end != 0) { |
|
472 |
|
fprintf(stderr, "%s:%u: %%{...%%} section already closed\n", pretty_input_file_name(), lineno); |
|
473 |
|
exit(1); |
|
474 |
|
} |
|
475 |
|
t->verbatim_declarations_end = line; |
|
476 |
|
/* give a warning if the rest of the line is nonempty */ |
|
477 |
|
nonempty_line = false; |
|
478 |
|
q = line + 2; |
|
479 |
|
loop { |
|
480 |
|
if (q >= line_end) |
|
481 |
|
break; |
|
482 |
|
if (*q == '\n') { |
|
483 |
|
++q; |
|
484 |
|
break; |
|
485 |
|
} |
|
486 |
|
if (!(*q == ' ' || *q == '\t')) |
|
487 |
|
nonempty_line = true; |
|
488 |
|
++q; |
|
489 |
|
} |
|
490 |
|
if (nonempty_line) |
|
491 |
|
fprintf(stderr, "%s:%u: warning: junk after %%} is ignored\n", pretty_input_file_name(), lineno); |
|
492 |
|
} else if (t->verbatim_declarations != 0 |
|
493 |
|
&& t->verbatim_declarations_end == 0) { |
|
494 |
|
fprintf (stderr, "%s:%u: warning: %% directives are ignored" " inside the %%{...%%} section\n", pretty_input_file_name(), lineno); |
|
495 |
|
} else { |
|
496 |
|
u8 *arg; |
|
497 |
|
|
|
498 |
|
#define OPT_SET(x) options->option_word |= OPTS_##x |
|
499 |
|
if (is_declaration_with_arg(line, line_end, lineno, "delimiters", &arg)) |
|
500 |
|
opts_set_delimiters(options, arg); |
|
501 |
|
else |
|
502 |
|
|
|
503 |
|
if (is_declaration(line, line_end, lineno, "struct-type")) |
|
504 |
|
OPT_SET(TYPE); |
|
505 |
|
else |
|
506 |
|
|
|
507 |
|
if (is_declaration(line, line_end, lineno, "ignore-case")) |
|
508 |
|
OPT_SET(UPPERLOWER); |
|
509 |
|
else |
|
510 |
|
|
|
511 |
|
if (is_declaration_with_arg(line, line_end, lineno, "language", &arg)) |
|
512 |
|
opts_set_language(options, arg); |
|
513 |
|
else |
|
514 |
|
|
|
515 |
|
if (is_define_declaration(line, line_end, lineno, "slot-name", &arg)) |
|
516 |
|
opts_set_slot_name(options, arg); |
|
517 |
|
else |
|
518 |
|
|
|
519 |
|
if (is_define_declaration(line, line_end, lineno, "initializer-suffix", &arg)) |
|
520 |
|
opts_set_initializer_suffix(options, arg); |
|
521 |
|
else |
|
522 |
|
|
|
523 |
|
if (is_define_declaration(line, line_end, lineno, "hash-function-name", &arg)) |
|
524 |
|
opts_set_hash_name(options, arg); |
|
525 |
|
else |
|
526 |
|
|
|
527 |
|
if (is_define_declaration(line, line_end, lineno, "lookup-function-name", &arg)) |
|
528 |
|
opts_set_function_name(options, arg); |
|
529 |
|
else |
|
530 |
|
|
|
531 |
|
if (is_define_declaration(line, line_end, lineno, "class-name", &arg)) |
|
532 |
|
opts_set_class_name(options, arg); |
|
533 |
|
else |
|
534 |
|
|
|
535 |
|
if (is_declaration(line, line_end, lineno, "7bit")) |
|
536 |
|
OPT_SET(SEVENBIT); |
|
537 |
|
else |
|
538 |
|
|
|
539 |
|
if (is_declaration(line, line_end, lineno, "compare-lengths")) |
|
540 |
|
OPT_SET(LENTABLE); |
|
541 |
|
else |
|
542 |
|
|
|
543 |
|
if (is_declaration (line, line_end, lineno, "compare-strncmp")) |
|
544 |
|
OPT_SET(COMP); |
|
545 |
|
else |
|
546 |
|
|
|
547 |
|
if (is_declaration(line, line_end, lineno, "readonly-tables")) |
|
548 |
|
OPT_SET(CONST); |
|
549 |
|
else |
|
550 |
|
|
|
551 |
|
if (is_declaration(line, line_end, lineno, "enum")) |
|
552 |
|
OPT_SET(ENUM); |
|
553 |
|
else |
|
554 |
|
|
|
555 |
|
if (is_declaration(line, line_end, lineno, "includes")) |
|
556 |
|
OPT_SET(INCLUDE); |
|
557 |
|
else |
|
558 |
|
|
|
559 |
|
if (is_declaration(line, line_end, lineno, "global-table")) |
|
560 |
|
OPT_SET(GLOBAL); |
|
561 |
|
else |
|
562 |
|
|
|
563 |
|
if (is_declaration(line, line_end, lineno, "pic")) |
|
564 |
|
OPT_SET(SHAREDLIB); |
|
565 |
|
else |
|
566 |
|
|
|
567 |
|
if (is_define_declaration(line, line_end, lineno, "string-pool-name", &arg)) |
|
568 |
|
opts_set_stringpool_name(options, arg); |
|
569 |
|
else |
|
570 |
|
|
|
571 |
|
if (is_declaration(line, line_end, lineno, "null-strings")) |
|
572 |
|
OPT_SET(NULLSTRINGS); |
|
573 |
|
else |
|
574 |
|
|
|
575 |
|
if (is_define_declaration(line, line_end, lineno, "constants-prefix", &arg)) |
|
576 |
|
opts_set_constants_prefix(options, arg); |
|
577 |
|
else |
|
578 |
|
|
|
579 |
|
if (is_define_declaration(line, line_end, lineno, "word-array-name", &arg)) |
|
580 |
|
opts_set_wordlist_name(options, arg); |
|
581 |
|
else |
|
582 |
|
|
|
583 |
|
if (is_define_declaration(line, line_end, lineno, "length-table-name", &arg)) |
|
584 |
|
opts_set_lengthtable_name(options, arg); |
|
585 |
|
else |
|
586 |
|
|
|
587 |
|
if (is_declaration_with_arg(line, line_end, lineno, "switch", &arg)) { |
|
588 |
|
opts_set_total_switches(options, atoi(arg)); |
|
589 |
|
if (options->total_switches <= 0) { |
|
590 |
|
fprintf (stderr, "%s:%u: number of switches %s must be a positive number\n", pretty_input_file_name(), lineno, arg); |
|
591 |
|
exit(1); |
|
592 |
|
} |
|
593 |
|
} |
|
594 |
|
else |
|
595 |
|
|
|
596 |
|
if (is_declaration(line, line_end, lineno, "omit-struct-type")) |
|
597 |
|
OPT_SET(NOTYPE); |
|
598 |
|
else { |
|
599 |
|
fprintf (stderr, "%s:%u: unrecognized %% directive\n", pretty_input_file_name(), lineno); |
|
600 |
|
exit(1); |
|
601 |
|
} |
|
602 |
|
#undef OPT_SET |
|
603 |
|
} |
|
604 |
|
} else if (!(t->verbatim_declarations != 0 |
|
605 |
|
&& t->verbatim_declarations_end == 0)) { |
|
606 |
|
/* append the line to struct_decl */ |
|
607 |
|
u32 old_len; |
|
608 |
|
u32 line_len; |
|
609 |
|
u32 new_len; |
|
610 |
|
u8 *new_struct_decl; |
|
611 |
|
u32 *new_struct_decl_linenos; |
|
612 |
|
|
|
613 |
|
old_len = (struct_decl ? strlen(struct_decl) : 0); |
|
614 |
|
line_len = line_end - line; |
|
615 |
|
new_len = old_len + line_len + 1; |
|
616 |
|
new_struct_decl = calloc(new_len, sizeof(u8)); |
|
617 |
|
if (old_len > 0) |
|
618 |
|
memcpy(new_struct_decl, struct_decl, old_len); |
|
619 |
|
memcpy(new_struct_decl + old_len, line, line_len); |
|
620 |
|
new_struct_decl[old_len + line_len] = '\0'; |
|
621 |
|
if (struct_decl != 0) |
|
622 |
|
free(struct_decl); |
|
623 |
|
struct_decl = new_struct_decl; |
|
624 |
|
/* append the lineno to struct_decl_linenos */ |
|
625 |
|
new_struct_decl_linenos = calloc(struct_decl_linecount + 1, |
|
626 |
|
sizeof(u32)); |
|
627 |
|
if (struct_decl_linecount > 0) |
|
628 |
|
memcpy(new_struct_decl_linenos, struct_decl_linenos, |
|
629 |
|
struct_decl_linecount * sizeof(u32)); |
|
630 |
|
new_struct_decl_linenos[struct_decl_linecount] = lineno; |
|
631 |
|
if (struct_decl_linenos) |
|
632 |
|
free(struct_decl_linenos); |
|
633 |
|
struct_decl_linenos = new_struct_decl_linenos; |
|
634 |
|
/* increment struct_decl_linecount */ |
|
635 |
|
++struct_decl_linecount; |
|
636 |
|
} |
|
637 |
|
++lineno; |
|
638 |
|
line = line_end; |
|
639 |
|
} |
|
640 |
|
if (t->verbatim_declarations != 0 && t->verbatim_declarations_end == 0) { |
|
641 |
|
fprintf(stderr, "%s:%u: unterminated %%{ section\n", pretty_input_file_name(), t->verbatim_declarations_lineno); |
|
642 |
|
exit(1); |
|
643 |
|
} |
|
644 |
|
/* determine _struct_decl, _return_type, _struct_tag */ |
|
645 |
|
if (OPTS(TYPE)) { |
|
646 |
|
u8 *p; |
|
647 |
|
u32 struct_tag_length; |
|
648 |
|
u8 *struct_tag; |
|
649 |
|
u8 *return_type; |
|
650 |
|
|
|
651 |
|
if (struct_decl != 0) { |
|
652 |
|
/* drop leading whitespace and comments */ |
|
653 |
|
{ |
|
654 |
|
u8 *p; |
|
655 |
|
u32 *l; |
|
656 |
|
|
|
657 |
|
p = struct_decl; |
|
658 |
|
l = struct_decl_linenos; |
|
659 |
|
loop { |
|
660 |
|
if (p[0] == ' ' || p[0] == '\t') { |
|
661 |
|
++p; |
|
662 |
|
continue; |
|
663 |
|
} |
|
664 |
|
if (p[0] == '\n') { |
|
665 |
|
++l; |
|
666 |
|
++p; |
|
667 |
|
continue; |
|
668 |
|
} |
|
669 |
|
if (p[0] == '/') { |
|
670 |
|
if (p[1] == '*') { |
|
671 |
|
/* skip over ANSI C style comment */ |
|
672 |
|
p += 2; |
|
673 |
|
loop { |
|
674 |
|
if (p[0] == '\0') |
|
675 |
|
break; |
|
676 |
|
if (p[0] == '*' |
|
677 |
|
&& p[1] == '/') { |
|
678 |
|
p += 2; |
|
679 |
|
break; |
|
680 |
|
} |
|
681 |
|
if (p[0] == '\n') |
|
682 |
|
++l; |
|
683 |
|
++p; |
|
684 |
|
} |
|
685 |
|
continue; |
|
686 |
|
} |
|
687 |
|
if (p[1] == '/') { |
|
688 |
|
/* skip over ISO C99 or C++ style comment */ |
|
689 |
|
p += 2; |
|
690 |
|
loop { |
|
691 |
|
if (p[0] == '\0' |
|
692 |
|
|| p[0] == '\n') |
|
693 |
|
break; |
|
694 |
|
++p; |
|
695 |
|
} |
|
696 |
|
if (p[0] == '\n') { |
|
697 |
|
++l; |
|
698 |
|
++p; |
|
699 |
|
} |
|
700 |
|
continue; |
|
701 |
|
} |
|
702 |
|
} |
|
703 |
|
break; |
|
704 |
|
} |
|
705 |
|
if (p != struct_decl) { |
|
706 |
|
u32 len; |
|
707 |
|
u8 *new_struct_decl; |
|
708 |
|
|
|
709 |
|
len = strlen(p); |
|
710 |
|
new_struct_decl = calloc(len + 1, sizeof(u8)); |
|
711 |
|
memcpy(new_struct_decl, p, len + 1); |
|
712 |
|
free(struct_decl); |
|
713 |
|
struct_decl = new_struct_decl; |
|
714 |
|
} |
|
715 |
|
t->struct_decl_lineno = *l; |
|
716 |
|
} |
|
717 |
|
/* drop trailing whitespace */ |
|
718 |
|
p = struct_decl + strlen(struct_decl); |
|
719 |
|
loop { |
|
720 |
|
if (p <= struct_decl) |
|
721 |
|
break; |
|
722 |
|
if (p[-1] == '\n' || p[-1] == ' ' || p[-1] == '\t') |
|
723 |
|
*--p = '\0'; |
|
724 |
|
else |
|
725 |
|
break; |
|
726 |
|
} |
|
727 |
|
} |
|
728 |
|
if (struct_decl == 0 || struct_decl[0] == '\0') { |
|
729 |
|
fprintf (stderr, "%s: missing struct declaration for option --struct-type\n", pretty_input_file_name()); |
|
730 |
|
exit(1); |
|
731 |
|
} |
|
732 |
|
{ |
|
733 |
|
/* ensure trailing semicolon */ |
|
734 |
|
u32 old_len; |
|
735 |
|
|
|
736 |
|
old_len = strlen(struct_decl); |
|
737 |
|
if (struct_decl[old_len - 1] != ';') { |
|
738 |
|
u8 *new_struct_decl; |
|
739 |
|
|
|
740 |
|
new_struct_decl = calloc(old_len + 2, sizeof(u8)); |
|
741 |
|
memcpy(new_struct_decl, struct_decl, old_len); |
|
742 |
|
new_struct_decl[old_len] = ';'; |
|
743 |
|
new_struct_decl[old_len + 1] = '\0'; |
|
744 |
|
free(struct_decl); |
|
745 |
|
struct_decl = new_struct_decl; |
|
746 |
|
} |
|
747 |
|
} |
|
748 |
|
/* set _struct_decl to the entire declaration */ |
|
749 |
|
t->struct_decl = struct_decl; |
|
750 |
|
/* set _struct_tag to the naked "struct something" */ |
|
751 |
|
p = struct_decl; |
|
752 |
|
loop { |
|
753 |
|
if (*p == 0 || *p == '{' || *p == ';' || *p == '\n') |
|
754 |
|
break; |
|
755 |
|
++p; |
|
756 |
|
} |
|
757 |
|
loop { |
|
758 |
|
if (p <= struct_decl) |
|
759 |
|
break; |
|
760 |
|
if (p[-1] == '\n' || p[-1] == ' ' || p[-1] == '\t') |
|
761 |
|
p--; |
|
762 |
|
else |
|
763 |
|
break; |
|
764 |
|
} |
|
765 |
|
struct_tag_length = p - struct_decl; |
|
766 |
|
struct_tag = calloc(struct_tag_length + 1, sizeof(u8)); |
|
767 |
|
memcpy(struct_tag, struct_decl, struct_tag_length); |
|
768 |
|
struct_tag[struct_tag_length] = '\0'; |
|
769 |
|
t->struct_tag = struct_tag; |
|
770 |
|
/* |
|
771 |
|
* The return type of the lookup function is "struct something *". No |
|
772 |
|
* "const" here, because if !option[CONST], some user code might want to |
|
773 |
|
* modify the structure. |
|
774 |
|
*/ |
|
775 |
|
return_type = calloc(struct_tag_length + 3, sizeof(u8)); |
|
776 |
|
memcpy(return_type, struct_decl, struct_tag_length); |
|
777 |
|
return_type[struct_tag_length] = ' '; |
|
778 |
|
return_type[struct_tag_length + 1] = '*'; |
|
779 |
|
return_type[struct_tag_length + 2] = '\0'; |
|
780 |
|
t->return_type = return_type; |
|
781 |
|
} |
|
782 |
|
if (struct_decl_linenos != 0) |
|
783 |
|
free(struct_decl_linenos); |
|
784 |
|
} |
|
785 |
|
/* parse the keywords section */ |
|
786 |
|
{ |
|
787 |
|
struct Keyword_List **list_tail; |
|
788 |
|
u8 *delimiters; |
|
789 |
|
u32 lineno; |
|
790 |
|
bool charset_dependent; |
|
791 |
|
u8 *line; |
|
792 |
|
|
|
793 |
|
list_tail = &t->head; |
|
794 |
|
delimiters = options->delimiters; |
|
795 |
|
lineno = keywords_lineno; |
|
796 |
|
charset_dependent = false; |
|
797 |
|
line = keywords; |
|
798 |
|
loop { |
|
799 |
|
u8 *line_end; |
|
800 |
|
|
|
801 |
|
if (line >= keywords_end) |
|
802 |
|
break; |
|
803 |
|
line_end = memchr(line, '\n', keywords_end - line); |
|
804 |
|
if (line_end != 0) |
|
805 |
|
++line_end; |
|
806 |
|
else |
|
807 |
|
line_end = keywords_end; |
|
808 |
|
if (line[0] == '#') |
|
809 |
|
; /* comment line */ |
|
810 |
|
else if (line[0] == '%') { |
|
811 |
|
fprintf(stderr, "%s:%u: declarations are not allowed in the keywords section.\nTo declare a keyword starting with %%, enclose it in double-quotes.\n", pretty_input_file_name(), lineno); |
|
812 |
|
exit(1); |
|
813 |
|
} else { |
|
814 |
|
/* an input line carrying a keyword */ |
|
815 |
|
u8 *keyword; |
|
816 |
|
u32 keyword_length; |
|
817 |
|
u8 *rest; |
|
818 |
|
struct Keyword *new_kw; |
|
819 |
|
|
|
820 |
|
if (line[0] == '"') { |
|
821 |
|
/* parse a string in ANSI C syntax */ |
|
822 |
|
u8 *kp; |
|
823 |
|
u8 *lp; |
|
824 |
|
|
|
825 |
|
kp = calloc(line_end - line, sizeof(u8)); |
|
826 |
|
keyword = kp; |
|
827 |
|
lp = line + 1; |
|
828 |
|
loop { |
|
829 |
|
u8 c; |
|
830 |
|
|
|
831 |
|
if (lp == line_end) { |
|
832 |
|
fprintf(stderr, "%s:%u: unterminated string\n", pretty_input_file_name(), lineno); |
|
833 |
|
exit(1); |
|
834 |
|
} |
|
835 |
|
c = *lp; |
|
836 |
|
if (c == '\\') { |
|
837 |
|
c = *++lp; |
|
838 |
|
switch (c) { |
|
839 |
|
case '0': case '1': case '2': case '3': |
|
840 |
|
case '4': case '5': case '6': case '7':{ |
|
841 |
|
s32 code; |
|
842 |
|
s32 count; |
|
843 |
|
|
|
844 |
|
code = 0; |
|
845 |
|
count = 0; |
|
846 |
|
loop { |
|
847 |
|
if (count >= 3 || *lp == '0' || *lp > '7') |
|
848 |
|
break; |
|
849 |
|
code = (code << 3) + (*lp - '0'); |
|
850 |
|
++lp; |
|
851 |
|
++count; |
|
852 |
|
} |
|
853 |
|
if (code > UCHAR_MAX) |
|
854 |
|
fprintf(stderr, "%s:%u: octal escape out of range\n", pretty_input_file_name(), lineno); |
|
855 |
|
*kp = (u8)code; |
|
856 |
|
break;} |
|
857 |
|
case 'x':{ |
|
858 |
|
s32 code; |
|
859 |
|
s32 count; |
|
860 |
|
|
|
861 |
|
code = 0; |
|
862 |
|
count = 0; |
|
863 |
|
++lp; |
|
864 |
|
loop { |
|
865 |
|
if (!(*lp >= '0' && *lp <= '9') || !(*lp >= 'A' && *lp <= 'F') || !(*lp >= 'a' && *lp <= 'f')) |
|
866 |
|
break; |
|
867 |
|
code = (code << 4) |
|
868 |
|
+ (*lp >= 'A' && *lp <= 'F' |
|
869 |
|
? *lp - 'A' + 10 : |
|
870 |
|
*lp >= 'a' && *lp <= 'f' |
|
871 |
|
? *lp - 'a' + 10 : |
|
872 |
|
*lp - '0'); |
|
873 |
|
++lp; |
|
874 |
|
++count; |
|
875 |
|
} |
|
876 |
|
if (count == 0) |
|
877 |
|
fprintf(stderr, "%s:%u: hexadecimal escape without any hex digits\n", pretty_input_file_name(), lineno); |
|
878 |
|
if (code > UCHAR_MAX) |
|
879 |
|
fprintf(stderr, "%s:%u: hexadecimal escape out of range\n", pretty_input_file_name(), lineno); |
|
880 |
|
*kp = (u8)code; |
|
881 |
|
break;} |
|
882 |
|
case '\\': case '\'': case '"': |
|
883 |
|
*kp = c; |
|
884 |
|
++lp; |
|
885 |
|
charset_dependent = true; |
|
886 |
|
break; |
|
887 |
|
case 'n': |
|
888 |
|
*kp = '\n'; |
|
889 |
|
++lp; |
|
890 |
|
charset_dependent = true; |
|
891 |
|
break; |
|
892 |
|
case 't': |
|
893 |
|
*kp = '\t'; |
|
894 |
|
++lp; |
|
895 |
|
charset_dependent = true; |
|
896 |
|
break; |
|
897 |
|
case 'r': |
|
898 |
|
*kp = '\r'; |
|
899 |
|
++lp; |
|
900 |
|
charset_dependent = true; |
|
901 |
|
break; |
|
902 |
|
case 'f': |
|
903 |
|
*kp = '\f'; |
|
904 |
|
++lp; |
|
905 |
|
charset_dependent = true; |
|
906 |
|
break; |
|
907 |
|
case 'b': |
|
908 |
|
*kp = '\b'; |
|
909 |
|
++lp; |
|
910 |
|
charset_dependent = true; |
|
911 |
|
break; |
|
912 |
|
case 'a': |
|
913 |
|
*kp = '\a'; |
|
914 |
|
++lp; |
|
915 |
|
charset_dependent = true; |
|
916 |
|
break; |
|
917 |
|
case 'v': |
|
918 |
|
*kp = '\v'; |
|
919 |
|
++lp; |
|
920 |
|
charset_dependent = true; |
|
921 |
|
break; |
|
922 |
|
default: |
|
923 |
|
fprintf(stderr, "%s:%u: invalid escape sequence in string\n", pretty_input_file_name(), lineno); |
|
924 |
|
exit (1); |
|
925 |
|
} |
|
926 |
|
} else if (c == '"') |
|
927 |
|
break; |
|
928 |
|
else { |
|
929 |
|
*kp = c; |
|
930 |
|
++lp; |
|
931 |
|
charset_dependent = true; |
|
932 |
|
} |
|
933 |
|
++kp; |
|
934 |
|
} |
|
935 |
|
++lp; |
|
936 |
|
if (lp < line_end && *lp != '\n') { |
|
937 |
|
if (strchr(delimiters, *lp) == 0) { |
|
938 |
|
fprintf(stderr, "%s:%u: string not followed by delimiter\n", pretty_input_file_name(), lineno); |
|
939 |
|
exit (1); |
|
940 |
|
} |
|
941 |
|
++lp; |
|
942 |
|
} |
|
943 |
|
keyword_length = kp - keyword; |
|
944 |
|
if (OPTS(TYPE)) { |
|
945 |
|
u8 *line_rest; |
|
946 |
|
|
|
947 |
|
line_rest = calloc(line_end - lp + 1, sizeof(u8)); |
|
948 |
|
memcpy(line_rest, lp, line_end - lp ); |
|
949 |
|
line_rest[line_end - lp - (line_end > lp && line_end[-1] == '\n' ? 1 : 0)] = '\0'; |
|
950 |
|
rest = line_rest; |
|
951 |
|
} else |
|
952 |
|
rest = empty_string; |
|
953 |
|
} else { |
|
954 |
|
/* Not a string. Look for the delimiter. */ |
|
955 |
|
u8 *lp; |
|
956 |
|
|
|
957 |
|
lp = line; |
|
958 |
|
loop { |
|
959 |
|
if (!(lp < line_end && *lp != '\n')) { |
|
960 |
|
keyword = line; |
|
961 |
|
keyword_length = lp - line; |
|
962 |
|
rest = empty_string; |
|
963 |
|
break; |
|
964 |
|
} |
|
965 |
|
if (strchr(delimiters, *lp) != 0) { |
|
966 |
|
keyword = line; |
|
967 |
|
keyword_length = lp - line; |
|
968 |
|
++lp; |
|
969 |
|
if ((cgperf_options->option_word & OPTS_TYPE) != 0) { |
|
970 |
|
u8 *line_rest; |
|
971 |
|
|
|
972 |
|
line_rest = calloc(line_end - lp + 1, sizeof(u8)); |
|
973 |
|
memcpy(line_rest, lp, line_end - lp); |
|
974 |
|
line_rest[line_end - lp - (line_end > lp && line_end[-1] == '\n' ? 1 : 0)] = '\0'; |
|
975 |
|
rest = line_rest; |
|
976 |
|
} else |
|
977 |
|
rest = empty_string; |
|
978 |
|
break; |
|
979 |
|
} |
|
980 |
|
++lp; |
|
981 |
|
} |
|
982 |
|
if (keyword_length > 0) |
|
983 |
|
charset_dependent = true; |
|
984 |
|
} |
|
985 |
|
/* allocate Keyword and add it to the list */ |
|
986 |
|
new_kw = kw_new(keyword, keyword_length, rest, lineno); |
|
987 |
|
*list_tail = kwl_new(new_kw); |
|
988 |
|
list_tail = &(*list_tail)->next; |
|
989 |
|
} |
|
990 |
|
++lineno; |
|
991 |
|
line = line_end; |
|
992 |
|
} |
|
993 |
|
*list_tail = 0; |
|
994 |
|
if (t->head == 0) { |
|
995 |
|
fprintf (stderr, "%s: No keywords in input file!\n", pretty_input_file_name()); |
|
996 |
|
exit(1); |
|
997 |
|
} |
|
998 |
|
t->charset_dependent = charset_dependent; |
|
999 |
|
} |
|
1000 |
|
/* to be freed in the destructor */ |
|
1001 |
|
t->input = input; |
|
1002 |
|
t->input_end = input_end; |
|
1003 |
|
}/*}}}*/ |
|
1004 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
1005 |
|
#define EPILOG |
|
1006 |
|
#include "namespace/globals.h" |
|
1007 |
|
#include "namespace/input.h" |
|
1008 |
|
#include "namespace/input.c" |
|
1009 |
|
#include "namespace/keyword.h" |
|
1010 |
|
#include "namespace/getline.h" |
|
1011 |
|
#include "namespace/options.h" |
|
1012 |
|
#include "namespace/keyword.h" |
|
1013 |
|
#include "namespace/keyword_list.h" |
|
1014 |
|
#undef EPILOG |
|
1015 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
1016 |
|
#endif |
File options.c added (mode: 100644) (index 0000000..1fb9da3) |
|
1 |
|
#ifndef GPERF_OPTIONS_C |
|
2 |
|
#define GPERF_OPTIONS_C |
|
3 |
|
#include <limits.h> |
|
4 |
|
#include <stdio.h> |
|
5 |
|
#include <stdlib.h> |
|
6 |
|
#include <getopt.h> |
|
7 |
|
#include <ctype.h> |
|
8 |
|
#include <string.h> |
|
9 |
|
|
|
10 |
|
#include "globals.h" |
|
11 |
|
#include "options.h" |
|
12 |
|
#include "version.h" |
|
13 |
|
#include "positions.h" |
|
14 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
15 |
|
#include "namespace/globals.h" |
|
16 |
|
#include "namespace/options.h" |
|
17 |
|
#include "namespace/positions.h" |
|
18 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
19 |
|
/*{{{ defaults */ |
|
20 |
|
/* default struct initializer suffix */ |
|
21 |
|
static u8 *DEFAULT_INITIALIZER_SUFFIX = ""; |
|
22 |
|
/* default name for the key component */ |
|
23 |
|
static u8 *DEFAULT_SLOT_NAME = "name"; |
|
24 |
|
/* default delimiters that separate keywords from their attributes */ |
|
25 |
|
static u8 *DEFAULT_DELIMITERS = ","; |
|
26 |
|
/* default name for generated hash function */ |
|
27 |
|
static u8 *DEFAULT_HASH_NAME = "hash"; |
|
28 |
|
/* default name for generated lookup function */ |
|
29 |
|
static u8 *DEFAULT_FUNCTION_NAME = "in_word_set"; |
|
30 |
|
/* default name for the generated class */ |
|
31 |
|
static u8 *DEFAULT_CLASS_NAME = "Perfect_Hash"; |
|
32 |
|
/* default name for string pool */ |
|
33 |
|
static u8 *DEFAULT_STRINGPOOL_NAME = "stringpool"; |
|
34 |
|
/* default prefix for constants */ |
|
35 |
|
static u8 *DEFAULT_CONSTANTS_PREFIX = ""; |
|
36 |
|
/* default name for generated hash table array */ |
|
37 |
|
static u8 *DEFAULT_WORDLIST_NAME = "wordlist"; |
|
38 |
|
/* default name for generated length table array */ |
|
39 |
|
static u8 *DEFAULT_LENGTHTABLE_NAME = "lengthtable"; |
|
40 |
|
/*}}} default -- END */ |
|
41 |
|
/*{{{ opts_new */ |
|
42 |
|
static struct Options *opts_new(void) |
|
43 |
|
{ |
|
44 |
|
struct Options *t; |
|
45 |
|
|
|
46 |
|
t = calloc(1, sizeof(*t)); |
|
47 |
|
t->option_word = OPTS_ANSIC; |
|
48 |
|
t->jump = OPTS_DEFAULT_JUMP_VALUE; |
|
49 |
|
t->total_switches = 1; |
|
50 |
|
t->size_multiple = 1; |
|
51 |
|
t->function_name = DEFAULT_FUNCTION_NAME; |
|
52 |
|
t->slot_name = DEFAULT_SLOT_NAME; |
|
53 |
|
t->initializer_suffix = DEFAULT_INITIALIZER_SUFFIX; |
|
54 |
|
t->class_name = DEFAULT_CLASS_NAME; |
|
55 |
|
t->hash_name = DEFAULT_HASH_NAME; |
|
56 |
|
t->wordlist_name = DEFAULT_WORDLIST_NAME; |
|
57 |
|
t->lengthtable_name = DEFAULT_LENGTHTABLE_NAME; |
|
58 |
|
t->stringpool_name = DEFAULT_STRINGPOOL_NAME; |
|
59 |
|
t->constants_prefix = DEFAULT_CONSTANTS_PREFIX; |
|
60 |
|
t->delimiters = DEFAULT_DELIMITERS; |
|
61 |
|
t->key_positions = pos_new(); |
|
62 |
|
return t; |
|
63 |
|
}/*}}}*/ |
|
64 |
|
/*{{{ opts_del */ |
|
65 |
|
static void opts_del(struct Options *t) |
|
66 |
|
{ |
|
67 |
|
if (OPTS(DEBUG)) { |
|
68 |
|
struct PositionIterator *iter; |
|
69 |
|
s32 pos; |
|
70 |
|
|
|
71 |
|
fprintf(stderr, "\ndumping Options:" |
|
72 |
|
"\nTYPE is........: %s" |
|
73 |
|
"\nUPPERLOWER is..: %s" |
|
74 |
|
"\nKRC is.........: %s" |
|
75 |
|
"\nC is...........: %s" |
|
76 |
|
"\nANSIC is.......: %s" |
|
77 |
|
"\nCPLUSPLUS is...: %s" |
|
78 |
|
"\nSEVENBIT is....: %s" |
|
79 |
|
"\nLENTABLE is....: %s" |
|
80 |
|
"\nCOMP is........: %s" |
|
81 |
|
"\nCONST is.......: %s" |
|
82 |
|
"\nENUM is........: %s" |
|
83 |
|
"\nINCLUDE is.....: %s" |
|
84 |
|
"\nGLOBAL is......: %s" |
|
85 |
|
"\nNULLSTRINGS is.: %s" |
|
86 |
|
"\nSHAREDLIB is...: %s" |
|
87 |
|
"\nSWITCH is......: %s" |
|
88 |
|
"\nNOTYPE is......: %s" |
|
89 |
|
"\nDUP is.........: %s" |
|
90 |
|
"\nNOLENGTH is....: %s" |
|
91 |
|
"\nRANDOM is......: %s" |
|
92 |
|
"\nDEBUG is.......: %s" |
|
93 |
|
"\nlookup function name = %s" |
|
94 |
|
"\nhash function name = %s" |
|
95 |
|
"\nword list name = %s" |
|
96 |
|
"\nlength table name = %s" |
|
97 |
|
"\nstring pool name = %s" |
|
98 |
|
"\nslot name = %s" |
|
99 |
|
"\ninitializer suffix = %s" |
|
100 |
|
"\nasso_values iterations = %d" |
|
101 |
|
"\njump value = %d" |
|
102 |
|
"\nhash table size multiplier = %g" |
|
103 |
|
"\ninitial associated value = %d" |
|
104 |
|
"\ndelimiters = %s" |
|
105 |
|
"\nnumber of switch statements = %d\n", |
|
106 |
|
OPTS(TYPE) ? "enabled" : "disabled", |
|
107 |
|
OPTS(UPPERLOWER) ? "enabled" : "disabled", |
|
108 |
|
OPTS(KRC) ? "enabled" : "disabled", |
|
109 |
|
OPTS(C) ? "enabled" : "disabled", |
|
110 |
|
OPTS(ANSIC) ? "enabled" : "disabled", |
|
111 |
|
OPTS(CPLUSPLUS) ? "enabled" : "disabled", |
|
112 |
|
OPTS(SEVENBIT) ? "enabled" : "disabled", |
|
113 |
|
OPTS(LENTABLE) ? "enabled" : "disabled", |
|
114 |
|
OPTS(COMP) ? "enabled" : "disabled", |
|
115 |
|
OPTS(CONST) ? "enabled" : "disabled", |
|
116 |
|
OPTS(ENUM) ? "enabled" : "disabled", |
|
117 |
|
OPTS(INCLUDE) ? "enabled" : "disabled", |
|
118 |
|
OPTS(GLOBAL) ? "enabled" : "disabled", |
|
119 |
|
OPTS(NULLSTRINGS) ? "enabled" : "disabled", |
|
120 |
|
OPTS(SHAREDLIB) ? "enabled" : "disabled", |
|
121 |
|
OPTS(SWITCH) ? "enabled" : "disabled", |
|
122 |
|
OPTS(NOTYPE) ? "enabled" : "disabled", |
|
123 |
|
OPTS(DUP) ? "enabled" : "disabled", |
|
124 |
|
OPTS(NOLENGTH) ? "enabled" : "disabled", |
|
125 |
|
OPTS(RANDOM) ? "enabled" : "disabled", |
|
126 |
|
OPTS(DEBUG) ? "enabled" : "disabled", |
|
127 |
|
t->function_name, t->hash_name, t->wordlist_name, t->lengthtable_name, |
|
128 |
|
t->stringpool_name, t->slot_name, t->initializer_suffix, |
|
129 |
|
t->asso_iterations, t->jump, t->size_multiple, t->initial_asso_value, |
|
130 |
|
t->delimiters, t->total_switches); |
|
131 |
|
if (t->key_positions->useall) |
|
132 |
|
fprintf(stderr, "all characters are used in the hash function\n"); |
|
133 |
|
else { |
|
134 |
|
fprintf(stderr, "maximum keysig size = %d\nkey positions are: \n", t->key_positions->size); |
|
135 |
|
iter = pos_iterator_all(t->key_positions); |
|
136 |
|
loop { |
|
137 |
|
pos = positer_next(iter); |
|
138 |
|
if (pos == POSITER_EOS) |
|
139 |
|
break; |
|
140 |
|
if (pos == POS_LASTCHAR) |
|
141 |
|
fprintf(stderr, "$\n"); |
|
142 |
|
else |
|
143 |
|
fprintf(stderr, "%d\n", pos + 1); |
|
144 |
|
|
|
145 |
|
} |
|
146 |
|
} |
|
147 |
|
fprintf (stderr, "finished dumping Options\n"); |
|
148 |
|
} |
|
149 |
|
pos_del(t->key_positions); |
|
150 |
|
free(t); |
|
151 |
|
}/*}}}*/ |
|
152 |
|
/*{{{ opts_long_options */
/*
 * Long option table handed to getopt_long() by opts_parse_options().
 * The last field is the value getopt_long() returns for the option: either the letter of the
 * equivalent short option, or CHAR_MAX + n for options that only exist in long form.
 * The all-zero entry terminates the table.
 */
static const struct option opts_long_options[] =
{
	{ "output-file",                    required_argument, NULL, CHAR_MAX + 1 },
	{ "ignore-case",                    no_argument,       NULL, CHAR_MAX + 2 },
	{ "delimiters",                     required_argument, NULL, 'e' },
	{ "struct-type",                    no_argument,       NULL, 't' },
	{ "language",                       required_argument, NULL, 'L' },
	{ "slot-name",                      required_argument, NULL, 'K' },
	{ "initializer-suffix",             required_argument, NULL, 'F' },
	{ "hash-fn-name",                   required_argument, NULL, 'H' }, /* backward compatibility */
	{ "hash-function-name",             required_argument, NULL, 'H' },
	{ "lookup-fn-name",                 required_argument, NULL, 'N' }, /* backward compatibility */
	{ "lookup-function-name",           required_argument, NULL, 'N' },
	{ "class-name",                     required_argument, NULL, 'Z' },
	{ "seven-bit",                      no_argument,       NULL, '7' },
	{ "compare-strncmp",                no_argument,       NULL, 'c' },
	{ "readonly-tables",                no_argument,       NULL, 'C' },
	{ "enum",                           no_argument,       NULL, 'E' },
	{ "includes",                       no_argument,       NULL, 'I' },
	{ "global-table",                   no_argument,       NULL, 'G' },
	{ "constants-prefix",               required_argument, NULL, CHAR_MAX + 5 },
	{ "word-array-name",                required_argument, NULL, 'W' },
	{ "length-table-name",              required_argument, NULL, CHAR_MAX + 4 },
	{ "switch",                         required_argument, NULL, 'S' },
	{ "omit-struct-type",               no_argument,       NULL, 'T' },
	{ "key-positions",                  required_argument, NULL, 'k' },
	{ "compare-strlen",                 no_argument,       NULL, 'l' }, /* backward compatibility */
	{ "compare-lengths",                no_argument,       NULL, 'l' },
	{ "duplicates",                     no_argument,       NULL, 'D' },
	{ "fast",                           required_argument, NULL, 'f' },
	{ "initial-asso",                   required_argument, NULL, 'i' },
	{ "jump",                           required_argument, NULL, 'j' },
	{ "multiple-iterations",            required_argument, NULL, 'm' },
	{ "no-strlen",                      no_argument,       NULL, 'n' },
	{ "occurrence-sort",                no_argument,       NULL, 'o' },
	{ "optimized-collision-resolution", no_argument,       NULL, 'O' },
	{ "pic",                            no_argument,       NULL, 'P' },
	{ "string-pool-name",               required_argument, NULL, 'Q' },
	{ "null-strings",                   no_argument,       NULL, CHAR_MAX + 3 },
	{ "random",                         no_argument,       NULL, 'r' },
	{ "size-multiple",                  required_argument, NULL, 's' },
	{ "help",                           no_argument,       NULL, 'h' },
	{ "version",                        no_argument,       NULL, 'v' },
	{ "debug",                          no_argument,       NULL, 'd' },
	{ NULL,                             no_argument,       NULL, 0 }
};/*}}}*/
|
200 |
|
/*{{{ opts_parse_options */ |
|
201 |
|
static void opts_parse_options(struct Options *t, u32 argc, u8 **argv) |
|
202 |
|
{ |
|
203 |
|
opts_program_name = (u8*)argv[0]; |
|
204 |
|
t->argument_count = argc; |
|
205 |
|
t->argument_vector = argv; |
|
206 |
|
|
|
207 |
|
loop { |
|
208 |
|
int option_char; |
|
209 |
|
|
|
210 |
|
option_char = getopt_long(t->argument_count, t->argument_vector, |
|
211 |
|
"acCdDe:Ef:F:gGhH:i:Ij:k:K:lL:m:nN:oOpPQ:rs:S:tTvW:Z:7", opts_long_options, |
|
212 |
|
NULL); |
|
213 |
|
if (option_char == -1) |
|
214 |
|
break; |
|
215 |
|
switch (option_char) { |
|
216 |
|
case 'a': /* generated code uses the ANSI prototype format */ |
|
217 |
|
break; /* This is now the default */ |
|
218 |
|
case 'c': /* generate strncmp rather than strcmp */ |
|
219 |
|
t->option_word |= OPTS_COMP; |
|
220 |
|
break; |
|
221 |
|
case 'C': /* make the generated tables readonly (const) */ |
|
222 |
|
t->option_word |= OPTS_CONST; |
|
223 |
|
break; |
|
224 |
|
case 'd': /* enable debugging option */ |
|
225 |
|
t->option_word |= OPTS_DEBUG; |
|
226 |
|
fprintf(stderr, "Starting program %s, version %s, with debugging on.\n", opts_program_name, cgperf_version_string); |
|
227 |
|
break; |
|
228 |
|
case 'D': /* enable duplicate option */ |
|
229 |
|
t->option_word |= OPTS_DUP; |
|
230 |
|
break; |
|
231 |
|
case 'e': /* specify keyword/attribute separator */ |
|
232 |
|
t->delimiters = /*getopt*/(u8*)optarg; |
|
233 |
|
break; |
|
234 |
|
case 'E': |
|
235 |
|
t->option_word |= OPTS_ENUM; |
|
236 |
|
break; |
|
237 |
|
case 'f': /* generate the hash table "fast" */ |
|
238 |
|
break; /* Not needed any more */ |
|
239 |
|
case 'F': |
|
240 |
|
t->initializer_suffix = /*getopt*/(u8*)optarg; |
|
241 |
|
break; |
|
242 |
|
case 'g': /* use the 'inline' keyword for generated sub-routines, ifdef __GNUC__ */ |
|
243 |
|
break; /* This is now the default */ |
|
244 |
|
case 'G': /* make the keyword table a global variable */ |
|
245 |
|
t->option_word |= OPTS_GLOBAL; |
|
246 |
|
break; |
|
247 |
|
case 'h': /* displays a list of helpful Options to the user */ |
|
248 |
|
opts_long_usage(stdout); |
|
249 |
|
exit(0); |
|
250 |
|
case 'H': /* sets the name for the hash function */ |
|
251 |
|
t->hash_name = /*getopt*/(u8*)optarg; |
|
252 |
|
break; |
|
253 |
|
case 'i': /* sets the initial value for the associated values array */ |
|
254 |
|
t->initial_asso_value = atoi(/*getopt*/optarg); |
|
255 |
|
if (t->initial_asso_value < 0) |
|
256 |
|
fprintf(stderr, "Initial value %d should be non-zero, ignoring and continuing.\n", t->initial_asso_value); |
|
257 |
|
if (OPTS(RANDOM)) |
|
258 |
|
fprintf(stderr, "warning, -r option superceeds -i, ignoring -i option and continuing\n"); |
|
259 |
|
break; |
|
260 |
|
case 'I': /* enable #include statements */ |
|
261 |
|
t->option_word |= OPTS_INCLUDE; |
|
262 |
|
break; |
|
263 |
|
case 'j': /* sets the jump value, must be odd for later algorithms */ |
|
264 |
|
t->jump = atoi (/*getopt*/optarg); |
|
265 |
|
if (t->jump < 0) { |
|
266 |
|
fprintf(stderr, "Jump value %d must be a positive number.\n", t->jump); |
|
267 |
|
opts_short_usage(stderr); |
|
268 |
|
exit(1); |
|
269 |
|
} else if ((t->jump != 0) && ((t->jump % 2) == 0)) |
|
270 |
|
fprintf (stderr, "Jump value %d should be odd, adding 1 and continuing...\n", t->jump++); |
|
271 |
|
break; |
|
272 |
|
case 'k': { /* sets key positions used for hash function */ |
|
273 |
|
t->option_word |= OPTS_POSITIONS; |
|
274 |
|
s32 BAD_VALUE = -3; |
|
275 |
|
s32 EOS = POSITER_EOS; |
|
276 |
|
s32 value; |
|
277 |
|
struct PositionStringParser *sparser; |
|
278 |
|
|
|
279 |
|
sparser = posstrp_new(/*getopt*/(u8*)optarg, 1, |
|
280 |
|
POS_MAX_KEY_POS, POS_LASTCHAR, BAD_VALUE, EOS); |
|
281 |
|
|
|
282 |
|
if (/*getopt*/optarg[0] == '*') /* use all the characters for hashing!!!! */ |
|
283 |
|
pos_set_useall(t->key_positions, true); |
|
284 |
|
else { |
|
285 |
|
s32 *key_positions; |
|
286 |
|
s32 *key_pos; |
|
287 |
|
u32 total_keysig_size; |
|
288 |
|
|
|
289 |
|
pos_set_useall(t->key_positions, false); |
|
290 |
|
key_positions = t->key_positions->positions; |
|
291 |
|
|
|
292 |
|
key_pos = key_positions; |
|
293 |
|
loop { |
|
294 |
|
value = posstrp_nextPosition(sparser); |
|
295 |
|
if (value == EOS) |
|
296 |
|
break; |
|
297 |
|
if (value == BAD_VALUE) { |
|
298 |
|
fprintf(stderr, "Invalid position value or range, use 1,2,3-%d,'$' or '*'.\n", POS_MAX_KEY_POS); |
|
299 |
|
opts_short_usage(stderr); |
|
300 |
|
exit(1); |
|
301 |
|
} |
|
302 |
|
if ((key_pos - key_positions) == POS_MAX_SIZE) { |
|
303 |
|
/* |
|
304 |
|
* More than Positions_max_size key positions. |
|
305 |
|
* Since all key positions are in the range |
|
306 |
|
* 0..Positions_max_key_pos-1 or == Positions_lastchar, |
|
307 |
|
* there must be duplicates. |
|
308 |
|
*/ |
|
309 |
|
fprintf(stderr, "Duplicate key positions selected\n"); |
|
310 |
|
opts_short_usage(stderr); |
|
311 |
|
exit(1); |
|
312 |
|
} |
|
313 |
|
if (value != POS_LASTCHAR) |
|
314 |
|
/* We use 0-based indices in the class Positions */ |
|
315 |
|
value = value - 1; |
|
316 |
|
*key_pos = value; |
|
317 |
|
++key_pos; |
|
318 |
|
} |
|
319 |
|
total_keysig_size = key_pos - key_positions; |
|
320 |
|
if (total_keysig_size == 0) { |
|
321 |
|
fprintf(stderr, "No key positions selected.\n"); |
|
322 |
|
opts_short_usage(stderr); |
|
323 |
|
exit(1); |
|
324 |
|
} |
|
325 |
|
t->key_positions->size = total_keysig_size; |
|
326 |
|
/* |
|
327 |
|
* Sorts the key positions *IN REVERSE ORDER!!* |
|
328 |
|
* This makes further routines more efficient. |
|
329 |
|
* Especially when generating code. |
|
330 |
|
*/ |
|
331 |
|
if (!pos_sort(t->key_positions)) { |
|
332 |
|
fprintf(stderr, "Duplicate key positions selected\n"); |
|
333 |
|
opts_short_usage(stderr); |
|
334 |
|
exit(1); |
|
335 |
|
} |
|
336 |
|
} |
|
337 |
|
break;} |
|
338 |
|
case 'K':/* make this the keyname for the keyword component field */ |
|
339 |
|
t->slot_name = /*getopt*/optarg; |
|
340 |
|
break; |
|
341 |
|
case 'l':/* create length table to avoid extra string compares */ |
|
342 |
|
t->option_word |= OPTS_LENTABLE; |
|
343 |
|
break; |
|
344 |
|
case 'L':/* deal with different generated languages */ |
|
345 |
|
t->language = 0; |
|
346 |
|
opts_set_language(t,/*getopt*/optarg); |
|
347 |
|
break; |
|
348 |
|
case 'm':/* multiple iterations for finding good asso_values */ |
|
349 |
|
t->asso_iterations = atoi(/*getopt*/optarg); |
|
350 |
|
if (t->asso_iterations < 0) { |
|
351 |
|
fprintf(stderr, "asso_iterations value must not be negative, assuming 0\n"); |
|
352 |
|
t->asso_iterations = 0; |
|
353 |
|
} |
|
354 |
|
break; |
|
355 |
|
case 'n':/* don't include the length when computing hash function */ |
|
356 |
|
t->option_word |= OPTS_NOLENGTH; |
|
357 |
|
break; |
|
358 |
|
case 'N':/* make generated lookup function name be optarg */ |
|
359 |
|
t->function_name = /*getopt*/optarg; |
|
360 |
|
break; |
|
361 |
|
case 'o':/* order input by frequency of key set occurrence */ |
|
362 |
|
break; /* not needed any more */ |
|
363 |
|
case 'O':/* optimized choice during collision resolution */ |
|
364 |
|
break; /* not needed any more */ |
|
365 |
|
case 'p':/* generated lookup function a pointer instead of int */ |
|
366 |
|
break; /* this is now the default */ |
|
367 |
|
case 'P':/* optimize for position-independent code */ |
|
368 |
|
t->option_word |= OPTS_SHAREDLIB; |
|
369 |
|
break; |
|
370 |
|
case 'Q':/* sets the name for the string pool */ |
|
371 |
|
t->stringpool_name = /*getopt*/optarg; |
|
372 |
|
break; |
|
373 |
|
case 'r':/* utilize randomness to initialize the associated values table */ |
|
374 |
|
t->option_word |= OPTS_RANDOM; |
|
375 |
|
if (t->initial_asso_value != 0) |
|
376 |
|
fprintf(stderr, "warning, -r option supersedes -i, disabling -i option and continuing\n"); |
|
377 |
|
break; |
|
378 |
|
case 's':{/* range of associated values, determines size of final table */ |
|
379 |
|
f32 numerator; |
|
380 |
|
f32 denominator; |
|
381 |
|
bool invalid; |
|
382 |
|
u8 *endptr; |
|
383 |
|
|
|
384 |
|
denominator = 1; |
|
385 |
|
invalid = false; |
|
386 |
|
numerator = strtod(/*getopt*/optarg, &endptr); |
|
387 |
|
if (endptr == /*getopt*/(u8*)optarg) |
|
388 |
|
invalid = true; |
|
389 |
|
else if (*endptr != '\0') { |
|
390 |
|
if (*endptr == '/') { |
|
391 |
|
u8 *denomptr; |
|
392 |
|
|
|
393 |
|
denomptr = endptr + 1; |
|
394 |
|
denominator = strtod(denomptr, &endptr); |
|
395 |
|
if (endptr == denomptr || *endptr != '\0') |
|
396 |
|
invalid = true; |
|
397 |
|
} else |
|
398 |
|
invalid = true; |
|
399 |
|
} |
|
400 |
|
if (invalid) { |
|
401 |
|
fprintf(stderr, "Invalid value for option -s.\n"); |
|
402 |
|
opts_short_usage(stderr); |
|
403 |
|
exit (1); |
|
404 |
|
} |
|
405 |
|
t->size_multiple = numerator / denominator; |
|
406 |
|
/* backward compatibility: -3 means 1/3 */ |
|
407 |
|
if (t->size_multiple < 0) |
|
408 |
|
t->size_multiple = 1 / (-t->size_multiple); |
|
409 |
|
/* catch stupid users and port to C the c++ from stupid coders */ |
|
410 |
|
if (t->size_multiple == 0) |
|
411 |
|
t->size_multiple = 1; |
|
412 |
|
/* warnings */ |
|
413 |
|
if (t->size_multiple > 50) |
|
414 |
|
fprintf(stderr, "Size multiple %g is excessive, did you really mean this?! (try '%s --help' for help)\n", t->size_multiple, opts_program_name); |
|
415 |
|
else if (t->size_multiple < 0.01f) |
|
416 |
|
fprintf(stderr, "Size multiple %g is extremely small, did you really mean this?! (try '%s --help' for help)\n", t->size_multiple, opts_program_name); |
|
417 |
|
break;} |
|
418 |
|
case 'S':/* generate switch statement output, rather than lookup table */ |
|
419 |
|
t->option_word |= OPTS_SWITCH; |
|
420 |
|
t->total_switches = atoi(/*getopt*/optarg); |
|
421 |
|
if (t->total_switches <= 0) { |
|
422 |
|
fprintf(stderr, "number of switches %s must be a positive number\n", /*getopt*/optarg); |
|
423 |
|
opts_short_usage (stderr); |
|
424 |
|
exit(1); |
|
425 |
|
} |
|
426 |
|
break; |
|
427 |
|
case 't':/* enable the TYPE mode, allowing arbitrary user structures */ |
|
428 |
|
t->option_word |= OPTS_TYPE; |
|
429 |
|
break; |
|
430 |
|
case 'T':/* don't print structure definition */ |
|
431 |
|
t->option_word |= OPTS_NOTYPE; |
|
432 |
|
break; |
|
433 |
|
case 'v':/* print out the version and quit */ |
|
434 |
|
fprintf(stdout, "GNU gperf %s\n", cgperf_version_string); |
|
435 |
|
fprintf(stdout, "Copyright (C) %s Free Software Foundation, Inc.\n\ |
|
436 |
|
License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>\n\ |
|
437 |
|
This is free software: you are free to change and redistribute it.\n\ |
|
438 |
|
There is NO WARRANTY, to the extent permitted by law.\n\ |
|
439 |
|
", |
|
440 |
|
"1989-2018"); |
|
441 |
|
fprintf(stdout, "Written by %s and %s. C89 with benign bits of C99/C11 port by Sylvain BERTRAND\n", "Douglas C. Schmidt", "Bruno Haible"); |
|
442 |
|
exit(0); |
|
443 |
|
case 'W':/* sets the name for the hash table array */ |
|
444 |
|
t->wordlist_name = /*getopt*/optarg; |
|
445 |
|
break; |
|
446 |
|
case 'Z':/* set the class name */ |
|
447 |
|
t->class_name = /*getopt*/optarg; |
|
448 |
|
break; |
|
449 |
|
case '7':/* assume 7-bit characters */ |
|
450 |
|
t->option_word |= OPTS_SEVENBIT; |
|
451 |
|
break; |
|
452 |
|
case CHAR_MAX + 1:/* set the output file name */ |
|
453 |
|
t->output_file_name = /*getopt*/optarg; |
|
454 |
|
break; |
|
455 |
|
case CHAR_MAX + 2:/* case insignificant */ |
|
456 |
|
t->option_word |= OPTS_UPPERLOWER; |
|
457 |
|
break; |
|
458 |
|
case CHAR_MAX + 3:/* use NULL instead of "" */ |
|
459 |
|
t->option_word |= OPTS_NULLSTRINGS; |
|
460 |
|
break; |
|
461 |
|
case CHAR_MAX + 4:/* sets the name for the length table array */ |
|
462 |
|
t->lengthtable_name = /*getopt*/optarg; |
|
463 |
|
break; |
|
464 |
|
case CHAR_MAX + 5:/* sets the prefix for the constants */ |
|
465 |
|
t->constants_prefix = /*getopt*/optarg; |
|
466 |
|
break; |
|
467 |
|
default: |
|
468 |
|
opts_short_usage(stderr); |
|
469 |
|
exit(1); |
|
470 |
|
} |
|
471 |
|
} |
|
472 |
|
if (/*getopt*/optind < argc) |
|
473 |
|
t->input_file_name = argv[/*getopt*/optind++]; |
|
474 |
|
|
|
475 |
|
if (/*getopt*/optind < argc) { |
|
476 |
|
fprintf(stderr, "Extra trailing arguments to %s.\n", opts_program_name); |
|
477 |
|
opts_short_usage(stderr); |
|
478 |
|
exit(1); |
|
479 |
|
} |
|
480 |
|
}/*}}}*/ |
|
481 |
|
/*{{{ opts_short_usage */ |
|
482 |
|
static void opts_short_usage(FILE * stream) |
|
483 |
|
{ |
|
484 |
|
fprintf(stream, "Try '%s --help' for more information.\n", opts_program_name); |
|
485 |
|
}/*}}}*/ |
|
486 |
|
/*{{{ opts_long_usage */ |
|
487 |
|
static void opts_long_usage(FILE * stream) |
|
488 |
|
{ |
|
489 |
|
fprintf(stream, |
|
490 |
|
"GNU 'gperf' generates perfect hash functions.\n"); |
|
491 |
|
fprintf(stream, "\n"); |
|
492 |
|
fprintf(stream, |
|
493 |
|
"Usage: %s [OPTION]... [INPUT-FILE]\n", |
|
494 |
|
opts_program_name); |
|
495 |
|
fprintf(stream, "\n"); |
|
496 |
|
fprintf(stream, |
|
497 |
|
"If a long option shows an argument as mandatory, then it is mandatory\n" |
|
498 |
|
"for the equivalent short option also.\n"); |
|
499 |
|
fprintf(stream, "\n"); |
|
500 |
|
fprintf(stream, |
|
501 |
|
"Output file location:\n"); |
|
502 |
|
fprintf(stream, |
|
503 |
|
" --output-file=FILE Write output to specified file.\n"); |
|
504 |
|
fprintf(stream, |
|
505 |
|
"The results are written to standard output if no output file is specified\n" |
|
506 |
|
"or if it is -.\n"); |
|
507 |
|
fprintf(stream, "\n"); |
|
508 |
|
fprintf(stream, |
|
509 |
|
"Input file interpretation:\n"); |
|
510 |
|
fprintf(stream, |
|
511 |
|
" -e, --delimiters=DELIMITER-LIST\n" |
|
512 |
|
" Allow user to provide a string containing delimiters\n" |
|
513 |
|
" used to separate keywords from their attributes.\n" |
|
514 |
|
" Default is \",\".\n"); |
|
515 |
|
fprintf(stream, |
|
516 |
|
" -t, --struct-type Allows the user to include a structured type\n" |
|
517 |
|
" declaration for generated code. Any text before %%%%\n" |
|
518 |
|
" is considered part of the type declaration. Key\n" |
|
519 |
|
" words and additional fields may follow this, one\n" |
|
520 |
|
" group of fields per line.\n"); |
|
521 |
|
fprintf(stream, |
|
522 |
|
" --ignore-case Consider upper and lower case ASCII characters as\n" |
|
523 |
|
" equivalent. Note that locale dependent case mappings\n" |
|
524 |
|
" are ignored.\n"); |
|
525 |
|
fprintf(stream, "\n"); |
|
526 |
|
fprintf(stream, |
|
527 |
|
"Language for the output code:\n"); |
|
528 |
|
fprintf(stream, |
|
529 |
|
" -L, --language=LANGUAGE-NAME\n" |
|
530 |
|
" Generates code in the specified language. Languages\n" |
|
531 |
|
" handled are currently C++, ANSI-C, C, and KR-C. The\n" |
|
532 |
|
" default is ANSI-C.\n"); |
|
533 |
|
fprintf(stream, "\n"); |
|
534 |
|
fprintf(stream, |
|
535 |
|
"Details in the output code:\n"); |
|
536 |
|
fprintf(stream, |
|
537 |
|
" -K, --slot-name=NAME Select name of the keyword component in the keyword\n" |
|
538 |
|
" structure.\n"); |
|
539 |
|
fprintf(stream, |
|
540 |
|
" -F, --initializer-suffix=INITIALIZERS\n" |
|
541 |
|
" Initializers for additional components in the keyword\n" |
|
542 |
|
" structure.\n"); |
|
543 |
|
fprintf(stream, |
|
544 |
|
" -H, --hash-function-name=NAME\n" |
|
545 |
|
" Specify name of generated hash function. Default is\n" |
|
546 |
|
" 'hash'.\n"); |
|
547 |
|
fprintf(stream, |
|
548 |
|
" -N, --lookup-function-name=NAME\n" |
|
549 |
|
" Specify name of generated lookup function. Default\n" |
|
550 |
|
" name is 'in_word_set'.\n"); |
|
551 |
|
fprintf(stream, |
|
552 |
|
" -Z, --class-name=NAME Specify name of generated C++ class. Default name is\n" |
|
553 |
|
" 'Perfect_Hash'.\n"); |
|
554 |
|
fprintf(stream, |
|
555 |
|
" -7, --seven-bit Assume 7-bit characters.\n"); |
|
556 |
|
fprintf(stream, |
|
557 |
|
" -l, --compare-lengths Compare key lengths before trying a string\n" |
|
558 |
|
" comparison. This is necessary if the keywords\n" |
|
559 |
|
" contain NUL bytes. It also helps cut down on the\n" |
|
560 |
|
" number of string comparisons made during the lookup.\n"); |
|
561 |
|
fprintf(stream, |
|
562 |
|
" -c, --compare-strncmp Generate comparison code using strncmp rather than\n" |
|
563 |
|
" strcmp.\n"); |
|
564 |
|
fprintf(stream, |
|
565 |
|
" -C, --readonly-tables Make the contents of generated lookup tables\n" |
|
566 |
|
" constant, i.e., readonly.\n"); |
|
567 |
|
fprintf(stream, |
|
568 |
|
" -E, --enum Define constant values using an enum local to the\n" |
|
569 |
|
" lookup function rather than with defines.\n"); |
|
570 |
|
fprintf(stream, |
|
571 |
|
" -I, --includes Include the necessary system include file <string.h>\n" |
|
572 |
|
" at the beginning of the code.\n"); |
|
573 |
|
fprintf(stream, |
|
574 |
|
" -G, --global-table Generate the static table of keywords as a static\n" |
|
575 |
|
" global variable, rather than hiding it inside of the\n" |
|
576 |
|
" lookup function (which is the default behavior).\n"); |
|
577 |
|
fprintf(stream, |
|
578 |
|
" -P, --pic Optimize the generated table for inclusion in shared\n" |
|
579 |
|
" libraries. This reduces the startup time of programs\n" |
|
580 |
|
" using a shared library containing the generated code.\n"); |
|
581 |
|
fprintf(stream, |
|
582 |
|
" -Q, --string-pool-name=NAME\n" |
|
583 |
|
" Specify name of string pool generated by option --pic.\n" |
|
584 |
|
" Default name is 'stringpool'.\n"); |
|
585 |
|
fprintf(stream, |
|
586 |
|
" --null-strings Use NULL strings instead of empty strings for empty\n" |
|
587 |
|
" keyword table entries.\n"); |
|
588 |
|
fprintf(stream, |
|
589 |
|
" --constants-prefix=PREFIX\n" |
|
590 |
|
" Specify prefix for the constants like TOTAL_KEYWORDS.\n"); |
|
591 |
|
fprintf(stream, |
|
592 |
|
" -W, --word-array-name=NAME\n" |
|
593 |
|
" Specify name of word list array. Default name is\n" |
|
594 |
|
" 'wordlist'.\n"); |
|
595 |
|
fprintf(stream, |
|
596 |
|
" --length-table-name=NAME\n" |
|
597 |
|
" Specify name of length table array. Default name is\n" |
|
598 |
|
" 'lengthtable'.\n"); |
|
599 |
|
fprintf(stream, |
|
600 |
|
" -S, --switch=COUNT Causes the generated C code to use a switch\n" |
|
601 |
|
" statement scheme, rather than an array lookup table.\n" |
|
602 |
|
" This can lead to a reduction in both time and space\n" |
|
603 |
|
" requirements for some keyfiles. The COUNT argument\n" |
|
604 |
|
" determines how many switch statements are generated.\n" |
|
605 |
|
" A value of 1 generates 1 switch containing all the\n" |
|
606 |
|
" elements, a value of 2 generates 2 tables with 1/2\n" |
|
607 |
|
" the elements in each table, etc. If COUNT is very\n" |
|
608 |
|
" large, say 1000000, the generated C code does a\n" |
|
609 |
|
" binary search.\n"); |
|
610 |
|
fprintf(stream, |
|
611 |
|
" -T, --omit-struct-type\n" |
|
612 |
|
" Prevents the transfer of the type declaration to the\n" |
|
613 |
|
" output file. Use this option if the type is already\n" |
|
614 |
|
" defined elsewhere.\n"); |
|
615 |
|
fprintf(stream, "\n"); |
|
616 |
|
fprintf(stream, |
|
617 |
|
"Algorithm employed by gperf:\n"); |
|
618 |
|
fprintf(stream, |
|
619 |
|
" -k, --key-positions=KEYS\n" |
|
620 |
|
" Select the key positions used in the hash function.\n" |
|
621 |
|
" The allowable choices range between 1-%d, inclusive.\n" |
|
622 |
|
" The positions are separated by commas, ranges may be\n" |
|
623 |
|
" used, and key positions may occur in any order.\n" |
|
624 |
|
" Also, the meta-character '*' causes the generated\n" |
|
625 |
|
" hash function to consider ALL key positions, and $\n" |
|
626 |
|
" indicates the \"final character\" of a key, e.g.,\n" |
|
627 |
|
" $,1,2,4,6-10.\n", |
|
628 |
|
POS_MAX_KEY_POS); |
|
629 |
|
fprintf(stream, |
|
630 |
|
" -D, --duplicates Handle keywords that hash to duplicate values. This\n" |
|
631 |
|
" is useful for certain highly redundant keyword sets.\n"); |
|
632 |
|
fprintf(stream, |
|
633 |
|
" -m, --multiple-iterations=ITERATIONS\n" |
|
634 |
|
" Perform multiple choices of the -i and -j values,\n" |
|
635 |
|
" and choose the best results. This increases the\n" |
|
636 |
|
" running time by a factor of ITERATIONS but does a\n" |
|
637 |
|
" good job minimizing the generated table size.\n"); |
|
638 |
|
fprintf(stream, |
|
639 |
|
" -i, --initial-asso=N Provide an initial value for the associate values\n" |
|
640 |
|
" array. Default is 0. Setting this value larger helps\n" |
|
641 |
|
" inflate the size of the final table.\n"); |
|
642 |
|
fprintf(stream, |
|
643 |
|
" -j, --jump=JUMP-VALUE Affects the \"jump value\", i.e., how far to advance\n" |
|
644 |
|
" the associated character value upon collisions. Must\n" |
|
645 |
|
" be an odd number, default is %d.\n", |
|
646 |
|
OPTS_DEFAULT_JUMP_VALUE); |
|
647 |
|
fprintf(stream, |
|
648 |
|
" -n, --no-strlen Do not include the length of the keyword when\n" |
|
649 |
|
" computing the hash function.\n"); |
|
650 |
|
fprintf(stream, |
|
651 |
|
" -r, --random Utilizes randomness to initialize the associated\n" |
|
652 |
|
" values table.\n"); |
|
653 |
|
fprintf(stream, |
|
654 |
|
" -s, --size-multiple=N Affects the size of the generated hash table. The\n" |
|
655 |
|
" numeric argument N indicates \"how many times larger\n" |
|
656 |
|
" or smaller\" the associated value range should be,\n" |
|
657 |
|
" in relationship to the number of keys, e.g. a value\n" |
|
658 |
|
" of 3 means \"allow the maximum associated value to\n" |
|
659 |
|
" be about 3 times larger than the number of input\n" |
|
660 |
|
" keys\". Conversely, a value of 1/3 means \"make the\n" |
|
661 |
|
" maximum associated value about 3 times smaller than\n" |
|
662 |
|
" the number of input keys\". A larger table should\n" |
|
663 |
|
" decrease the time required for an unsuccessful\n" |
|
664 |
|
" search, at the expense of extra table space. Default\n" |
|
665 |
|
" value is 1.\n"); |
|
666 |
|
fprintf(stream, "\n"); |
|
667 |
|
fprintf(stream, |
|
668 |
|
"Informative output:\n" |
|
669 |
|
" -h, --help Print this message.\n" |
|
670 |
|
" -v, --version Print the gperf version number.\n" |
|
671 |
|
" -d, --debug Enables the debugging option (produces verbose\n" |
|
672 |
|
" output to the standard error).\n"); |
|
673 |
|
fprintf(stream, "\n"); |
|
674 |
|
fprintf(stream, |
|
675 |
|
"Report bugs to <bug-gperf@gnu.org>.\n"); |
|
676 |
|
}/*}}}*/ |
|
677 |
|
/*{{{ opts_set_language */ |
|
678 |
|
/* Sets the output language, if not already set */ |
|
679 |
|
void opts_set_language(struct Options *t, u8 *language) |
|
680 |
|
{ |
|
681 |
|
if (t->language != 0) |
|
682 |
|
return; |
|
683 |
|
t->language = language; |
|
684 |
|
t->option_word &= ~(OPTS_KRC | OPTS_C | OPTS_ANSIC | OPTS_CPLUSPLUS); |
|
685 |
|
if (strcmp(language, "KR-C") == 0) |
|
686 |
|
t->option_word |= OPTS_KRC; |
|
687 |
|
else if (strcmp (language, "C") == 0) |
|
688 |
|
t->option_word |= OPTS_C; |
|
689 |
|
else if (strcmp (language, "ANSI-C") == 0) |
|
690 |
|
t->option_word |= OPTS_ANSIC; |
|
691 |
|
else if (strcmp (language, "C++") == 0) |
|
692 |
|
t->option_word |= OPTS_CPLUSPLUS; |
|
693 |
|
else { |
|
694 |
|
fprintf(stderr, "unsupported language option %s, defaulting to ANSI-C\n", language); |
|
695 |
|
t->option_word |= OPTS_ANSIC; |
|
696 |
|
} |
|
697 |
|
}/*}}}*/ |
|
698 |
|
/*{{{ opts_set_delimiters */ |
|
699 |
|
/* Sets the delimiters string, if not already set. */ |
|
700 |
|
static void opts_set_delimiters(struct Options *t, u8 *delimiters) |
|
701 |
|
{ |
|
702 |
|
if (t->delimiters == DEFAULT_DELIMITERS) |
|
703 |
|
t->delimiters = delimiters; |
|
704 |
|
}/*}}}*/ |
|
705 |
|
/*{{{ opts_set_slot_name */ |
|
706 |
|
/* sets the keyword key name, if not already set */ |
|
707 |
|
static void opts_set_slot_name(struct Options *t, u8 *name) |
|
708 |
|
{ |
|
709 |
|
if (t->slot_name == DEFAULT_SLOT_NAME) |
|
710 |
|
t->slot_name = name; |
|
711 |
|
}/*}}}*/ |
|
712 |
|
/*{{{ opts_set_initializer_suffix */ |
|
713 |
|
/* sets the struct initializer suffix, if not already set */ |
|
714 |
|
static void opts_set_initializer_suffix(struct Options *t, u8 *initializers) |
|
715 |
|
{ |
|
716 |
|
if (t->initializer_suffix == DEFAULT_INITIALIZER_SUFFIX) |
|
717 |
|
t->initializer_suffix = initializers; |
|
718 |
|
}/*}}}*/ |
|
719 |
|
/*{{{ opts_set_hash_name */ |
|
720 |
|
/* sets the hash function name, if not already set */ |
|
721 |
|
static void opts_set_hash_name(struct Options *t, u8 *name) |
|
722 |
|
{ |
|
723 |
|
if (t->hash_name == DEFAULT_HASH_NAME) |
|
724 |
|
t->hash_name = name; |
|
725 |
|
}/*}}}*/ |
|
726 |
|
/*{{{ opts_set_function_name */ |
|
727 |
|
/* sets the generated function name, if not already set */ |
|
728 |
|
static void opts_set_function_name(struct Options *t, u8 *name) |
|
729 |
|
{ |
|
730 |
|
if (t->function_name == DEFAULT_FUNCTION_NAME) |
|
731 |
|
t->function_name = name; |
|
732 |
|
}/*}}}*/ |
|
733 |
|
/*{{{ opts_set_class_name */ |
|
734 |
|
/* sets the generated class name, if not already set */ |
|
735 |
|
static void opts_set_class_name(struct Options *t, u8 *name) |
|
736 |
|
{ |
|
737 |
|
if (t->class_name == DEFAULT_CLASS_NAME) |
|
738 |
|
t->class_name = name; |
|
739 |
|
}/*}}}*/ |
|
740 |
|
/*{{{ opts_set_stringpool_name */ |
|
741 |
|
/* sets the string pool name, if not already set */ |
|
742 |
|
static void opts_set_stringpool_name(struct Options *t, u8 *name) |
|
743 |
|
{ |
|
744 |
|
if (t->stringpool_name == DEFAULT_STRINGPOOL_NAME) |
|
745 |
|
t->stringpool_name = name; |
|
746 |
|
}/*}}}*/ |
|
747 |
|
/*{{{ opts_set_constants_prefix */ |
|
748 |
|
/* sets the prefix for the constants, if not already set */ |
|
749 |
|
static void opts_set_constants_prefix(struct Options *t, u8 *prefix) |
|
750 |
|
{ |
|
751 |
|
if (t->constants_prefix == DEFAULT_CONSTANTS_PREFIX) |
|
752 |
|
t->constants_prefix = prefix; |
|
753 |
|
}/*}}}*/ |
|
754 |
|
/*{{{ opts_set_wordlist_name */ |
|
755 |
|
/* sets the hash table array name, if not already set */ |
|
756 |
|
static void opts_set_wordlist_name(struct Options *t, u8 *name) |
|
757 |
|
{ |
|
758 |
|
if (t->wordlist_name == DEFAULT_WORDLIST_NAME) |
|
759 |
|
t->wordlist_name = name; |
|
760 |
|
}/*}}}*/ |
|
761 |
|
/*{{{ opts_set_lengthtable_name */ |
|
762 |
|
/* sets the length table array name, if not already set */ |
|
763 |
|
static void opts_set_lengthtable_name(struct Options *t, u8 *name) |
|
764 |
|
{ |
|
765 |
|
if (t->lengthtable_name == DEFAULT_LENGTHTABLE_NAME) |
|
766 |
|
t->lengthtable_name = name; |
|
767 |
|
}/*}}}*/ |
|
768 |
|
/*{{{ opts_set_total_switches */ |
|
769 |
|
/* sets the total number of switch statements, if not already set */ |
|
770 |
|
static void opts_set_total_switches(struct Options *t, s32 total_switches) |
|
771 |
|
{ |
|
772 |
|
if (!OPTS(SWITCH)) { |
|
773 |
|
t->option_word |= OPTS_SWITCH; |
|
774 |
|
t->total_switches = total_switches; |
|
775 |
|
} |
|
776 |
|
}/*}}}*/ |
|
777 |
|
/*{{{ posstrp_new */ |
|
778 |
|
/*
 * Allocates a zero-filled PositionStringParser and fills it in.
 *
 * str:             the key position string to parse
 * low_bound:       smallest accepted position
 * high_bound:      largest accepted position
 * end_word_marker: value reported for a '$' position
 * error_value:     value reported for invalid input
 * end_marker:      value reported once the string is exhausted
 *
 * Returns the new parser, to be released with posstrp_del().
 * NOTE(review): the calloc() result is used without a null check.
 */
static struct PositionStringParser *posstrp_new(u8 *str, s32 low_bound,
			s32 high_bound, s32 end_word_marker, s32 error_value, s32 end_marker)
{
	struct PositionStringParser *parser;

	parser = calloc(1, sizeof(*parser));
	/* parsing state: not inside an "a-b" range yet */
	parser->in_range = false;
	/* input and the accepted interval */
	parser->str = str;
	parser->low_bound = low_bound;
	parser->high_bound = high_bound;
	/* values handed back for the special outcomes */
	parser->end_word_marker = end_word_marker;
	parser->error_value = error_value;
	parser->end_marker = end_marker;
	return parser;
}
|
793 |
|
/*}}}*/ |
|
794 |
|
/*{{{ posstrp_del */ |
|
795 |
|
/*
 * Releases a parser with free(). Only the parser object itself is freed;
 * the string it points to is not touched.
 */
static void posstrp_del(struct PositionStringParser *t)
{
	free(t);
}/*}}}*/
|
799 |
|
/*{{{ posstrp_nextPosition */ |
|
800 |
|
/* Returns the next key position from the given string */ |
|
801 |
|
static s32 posstrp_nextPosition(struct PositionStringParser *t) |
|
802 |
|
{ |
|
803 |
|
if (t->in_range) { |
|
804 |
|
/* We are inside a range. Return the next value from the range */ |
|
805 |
|
if (++t->range_curr_value >= t->range_upper_bound) |
|
806 |
|
t->in_range = false; |
|
807 |
|
return t->range_curr_value; |
|
808 |
|
} |
|
809 |
|
/* we are not inside a range */ |
|
810 |
|
/* Continue parsing the given string */ |
|
811 |
|
loop { |
|
812 |
|
if (t->str[0] == 0) |
|
813 |
|
break; |
|
814 |
|
switch (t->str[0]) { |
|
815 |
|
case ',': |
|
816 |
|
/* Skip the comma */ |
|
817 |
|
++(t->str); |
|
818 |
|
break; |
|
819 |
|
case '$': |
|
820 |
|
/* Valid key position */ |
|
821 |
|
++(t->str); |
|
822 |
|
return t->end_word_marker; |
|
823 |
|
case '0': case '1': case '2': case '3': case '4': |
|
824 |
|
case '5': case '6': case '7': case '8': case '9': { |
|
825 |
|
/* Valid key position */ |
|
826 |
|
s32 curr_value; |
|
827 |
|
|
|
828 |
|
curr_value = 0; |
|
829 |
|
loop { |
|
830 |
|
if (!isdigit((int)(t->str[0]))) |
|
831 |
|
break; |
|
832 |
|
curr_value = curr_value * 10 + (t->str[0] - '0'); |
|
833 |
|
++(t->str); |
|
834 |
|
} |
|
835 |
|
if (t->str[0] == '-') { |
|
836 |
|
++(t->str); |
|
837 |
|
/* starting a range of key positions */ |
|
838 |
|
t->in_range = true; |
|
839 |
|
|
|
840 |
|
t->range_upper_bound = 0; |
|
841 |
|
loop { |
|
842 |
|
if (!isdigit((int)(t->str[0]))) |
|
843 |
|
break; |
|
844 |
|
t->range_upper_bound = t->range_upper_bound * 10 |
|
845 |
|
+ (t->str[0] - '0'); |
|
846 |
|
++(t->str); |
|
847 |
|
} |
|
848 |
|
/* Verify range's upper bound */ |
|
849 |
|
if (!(t->range_upper_bound > curr_value && t->range_upper_bound |
|
850 |
|
<= t->high_bound)) |
|
851 |
|
return t->error_value; |
|
852 |
|
t->range_curr_value = curr_value; |
|
853 |
|
} |
|
854 |
|
/* Verify range's lower bound */ |
|
855 |
|
if (!(curr_value >= t->low_bound && curr_value <= t->high_bound)) |
|
856 |
|
return t->error_value; |
|
857 |
|
return curr_value; |
|
858 |
|
} |
|
859 |
|
default: |
|
860 |
|
/* Invalid syntax. */ |
|
861 |
|
return t->error_value; |
|
862 |
|
} |
|
863 |
|
} |
|
864 |
|
return t->end_marker; |
|
865 |
|
}/*}}}*/ |
|
866 |
|
/*{{{ opts_print */ |
|
867 |
|
static void opts_print(struct Options *t) |
|
868 |
|
{ |
|
869 |
|
s32 i; |
|
870 |
|
|
|
871 |
|
printf("/* Command-line: "); |
|
872 |
|
i = 0; |
|
873 |
|
loop { |
|
874 |
|
u8 *arg; |
|
875 |
|
|
|
876 |
|
if (i >= t->argument_count) |
|
877 |
|
break; |
|
878 |
|
arg = t->argument_vector[i]; |
|
879 |
|
/* escape arg if it contains shell metacharacters */ |
|
880 |
|
if (*arg == '-') { |
|
881 |
|
putchar(*arg); |
|
882 |
|
++arg; |
|
883 |
|
if ((*arg >= 'A' && *arg <= 'Z') || (*arg >= 'a' && *arg <= 'z')) { |
|
884 |
|
putchar(*arg); |
|
885 |
|
++arg; |
|
886 |
|
} else if (*arg == '-') { |
|
887 |
|
loop { |
|
888 |
|
putchar(*arg); |
|
889 |
|
++arg; |
|
890 |
|
if (!((*arg >= 'A' && *arg <= 'Z') || (*arg >= 'a' |
|
891 |
|
&& *arg <= 'z') || *arg == '-')) |
|
892 |
|
break; |
|
893 |
|
} |
|
894 |
|
if (*arg == '=') { |
|
895 |
|
putchar(*arg); |
|
896 |
|
++arg; |
|
897 |
|
} |
|
898 |
|
} |
|
899 |
|
} |
|
900 |
|
if (strpbrk(arg, "\t\n !\"#$&'()*;<>?[\\]`{|}~") != 0) { |
|
901 |
|
if (strchr(arg, '\'') != 0) { |
|
902 |
|
putchar('"'); |
|
903 |
|
loop { |
|
904 |
|
if (*arg == 0) |
|
905 |
|
break; |
|
906 |
|
if (*arg == '\"' || *arg == '\\' || *arg == '$' |
|
907 |
|
|| *arg == '`') |
|
908 |
|
putchar('\\'); |
|
909 |
|
putchar(*arg); |
|
910 |
|
++arg; |
|
911 |
|
} |
|
912 |
|
putchar('"'); |
|
913 |
|
} else { |
|
914 |
|
putchar('\''); |
|
915 |
|
loop { |
|
916 |
|
if (*arg == 0) |
|
917 |
|
break; |
|
918 |
|
if (*arg == '\\') |
|
919 |
|
putchar('\\'); |
|
920 |
|
putchar(*arg); |
|
921 |
|
++arg; |
|
922 |
|
} |
|
923 |
|
putchar('\''); |
|
924 |
|
} |
|
925 |
|
} else |
|
926 |
|
printf("%s", arg); |
|
927 |
|
printf(" "); |
|
928 |
|
++i; |
|
929 |
|
} |
|
930 |
|
printf(" */"); |
|
931 |
|
} |
|
932 |
|
/*}}}*/ |
|
933 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
934 |
|
#define EPILOG |
|
935 |
|
#include "namespace/globals.h" |
|
936 |
|
#include "namespace/options.h" |
|
937 |
|
#include "namespace/positions.h" |
|
938 |
|
#undef EPILOG |
|
939 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
940 |
|
#endif |
File output.c added (mode: 100644) (index 0000000..ce76117) |
|
1 |
|
#ifndef CGPERF_OUTPUT_C |
|
2 |
|
#define CGPERF_OUTPUT_C |
|
3 |
|
#include <stdbool.h> |
|
4 |
|
#include "c_fixing.h" |
|
5 |
|
#include "globals.h" |
|
6 |
|
#include "options.h" |
|
7 |
|
#include "output.h" |
|
8 |
|
#include "keyword.h" |
|
9 |
|
#include "keyword_list.h" |
|
10 |
|
#include "positions.h" |
|
11 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
12 |
|
#include "namespace/globals.h" |
|
13 |
|
#include "namespace/options.h" |
|
14 |
|
#include "namespace/output.h" |
|
15 |
|
#include "namespace/output.c" |
|
16 |
|
#include "namespace/keyword.h" |
|
17 |
|
#include "namespace/keyword_list.h" |
|
18 |
|
#include "namespace/positions.h" |
|
19 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
20 |
|
/* We use a downcase table because when called repeatedly, the code gperf_downcase[c] |
|
21 |
|
is faster than |
|
22 |
|
if (c >= 'A' && c <= 'Z') |
|
23 |
|
c += 'a' - 'A'; |
|
24 |
|
*/ |
|
25 |
|
#define USE_DOWNCASE_TABLE 1 |
|
26 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
27 |
|
/*{{{ local */ |
|
28 |
|
/*{{{ types */ |
|
29 |
|
/* |
|
30 |
|
* because of the way output_keyword_table works, every duplicate set is |
|
31 |
|
* stored contiguously in the wordlist array |
|
32 |
|
*/ |
|
33 |
|
struct Duplicate_Entry |
|
34 |
|
{ |
|
35 |
|
s32 hash_value; /* hash value for this particular duplicate set */ |
|
36 |
|
s32 index; /* index into the main keyword storage array */ |
|
37 |
|
s32 count; /* number of consecutive duplicates at this index */ |
|
38 |
|
}; |
|
39 |
|
/*}}}*/ |
|
40 |
|
/*{{{ variables */ |
|
41 |
|
/* the "register " storage-class specifier */ |
|
42 |
|
static u8 *register_scs; |
|
43 |
|
/* the "const " qualifier */ |
|
44 |
|
static u8 *const_always; |
|
45 |
|
/* the "const " qualifier, for read-only arrays */ |
|
46 |
|
static u8 *const_readonly_array; |
|
47 |
|
/* the "const " qualifier, for the array type */ |
|
48 |
|
static u8 *const_for_struct; |
|
49 |
|
/*}}} variables -- END */ |
|
50 |
|
/*{{{ code */ |
|
51 |
|
/*{{{ output_string */ |
|
52 |
|
/* |
|
53 |
|
* Outputs a keyword, as a string: enclosed in double quotes, escaping backslashes, double quote and |
|
54 |
|
* unprintable characters |
|
55 |
|
*/ |
|
56 |
|
static void output_string(u8 *key, s32 len) |
|
57 |
|
{ |
|
58 |
|
putchar('"'); |
|
59 |
|
loop { |
|
60 |
|
u8 c; |
|
61 |
|
|
|
62 |
|
if (len <= 0) |
|
63 |
|
break; |
|
64 |
|
c = (u8)(*key++); |
|
65 |
|
if (isprint(c)) { |
|
66 |
|
if (c == '"' || c == '\\') |
|
67 |
|
putchar('\\'); |
|
68 |
|
putchar(c); |
|
69 |
|
} else { |
|
70 |
|
/* |
|
71 |
|
* Use octal escapes, not hexadecimal escapes, because some old C compilers |
|
72 |
|
* didn't understand hexadecimal escapes, and because hexadecimal escapes |
|
73 |
|
* are not limited to 2 digits, thus needing special care if the following |
|
74 |
|
* character happens to be a digit. |
|
75 |
|
*/ |
|
76 |
|
putchar('\\'); |
|
77 |
|
putchar('0' + ((c >> 6) & 7)); |
|
78 |
|
putchar('0' + ((c >> 3) & 7)); |
|
79 |
|
putchar('0' + (c & 7)); |
|
80 |
|
} |
|
81 |
|
len--; |
|
82 |
|
} |
|
83 |
|
putchar('"'); |
|
84 |
|
}/*}}}*/ |
|
85 |
|
/*{{{ output_line_directive */ |
|
86 |
|
/* outputs a #line directive, referring to the given line number */ |
|
87 |
|
static void output_line_directive(u32 lineno) |
|
88 |
|
{ |
|
89 |
|
u8 *file_name; |
|
90 |
|
|
|
91 |
|
file_name = options->input_file_name; |
|
92 |
|
if (file_name != 0) { |
|
93 |
|
printf("#line %u ", lineno); |
|
94 |
|
output_string(file_name, (s32)strlen(file_name)); |
|
95 |
|
printf("\n"); |
|
96 |
|
} |
|
97 |
|
}/*}}}*/ |
|
98 |
|
/*{{{ output_constant_define */ |
|
99 |
|
/*
 * Prints "#define <prefix><name> <value>" followed by a newline, where
 * <prefix> is options->constants_prefix.
 *
 * name:  constant name, without the prefix
 * value: value of the constant
 *
 * Prefix and name are joined by the format string itself, so no temporary
 * buffer is allocated and there is no allocation failure to handle.
 */
static void output_constant_define(u8 *name, s32 value)
{
	printf("#define %s%s %d\n", options->constants_prefix, name, value);
}/*}}}*/
|
111 |
|
/*{{{ output_constant_enum */ |
|
112 |
|
/*
 * Prints one enumerator of the form "<prefix><name> = <value>", where
 * <prefix> is options->constants_prefix.
 *
 * name:          constant name, without the prefix
 * value:         value of the constant
 * indentation:   text printed in front of the enumerator
 * pending_comma: in/out flag; when true on entry, ",\n" is printed first to
 *                finish the previous enumerator. Always true on return.
 *
 * Prefix and name are joined by the format string itself, so no temporary
 * buffer is allocated and there is no allocation failure to handle.
 */
static void output_constant_enum(u8 *name, s32 value, u8 *indentation, bool *pending_comma)
{
	if (*pending_comma)
		printf(",\n");
	printf("%s %s%s = %d", indentation, options->constants_prefix, name, value);
	*pending_comma = true;
}/*}}}*/
|
127 |
|
/*{{{ output_upperlower_table */ |
|
128 |
|
#if USE_DOWNCASE_TABLE |
|
129 |
|
static void output_upperlower_table(void) |
|
130 |
|
{ |
|
131 |
|
u32 c; |
|
132 |
|
|
|
133 |
|
printf( |
|
134 |
|
"#ifndef GPERF_DOWNCASE\n" |
|
135 |
|
"#define GPERF_DOWNCASE 1\n" |
|
136 |
|
"static %sunsigned char gperf_downcase[256] =\n" |
|
137 |
|
" {", |
|
138 |
|
const_readonly_array); |
|
139 |
|
c = 0; |
|
140 |
|
loop { |
|
141 |
|
if (c >= 256) |
|
142 |
|
break; |
|
143 |
|
if ((c % 15) == 0) |
|
144 |
|
printf("\n "); |
|
145 |
|
printf(" %3d", c >= 'A' && c <= 'Z' ? c + 'a' - 'A' : c); |
|
146 |
|
if (c < 255) |
|
147 |
|
printf (","); |
|
148 |
|
++c; |
|
149 |
|
} |
|
150 |
|
printf("\n" |
|
151 |
|
" };\n" |
|
152 |
|
"#endif\n\n"); |
|
153 |
|
} |
|
154 |
|
#endif |
|
155 |
|
/*}}}*/ |
|
156 |
|
/*{{{ output_upperlower_memcmp */ |
|
157 |
|
/* output gperf's ASCII-case insensitive memcmp replacement */ |
|
158 |
|
static void output_upperlower_memcmp(void) |
|
159 |
|
{ |
|
160 |
|
printf( |
|
161 |
|
"#ifndef GPERF_CASE_MEMCMP\n" |
|
162 |
|
"#define GPERF_CASE_MEMCMP 1\n" |
|
163 |
|
"static int\n" |
|
164 |
|
"gperf_case_memcmp "); |
|
165 |
|
printf(OPTS(KRC) ? "(s1, s2, n)\n" |
|
166 |
|
" %schar *s1;\n" |
|
167 |
|
" %schar *s2;\n" |
|
168 |
|
" %ssize_t n;\n" : |
|
169 |
|
OPTS(C) ? "(s1, s2, n)\n" |
|
170 |
|
" %sconst char *s1;\n" |
|
171 |
|
" %sconst char *s2;\n" |
|
172 |
|
" %ssize_t n;\n" : |
|
173 |
|
OPTS(ANSIC) || OPTS(CPLUSPLUS) ? "(%sconst char *s1, %sconst char *s2, %ssize_t n)\n" : |
|
174 |
|
"", register_scs, register_scs, register_scs); |
|
175 |
|
#if USE_DOWNCASE_TABLE |
|
176 |
|
printf( |
|
177 |
|
"{\n" |
|
178 |
|
" for (; n > 0;)\n" |
|
179 |
|
" {\n" |
|
180 |
|
" unsiGNED char c1 = gperf_downcase[(unsigned char)*s1++];\n" |
|
181 |
|
" unsigned char c2 = gperf_downcase[(unsigned char)*s2++];\n" |
|
182 |
|
" if (c1 == c2)\n" |
|
183 |
|
" {\n" |
|
184 |
|
" n--;\n" |
|
185 |
|
" continue;\n" |
|
186 |
|
" }\n" |
|
187 |
|
" return (int)c1 - (int)c2;\n" |
|
188 |
|
" }\n" |
|
189 |
|
" return 0;\n" |
|
190 |
|
"}\n"); |
|
191 |
|
#else |
|
192 |
|
printf( |
|
193 |
|
"{\n" |
|
194 |
|
" for (; n > 0;)\n" |
|
195 |
|
" {\n" |
|
196 |
|
" unsigned char c1 = *s1++;\n" |
|
197 |
|
" unsigned char c2 = *s2++;\n" |
|
198 |
|
" if (c1 >= 'A' && c1 <= 'Z')\n" |
|
199 |
|
" c1 += 'a' - 'A';\n" |
|
200 |
|
" if (c2 >= 'A' && c2 <= 'Z')\n" |
|
201 |
|
" c2 += 'a' - 'A';\n" |
|
202 |
|
" if (c1 == c2)\n" |
|
203 |
|
" {\n" |
|
204 |
|
" n--;\n" |
|
205 |
|
" continue;\n" |
|
206 |
|
" }\n" |
|
207 |
|
" return (int)c1 - (int)c2;\n" |
|
208 |
|
" }\n" |
|
209 |
|
" return 0;\n" |
|
210 |
|
"}\n"); |
|
211 |
|
#endif |
|
212 |
|
printf( |
|
213 |
|
"#endif\n\n"); |
|
214 |
|
}/*}}}*/ |
|
215 |
|
/*{{{ output_upperlower_strncmp */ |
|
216 |
|
/* output gperf's ASCII-case insensitive strncmp replacement */ |
|
217 |
|
static void output_upperlower_strncmp(void) |
|
218 |
|
{ |
|
219 |
|
printf( |
|
220 |
|
"#ifndef GPERF_CASE_STRNCMP\n" |
|
221 |
|
"#define GPERF_CASE_STRNCMP 1\n" |
|
222 |
|
"static int\n" |
|
223 |
|
"gperf_case_strncmp "); |
|
224 |
|
printf(OPTS(KRC) ? "(s1, s2, n)\n" |
|
225 |
|
" %schar *s1;\n" |
|
226 |
|
" %schar *s2;\n" |
|
227 |
|
" %ssize_t n;\n" : |
|
228 |
|
OPTS(C) ? "(s1, s2, n)\n" |
|
229 |
|
" %sconst char *s1;\n" |
|
230 |
|
" %sconst char *s2;\n" |
|
231 |
|
" %ssize_t n;\n" : |
|
232 |
|
OPTS(ANSIC) || OPTS(CPLUSPLUS) ? "(%sconst char *s1, %sconst char *s2, %ssize_t n)\n" : |
|
233 |
|
"", register_scs, register_scs, register_scs); |
|
234 |
|
#if USE_DOWNCASE_TABLE |
|
235 |
|
printf( |
|
236 |
|
"{\n" |
|
237 |
|
" for (; n > 0;)\n" |
|
238 |
|
" {\n" |
|
239 |
|
" unsigned char c1 = gperf_downcase[(unsigned char)*s1++];\n" |
|
240 |
|
" unsigned char c2 = gperf_downcase[(unsigned char)*s2++];\n" |
|
241 |
|
" if (c1 != 0 && c1 == c2)\n" |
|
242 |
|
" {\n" |
|
243 |
|
" n--;\n" |
|
244 |
|
" continue;\n" |
|
245 |
|
" }\n" |
|
246 |
|
" return (int)c1 - (int)c2;\n" |
|
247 |
|
" }\n" |
|
248 |
|
" return 0;\n" |
|
249 |
|
"}\n"); |
|
250 |
|
#else |
|
251 |
|
printf( |
|
252 |
|
"{\n" |
|
253 |
|
" for (; n > 0;)\n" |
|
254 |
|
" {\n" |
|
255 |
|
" unsigned char c1 = *s1++;\n" |
|
256 |
|
" unsigned char c2 = *s2++;\n" |
|
257 |
|
" if (c1 >= 'A' && c1 <= 'Z')\n" |
|
258 |
|
" c1 += 'a' - 'A';\n" |
|
259 |
|
" if (c2 >= 'A' && c2 <= 'Z')\n" |
|
260 |
|
" c2 += 'a' - 'A';\n" |
|
261 |
|
" if (c1 != 0 && c1 == c2)\n" |
|
262 |
|
" {\n" |
|
263 |
|
" n--;\n" |
|
264 |
|
" continue;\n" |
|
265 |
|
" }\n" |
|
266 |
|
" return (int)c1 - (int)c2;\n" |
|
267 |
|
" }\n" |
|
268 |
|
" return 0;\n" |
|
269 |
|
"}\n"); |
|
270 |
|
#endif |
|
271 |
|
printf( |
|
272 |
|
"#endif\n\n"); |
|
273 |
|
}/*}}}*/ |
|
274 |
|
/*{{{ output_upperlower_strcmp */ |
|
275 |
|
/* output gperf's ASCII-case insensitive strcmp replacement */ |
|
276 |
|
static void output_upperlower_strcmp(void) |
|
277 |
|
{ |
|
278 |
|
printf( |
|
279 |
|
"#ifndef GPERF_CASE_STRCMP\n" |
|
280 |
|
"#define GPERF_CASE_STRCMP 1\n" |
|
281 |
|
"static int\n" |
|
282 |
|
"gperf_case_strcmp "); |
|
283 |
|
printf(OPTS(KRC) ? "(s1, s2)\n" |
|
284 |
|
" %schar *s1;\n" |
|
285 |
|
" %schar *s2;\n" : |
|
286 |
|
OPTS(C) ? "(s1, s2)\n" |
|
287 |
|
" %sconst char *s1;\n" |
|
288 |
|
" %sconst char *s2;\n" : |
|
289 |
|
OPTS(ANSIC) || OPTS(CPLUSPLUS) ? "(%sconst char *s1, %sconst char *s2)\n" : |
|
290 |
|
"", register_scs, register_scs); |
|
291 |
|
#if USE_DOWNCASE_TABLE |
|
292 |
|
printf( |
|
293 |
|
"{\n" |
|
294 |
|
" for (;;)\n" |
|
295 |
|
" {\n" |
|
296 |
|
" unsigned char c1 = gperf_downcase[(unsigned char)*s1++];\n" |
|
297 |
|
" unsigned char c2 = gperf_downcase[(unsigned char)*s2++];\n" |
|
298 |
|
" if (c1 != 0 && c1 == c2)\n" |
|
299 |
|
" continue;\n" |
|
300 |
|
" return (int)c1 - (int)c2;\n" |
|
301 |
|
" }\n" |
|
302 |
|
"}\n"); |
|
303 |
|
#else |
|
304 |
|
printf( |
|
305 |
|
"{\n" |
|
306 |
|
" for (;;)\n" |
|
307 |
|
" {\n" |
|
308 |
|
" unsigned char c1 = *s1++;\n" |
|
309 |
|
" unsigned char c2 = *s2++;\n" |
|
310 |
|
" if (c1 >= 'A' && c1 <= 'Z')\n" |
|
311 |
|
" c1 += 'a' - 'A';\n" |
|
312 |
|
" if (c2 >= 'A' && c2 <= 'Z')\n" |
|
313 |
|
" c2 += 'a' - 'A';\n" |
|
314 |
|
" if (c1 != 0 && c1 == c2)\n" |
|
315 |
|
" continue;\n" |
|
316 |
|
" return (int)c1 - (int)c2;\n" |
|
317 |
|
" }\n" |
|
318 |
|
"}\n"); |
|
319 |
|
#endif |
|
320 |
|
printf |
|
321 |
|
("#endif\n\n"); |
|
322 |
|
}/*}}}*/ |
|
323 |
|
/*{{{ smallest_integral_type */ |
|
324 |
|
/* returns the smallest unsigned C type capable of holding integers up to N */ |
|
325 |
|
static u8 *smallest_integral_type(s32 n) |
|
326 |
|
{ |
|
327 |
|
if (n <= UCHAR_MAX) return "unsigned char"; |
|
328 |
|
if (n <= USHRT_MAX) return "unsigned short"; |
|
329 |
|
return "unsigned int"; |
|
330 |
|
}/*}}}*/ |
|
331 |
|
/*{{{ smallest_integral_type_2 */ |
|
332 |
|
/* returns the smallest signed C type capable of holding integers from MIN to MAX */ |
|
333 |
|
static u8 *smallest_integral_type_2(s32 min, s32 max) |
|
334 |
|
{ |
|
335 |
|
if (OPTS(ANSIC) || OPTS(CPLUSPLUS)) |
|
336 |
|
if (min >= SCHAR_MIN && max <= SCHAR_MAX) return "signed char"; |
|
337 |
|
if (min >= SHRT_MIN && max <= SHRT_MAX) return "short"; |
|
338 |
|
return "int"; |
|
339 |
|
}/*}}}*/ |
|
340 |
|
/*{{{ output_const_type */ |
|
341 |
|
/* |
|
342 |
|
* Outputs a type and a const specifier (i.e. "const " or ""). |
|
343 |
|
* The output is terminated with a space. |
|
344 |
|
*/ |
|
345 |
|
static void output_const_type(u8 *const_string, u8 *type_string) |
|
346 |
|
{ |
|
347 |
|
if (type_string[strlen(type_string) - 1] == '*') |
|
348 |
|
/* for pointer types, put the 'const' after the type */ |
|
349 |
|
printf( |
|
350 |
|
"%s %s", type_string, const_string); |
|
351 |
|
else |
|
352 |
|
/* for scalar or struct types, put the 'const' before the type */ |
|
353 |
|
printf( |
|
354 |
|
"%s%s ", const_string, type_string); |
|
355 |
|
}/*}}}*/ |
|
356 |
|
/*{{{ output_keyword_blank_entries */ |
|
357 |
|
/*
 * Prints count blank (unused slot) entries of the keyword table.
 *
 * count:  number of blank entries to print
 * indent: text printed at the start of every output line
 *
 * A blank entry is -1 with the SHAREDLIB option, (char*)0 with NULLSTRINGS
 * and "" otherwise. With the TYPE option it is wrapped in braces, followed
 * by options->initializer_suffix. Entries are separated by ", " and wrapped
 * onto a new line after a number of entries that depends on the width of
 * one entry.
 */
static void output_keyword_blank_entries(s32 count, u8 *indent)
{
	s32 per_line;
	s32 n;

	if (!OPTS(TYPE)) {
		per_line = (OPTS(SHAREDLIB) ? 9 : OPTS(NULLSTRINGS) ? 4 : 9);
	} else {
		per_line = 58 / (4 + (OPTS(SHAREDLIB) ? 2 : OPTS(NULLSTRINGS) ? 8 : 2)
					+ strlen(options->initializer_suffix));
		/* a very long suffix still gets one entry per line */
		if (per_line == 0)
			per_line = 1;
	}
	for (n = 0; n < count; ++n) {
		if ((n % per_line) != 0) {
			/* n is not 0 here, so an entry was printed before this one */
			printf(", ");
		} else {
			/* start of an output line; finish the previous one if any */
			if (n > 0)
				printf(
",\n");
			printf(
"%s ", indent);
		}
		if (OPTS(TYPE))
			printf("{");
		if (OPTS(SHAREDLIB))
			printf("-1");
		else if (OPTS(NULLSTRINGS))
			printf("(char*)0");
		else
			printf("\"\"");
		if (OPTS(TYPE))
			printf(
"%s}", options->initializer_suffix);
	}
}/*}}}*/
|
400 |
|
/*{{{ output_keyword_entry */ |
|
401 |
|
/*
 * Prints one entry of the keyword table.
 *
 * tmp:              the keyword to print
 * stringpool_index: index used to name the keyword's string pool member;
 *                   only used with the SHAREDLIB option
 * indent:           text printed in front of the entry
 * is_duplicate:     selects the wording of the DEBUG comment
 *
 * With the TYPE option a #line directive is printed first, the entry is
 * wrapped in braces and tmp->rest, when not empty, is appended after a
 * comma. With the DEBUG option a comment with the hash value (or a
 * duplicate notice) and the final index follows the entry.
 */
static void output_keyword_entry(struct Keyword *tmp, s32 stringpool_index, u8 *indent,
									bool is_duplicate)
{
	if (OPTS(TYPE))
		output_line_directive(tmp->lineno);
	printf(
"%s ", indent);
	if (OPTS(TYPE))
		printf("{");
	if (!OPTS(SHAREDLIB)) {
		output_string(tmp->allchars, tmp->allchars_length);
	} else {
		/*
		 * The entry is an offset into the string pool, which has to be a
		 * compile time constant in the generated code:
		 *  - 'offsetof' would be the standard way, but it lives in
		 *    <stddef.h>, which is not among the prerequisite headers the
		 *    user must #include.
		 *  - Taking the address and casting it to 'intptr_t' or 'uintptr_t'
		 *    would be next, but those types live in <stdint.h>, which is not
		 *    a prerequisite header either.
		 *  - 'size_t' is the best approximation of 'uintptr_t' and comes
		 *    from the prerequisite header <string.h>.
		 *  - 'long' and 'unsigned long' work as well, but on 64-bit native
		 *    Windows platforms they do not have the size of a pointer and
		 *    therefore generate warnings.
		 */
		printf("(int)(size_t)&((struct %s_t *)0)->%s_str%d",
			options->stringpool_name, options->stringpool_name, stringpool_index);
	}
	if (OPTS(TYPE)) {
		if (strlen(tmp->rest) != 0)
			printf(",%s", tmp->rest);
		printf("}");
	}
	if (!OPTS(DEBUG))
		return;
	printf(" /* ");
	if (!is_duplicate)
		printf("hash value = %d, ", tmp->hash_value);
	else
		printf("hash value duplicate, ");
	printf("index = %d */", tmp->final_index);
}/*}}}*/
|
445 |
|
/*{{{ output_switch_case */ |
|
446 |
|
/* Output a single switch case (including duplicates). Advance list. */ |
|
447 |
|
static struct Keyword_List *output_switch_case(struct Keyword_List *list, s32 indent, |
|
448 |
|
s32 *jumps_away) |
|
449 |
|
{ |
|
450 |
|
if (OPTS(DEBUG)) |
|
451 |
|
printf( |
|
452 |
|
"%*s/* hash value = %4d, keyword = \"%.*s\" */\n", indent, "", list->kw->hash_value, |
|
453 |
|
list->kw->allchars_length, list->kw->allchars); |
|
454 |
|
if (OPTS(DUP) && list->kw->duplicate_link) { |
|
455 |
|
s32 count; |
|
456 |
|
struct Keyword *links; |
|
457 |
|
|
|
458 |
|
if (OPTS(LENTABLE)) |
|
459 |
|
printf( |
|
460 |
|
"%*slengthptr = &%s[%d];\n", indent, "", options->lengthtable_name, list->kw->final_index); |
|
461 |
|
printf( |
|
462 |
|
"%*swordptr = &%s[%d];\n", indent, "", options->wordlist_name, list->kw->final_index); |
|
463 |
|
count = 0; |
|
464 |
|
links = list->kw; |
|
465 |
|
loop { |
|
466 |
|
if (links == 0) |
|
467 |
|
break; |
|
468 |
|
++count; |
|
469 |
|
links = links->duplicate_link; |
|
470 |
|
} |
|
471 |
|
printf( |
|
472 |
|
"%*swordendptr = wordptr + %d;\n" |
|
473 |
|
"%*sgoto multicompare;\n", indent, "", count, indent, ""); |
|
474 |
|
*jumps_away = 1; |
|
475 |
|
} else { |
|
476 |
|
if (OPTS(LENTABLE)) { |
|
477 |
|
printf( |
|
478 |
|
"%*sif (len == %d)\n" |
|
479 |
|
"%*s {\n", indent, "", list->kw->allchars_length, indent, ""); |
|
480 |
|
indent += 4; |
|
481 |
|
} |
|
482 |
|
printf("%*sresword = ", indent, ""); |
|
483 |
|
if (OPTS(TYPE)) |
|
484 |
|
printf("&%s[%d]", options->wordlist_name, list->kw->final_index); |
|
485 |
|
else |
|
486 |
|
output_string(list->kw->allchars, list->kw->allchars_length); |
|
487 |
|
printf(";\n"); |
|
488 |
|
printf( |
|
489 |
|
"%*sgoto compare;\n", indent, ""); |
|
490 |
|
if (OPTS(LENTABLE)) { |
|
491 |
|
indent -= 4; |
|
492 |
|
printf( |
|
493 |
|
"%*s }\n", indent, ""); |
|
494 |
|
} else |
|
495 |
|
*jumps_away = 1; |
|
496 |
|
} |
|
497 |
|
return list->next; |
|
498 |
|
}/*}}}*/ |
|
499 |
|
/*{{{ output_switches */ |
|
500 |
|
/* |
|
501 |
|
* output a total of size cases, grouped into num_switches switch statements, where 0 < |
|
502 |
|
* num_switches <= size |
|
503 |
|
*/ |
|
504 |
|
static void output_switches(struct Keyword_List *list, s32 num_switches, s32 size, |
|
505 |
|
s32 min_hash_value, s32 max_hash_value, s32 indent) |
|
506 |
|
{ |
|
507 |
|
if (OPTS(DEBUG)) |
|
508 |
|
printf( |
|
509 |
|
"%*s/* know %d <= key <= %d, contains %d cases */\n", indent, "", min_hash_value, max_hash_value, |
|
510 |
|
size); |
|
511 |
|
if (num_switches > 1) { |
|
512 |
|
s32 part1; |
|
513 |
|
s32 part2; |
|
514 |
|
s32 size1; |
|
515 |
|
s32 size2; |
|
516 |
|
struct Keyword_List *tmp; |
|
517 |
|
s32 count; |
|
518 |
|
|
|
519 |
|
part1 = num_switches / 2; |
|
520 |
|
part2 = num_switches - part1; |
|
521 |
|
size1 = (s32)((f64)(size) / (f64)(num_switches) * (f64)(part1) + 0.5); |
|
522 |
|
size2 = size - size1; |
|
523 |
|
|
|
524 |
|
tmp = list; |
|
525 |
|
count = size1; |
|
526 |
|
loop { |
|
527 |
|
if (count <= 0) |
|
528 |
|
break; |
|
529 |
|
tmp = tmp->next; |
|
530 |
|
count--; |
|
531 |
|
} |
|
532 |
|
printf( |
|
533 |
|
"%*sif (key < %d)\n" |
|
534 |
|
"%*s {\n", indent, "", tmp->kw->hash_value, indent, ""); |
|
535 |
|
output_switches(list, part1, size1, min_hash_value, tmp->kw->hash_value - 1, |
|
536 |
|
indent + 4); |
|
537 |
|
printf( |
|
538 |
|
"%*s }\n" |
|
539 |
|
"%*selse\n" |
|
540 |
|
"%*s {\n", indent, "", indent, "", indent, ""); |
|
541 |
|
output_switches(tmp, part2, size2, tmp->kw->hash_value, max_hash_value, indent + 4); |
|
542 |
|
printf( |
|
543 |
|
"%*s }\n", indent, ""); |
|
544 |
|
} else { |
|
545 |
|
s32 lowest_case_value; |
|
546 |
|
|
|
547 |
|
lowest_case_value = list->kw->hash_value; |
|
548 |
|
if (size == 1) { |
|
549 |
|
s32 jumps_away; |
|
550 |
|
|
|
551 |
|
jumps_away = 0; |
|
552 |
|
if (min_hash_value == max_hash_value) |
|
553 |
|
output_switch_case(list, indent, &jumps_away); |
|
554 |
|
else { |
|
555 |
|
printf( |
|
556 |
|
"%*sif (key == %d)\n" |
|
557 |
|
"%*s {\n", indent, "", lowest_case_value, indent, ""); |
|
558 |
|
output_switch_case(list, indent + 4, &jumps_away); |
|
559 |
|
printf( |
|
560 |
|
"%*s }\n", indent, ""); |
|
561 |
|
} |
|
562 |
|
} else { |
|
563 |
|
if (lowest_case_value == 0) |
|
564 |
|
printf( |
|
565 |
|
"%*sswitch (key)\n", indent, ""); |
|
566 |
|
else |
|
567 |
|
printf( |
|
568 |
|
"%*sswitch (key - %d)\n", indent, "", lowest_case_value); |
|
569 |
|
printf( |
|
570 |
|
"%*s {\n", indent, ""); |
|
571 |
|
loop { |
|
572 |
|
s32 jumps_away; |
|
573 |
|
|
|
574 |
|
if (size <= 0) |
|
575 |
|
break; |
|
576 |
|
jumps_away = 0; |
|
577 |
|
printf( |
|
578 |
|
"%*s case %d:\n", indent, "", list->kw->hash_value - lowest_case_value); |
|
579 |
|
list = output_switch_case(list, indent + 6, &jumps_away); |
|
580 |
|
if (!jumps_away) |
|
581 |
|
printf( |
|
582 |
|
"%*s break;\n", indent, ""); |
|
583 |
|
size--; |
|
584 |
|
} |
|
585 |
|
printf( |
|
586 |
|
"%*s }\n", indent, ""); |
|
587 |
|
} |
|
588 |
|
} |
|
589 |
|
}/*}}}*/ |
|
590 |
|
/*{{{ output_firstchar_comparison */ |
|
591 |
|
/* |
|
592 |
|
* Outputs the comparison expression for the first byte. Returns true if the this comparison is |
|
593 |
|
* complete. |
|
594 |
|
*/ |
|
595 |
|
static bool output_firstchar_comparison(u8 *expr1, u8 *expr2) |
|
596 |
|
{ |
|
597 |
|
/* |
|
598 |
|
* First, we emit a comparison of the first byte of the two strings. This catches most |
|
599 |
|
* cases where the string being looked up is not in the hash table but happens to have the |
|
600 |
|
* same hash code as an element of the hash table. |
|
601 |
|
*/ |
|
602 |
|
if (OPTS(UPPERLOWER)) { |
|
603 |
|
/* incomplete comparison, just for speedup */ |
|
604 |
|
printf("(((unsigned char)*"); |
|
605 |
|
printf("%s", expr1); |
|
606 |
|
printf(" ^ (unsigned char)*"); |
|
607 |
|
printf("%s", expr2); |
|
608 |
|
printf(") & ~32) == 0"); |
|
609 |
|
return false; |
|
610 |
|
} |
|
611 |
|
/* Complete comparison. */ |
|
612 |
|
printf("*"); |
|
613 |
|
printf("%s", expr1); |
|
614 |
|
printf(" == *"); |
|
615 |
|
printf("%s", expr2); |
|
616 |
|
return true; |
|
617 |
|
}/*}}}*/ |
|
618 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
619 |
|
/*{{{ output_comparison_X */ |
|
620 |
|
/* |
|
621 |
|
* Outputs the comparison expression. |
|
622 |
|
* expr1 outputs a simple expression of type 'const char *' referring to the string being looked up. |
|
623 |
|
* expr2 outputs a simple expression of type 'const char *' referring to the constant string stored |
|
624 |
|
* in the gperf generated hash table. |
|
625 |
|
*/ |
|
626 |
|
/*{{{ output_comparison_memcmp */ |
|
627 |
|
static void output_comparison_memcmp(u8 *expr1, u8 *expr2) |
|
628 |
|
{ |
|
629 |
|
bool firstchar_done; |
|
630 |
|
|
|
631 |
|
firstchar_done = output_firstchar_comparison(expr1, expr2); |
|
632 |
|
printf(" && !"); |
|
633 |
|
if (OPTS(UPPERLOWER)) |
|
634 |
|
printf("gperf_case_"); |
|
635 |
|
printf("memcmp ("); |
|
636 |
|
if (firstchar_done) { |
|
637 |
|
printf("%s", expr1); |
|
638 |
|
printf(" + 1, "); |
|
639 |
|
printf("%s", expr2); |
|
640 |
|
printf(" + 1, len - 1"); |
|
641 |
|
} else { |
|
642 |
|
printf("%s", expr1); |
|
643 |
|
printf(", "); |
|
644 |
|
printf("%s", expr2); |
|
645 |
|
printf(", len"); |
|
646 |
|
} |
|
647 |
|
printf(")"); |
|
648 |
|
}/*}}}*/ |
|
649 |
|
/*{{{ output_comparison_strncmp */ |
|
650 |
|
static void output_comparison_strncmp(u8 *expr1, u8 *expr2) |
|
651 |
|
{ |
|
652 |
|
bool firstchar_done; |
|
653 |
|
|
|
654 |
|
firstchar_done = output_firstchar_comparison(expr1, expr2); |
|
655 |
|
printf(" && !"); |
|
656 |
|
if (OPTS(UPPERLOWER)) |
|
657 |
|
printf("gperf_case_"); |
|
658 |
|
printf("strncmp ("); |
|
659 |
|
if (firstchar_done) { |
|
660 |
|
printf("%s", expr1); |
|
661 |
|
printf(" + 1, "); |
|
662 |
|
printf("%s", expr2); |
|
663 |
|
printf(" + 1, len - 1"); |
|
664 |
|
} else { |
|
665 |
|
printf("%s", expr1); |
|
666 |
|
printf(", "); |
|
667 |
|
printf("%s", expr2); |
|
668 |
|
printf(", len"); |
|
669 |
|
} |
|
670 |
|
printf(") && "); |
|
671 |
|
printf("%s", expr2); |
|
672 |
|
printf("[len] == '\\0'"); |
|
673 |
|
}/*}}}*/ |
|
674 |
|
/*{{{ output_comparison_strcmp */ |
|
675 |
|
static void output_comparison_strcmp(u8 *expr1, u8 *expr2) |
|
676 |
|
{ |
|
677 |
|
bool firstchar_done; |
|
678 |
|
|
|
679 |
|
firstchar_done = output_firstchar_comparison(expr1, expr2); |
|
680 |
|
printf(" && !"); |
|
681 |
|
if (OPTS(UPPERLOWER)) |
|
682 |
|
printf("gperf_case_"); |
|
683 |
|
printf("strcmp ("); |
|
684 |
|
if (firstchar_done) { |
|
685 |
|
printf("%s", expr1); |
|
686 |
|
printf(" + 1, "); |
|
687 |
|
printf("%s", expr2); |
|
688 |
|
printf(" + 1"); |
|
689 |
|
} else { |
|
690 |
|
printf("%s", expr1); |
|
691 |
|
printf(", "); |
|
692 |
|
printf("%s", expr2); |
|
693 |
|
} |
|
694 |
|
printf(")"); |
|
695 |
|
}/*}}}*/ |
|
696 |
|
/*}}} output_comparison_X -- END */ |
|
697 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
698 |
|
/*}}} code -- END */ |
|
699 |
|
/*}}} local -- END */ |
|
700 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
701 |
|
/*{{{ output_new */ |
|
702 |
|
/* Constructor. |
|
703 |
|
Note about the keyword list starting at head: |
|
704 |
|
- The list is ordered by increasing _hash_value. This has been achieved |
|
705 |
|
by Search::sort(). |
|
706 |
|
- Duplicates, i.e. keywords with the same _selchars set, are chained |
|
707 |
|
through the _duplicate_link pointer. Only one representative per |
|
708 |
|
duplicate equivalence class remains on the linear keyword list. |
|
709 |
|
- Accidental duplicates, i.e. keywords for which the _asso_values[] search |
|
710 |
|
couldn't achieve different hash values, cannot occur on the linear |
|
711 |
|
keyword list. Search::optimize would catch this mistake. |
|
712 |
|
*/ |
|
713 |
|
static struct Output *output_new(struct Keyword_List *head, u8 *struct_decl, |
|
714 |
|
u32 struct_decl_lineno, u8 *return_type, |
|
715 |
|
u8 *struct_tag, u8 *verbatim_declarations, |
|
716 |
|
u8 *verbatim_declarations_end, |
|
717 |
|
u32 verbatim_declarations_lineno, |
|
718 |
|
u8 *verbatim_code, u8 *verbatim_code_end, |
|
719 |
|
u32 verbatim_code_lineno, bool charset_dependent, |
|
720 |
|
s32 total_keys, s32 max_key_len, s32 min_key_len, |
|
721 |
|
bool hash_includes_len, struct Positions *positions, |
|
722 |
|
u32 *alpha_inc, s32 total_duplicates, |
|
723 |
|
u32 alpha_size, s32 *asso_values) |
|
724 |
|
{ |
|
725 |
|
struct Output *t; |
|
726 |
|
|
|
727 |
|
t = calloc(1, sizeof(*t)); |
|
728 |
|
t->head = head; |
|
729 |
|
t->struct_decl = struct_decl; |
|
730 |
|
t->struct_decl_lineno = struct_decl_lineno; |
|
731 |
|
t->return_type = return_type; |
|
732 |
|
t->struct_tag = struct_tag; |
|
733 |
|
t->verbatim_declarations = verbatim_declarations; |
|
734 |
|
t->verbatim_declarations_end = verbatim_declarations_end; |
|
735 |
|
t->verbatim_declarations_lineno = verbatim_declarations_lineno; |
|
736 |
|
t->verbatim_code = verbatim_code; |
|
737 |
|
t->verbatim_code_end = verbatim_code_end; |
|
738 |
|
t->verbatim_code_lineno = verbatim_code_lineno; |
|
739 |
|
t->charset_dependent = charset_dependent; |
|
740 |
|
t->total_keys = total_keys; |
|
741 |
|
t->max_key_len = max_key_len; |
|
742 |
|
t->min_key_len = min_key_len; |
|
743 |
|
t->hash_includes_len = hash_includes_len; |
|
744 |
|
t->key_positions = pos_new_cpy(positions); |
|
745 |
|
t->alpha_inc = alpha_inc; |
|
746 |
|
t->total_duplicates = total_duplicates; |
|
747 |
|
t->alpha_size = alpha_size; |
|
748 |
|
t->asso_values = asso_values; |
|
749 |
|
return t; |
|
750 |
|
}/*}}}*/ |
|
751 |
|
/*{{{ output_del */ |
|
752 |
|
static void output_del(struct Output *t) |
|
753 |
|
{ |
|
754 |
|
pos_del(t->key_positions); |
|
755 |
|
free(t); |
|
756 |
|
} |
|
757 |
|
/*}}}*/ |
|
758 |
|
/*{{{ output_do */ |
|
759 |
|
/* generates the hash function and the key word recognizer function based upon the user's Options */ |
|
760 |
|
static void output_do(struct Output *t) |
|
761 |
|
{ |
|
762 |
|
output_compute_min_max(t); |
|
763 |
|
if (OPTS(CPLUSPLUS)) /* yeah, we know nowadays that c++ is never a good idea anyway */ |
|
764 |
|
/* |
|
765 |
|
* The 'register' keyword is removed from C++17. See |
|
766 |
|
* http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4340 |
|
767 |
|
*/ |
|
768 |
|
register_scs = ""; |
|
769 |
|
else |
|
770 |
|
register_scs = "register "; |
|
771 |
|
if (OPTS(C) || OPTS(ANSIC) || OPTS(CPLUSPLUS)) { |
|
772 |
|
const_always = "const "; |
|
773 |
|
const_readonly_array = (OPTS(CONST) ? "const " : ""); |
|
774 |
|
const_for_struct = ((OPTS(CONST) && OPTS(TYPE)) ? "const " : "" ); |
|
775 |
|
} else { |
|
776 |
|
const_always = ""; |
|
777 |
|
const_readonly_array = ""; |
|
778 |
|
const_for_struct = ""; |
|
779 |
|
} |
|
780 |
|
if (!OPTS(TYPE)) { |
|
781 |
|
t->return_type = (const_always[0] != 0 ? "const char *" : "char *"); |
|
782 |
|
t->struct_tag = (const_always[0] != 0 ? "const char *" : "char *"); |
|
783 |
|
} |
|
784 |
|
t->wordlist_eltype = (OPTS(SHAREDLIB) && !OPTS(TYPE) ? (u8*)"int" : t->struct_tag); |
|
785 |
|
printf ("/* "); |
|
786 |
|
if (OPTS(KRC)) |
|
787 |
|
printf("KR-C"); |
|
788 |
|
else if (OPTS(C)) |
|
789 |
|
printf("C"); |
|
790 |
|
else if (OPTS(ANSIC)) |
|
791 |
|
printf("ANSI-C"); |
|
792 |
|
else if (OPTS(CPLUSPLUS)) |
|
793 |
|
printf("C++"); |
|
794 |
|
printf(" code produced by gperf version %s */\n", cgperf_version_string); |
|
795 |
|
opts_print(options); |
|
796 |
|
printf("\n"); |
|
797 |
|
if (!OPTS(POSITIONS)) { |
|
798 |
|
printf ("/* Computed positions: -k'"); |
|
799 |
|
pos_print(t->key_positions); |
|
800 |
|
printf("' */\n"); |
|
801 |
|
} |
|
802 |
|
printf("\n"); |
|
803 |
|
if (t->charset_dependent && (t->key_positions->size > 0 || OPTS(UPPERLOWER))) { |
|
804 |
|
printf("#if !((' ' == 32) && ('!' == 33) && ('\"' == 34) && ('#' == 35) \\\n" |
|
805 |
|
" && ('%%' == 37) && ('&' == 38) && ('\\'' == 39) && ('(' == 40) \\\n" |
|
806 |
|
" && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \\\n" |
|
807 |
|
" && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \\\n" |
|
808 |
|
" && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \\\n" |
|
809 |
|
" && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \\\n" |
|
810 |
|
" && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \\\n" |
|
811 |
|
" && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \\\n" |
|
812 |
|
" && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \\\n" |
|
813 |
|
" && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \\\n" |
|
814 |
|
" && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \\\n" |
|
815 |
|
" && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \\\n" |
|
816 |
|
" && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \\\n" |
|
817 |
|
" && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \\\n" |
|
818 |
|
" && ('Z' == 90) && ('[' == 91) && ('\\\\' == 92) && (']' == 93) \\\n" |
|
819 |
|
" && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \\\n" |
|
820 |
|
" && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \\\n" |
|
821 |
|
" && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \\\n" |
|
822 |
|
" && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \\\n" |
|
823 |
|
" && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \\\n" |
|
824 |
|
" && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \\\n" |
|
825 |
|
" && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \\\n" |
|
826 |
|
" && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))\n" |
|
827 |
|
"/* The character set is not based on ISO-646. */\n"); |
|
828 |
|
printf("%s \"gperf generated tables don't work with this execution character set. Please report a bug to <bug-gperf@gnu.org>.\"\n", OPTS(KRC) || OPTS(C) ? "error" : "#error"); |
|
829 |
|
printf ("#endif\n\n"); |
|
830 |
|
} |
|
831 |
|
if (t->verbatim_declarations < t->verbatim_declarations_end) { |
|
832 |
|
output_line_directive(t->verbatim_declarations_lineno); |
|
833 |
|
fwrite(t->verbatim_declarations, 1, t->verbatim_declarations_end - |
|
834 |
|
t->verbatim_declarations, stdout); |
|
835 |
|
} |
|
836 |
|
if (OPTS(TYPE) && !OPTS(NOTYPE)) { |
|
837 |
|
/* output type declaration now, reference it later on.... */ |
|
838 |
|
output_line_directive(t->struct_decl_lineno); |
|
839 |
|
printf("%s\n", t->struct_decl); |
|
840 |
|
} |
|
841 |
|
if (OPTS(INCLUDE)) |
|
842 |
|
printf("#include <string.h>\n"); /* declare strlen(), strcmp(), strncmp() */ |
|
843 |
|
if (!OPTS(ENUM)) /* refactored: overzealous code factorization */ |
|
844 |
|
output_constants_defines(t); |
|
845 |
|
else if (OPTS(GLOBAL)) |
|
846 |
|
output_constants_enum(t, ""); |
|
847 |
|
printf("/* maximum key range = %d, duplicates = %d */\n\n", t->max_hash_value - t->min_hash_value + 1, t->total_duplicates); |
|
848 |
|
if (OPTS(UPPERLOWER)) { |
|
849 |
|
#if USE_DOWNCASE_TABLE |
|
850 |
|
output_upperlower_table(); |
|
851 |
|
#endif |
|
852 |
|
if (OPTS(LENTABLE)) |
|
853 |
|
output_upperlower_memcmp(); |
|
854 |
|
else { |
|
855 |
|
if (OPTS(COMP)) |
|
856 |
|
output_upperlower_strncmp(); |
|
857 |
|
else |
|
858 |
|
output_upperlower_strcmp(); |
|
859 |
|
} |
|
860 |
|
} |
|
861 |
|
if (OPTS(CPLUSPLUS)) |
|
862 |
|
printf( |
|
863 |
|
"class %s\n" |
|
864 |
|
"{\n" |
|
865 |
|
"private:\n" |
|
866 |
|
" static inline unsigned int %s (const char *str, size_t len);\n" |
|
867 |
|
"public:\n" |
|
868 |
|
" static %s%s%s (const char *str, size_t len);\n" |
|
869 |
|
"};\n" |
|
870 |
|
"\n", options->class_name, options->hash_name, const_for_struct, t->return_type, |
|
871 |
|
options->function_name); |
|
872 |
|
output_hash_function(t); |
|
873 |
|
if (OPTS(SHAREDLIB) && (OPTS(GLOBAL) || OPTS(TYPE))) |
|
874 |
|
output_lookup_pools(t); |
|
875 |
|
if (OPTS(GLOBAL)) |
|
876 |
|
output_lookup_tables(t); |
|
877 |
|
output_lookup_function(t); |
|
878 |
|
if (t->verbatim_code < t->verbatim_code_end) { |
|
879 |
|
output_line_directive(t->verbatim_code_lineno); |
|
880 |
|
fwrite(t->verbatim_code, 1, t->verbatim_code_end - t->verbatim_code, stdout); |
|
881 |
|
} |
|
882 |
|
fflush(stdout); |
|
883 |
|
}/*}}}*/ |
|
884 |
|
/*{{{ output_compute_min_max */ |
|
885 |
|
static void output_compute_min_max(struct Output *t) |
|
886 |
|
{ |
|
887 |
|
struct Keyword_List *tmp; |
|
888 |
|
/* |
|
889 |
|
* since the list is already sorted by hash value all we need to do is to look at the first |
|
890 |
|
* and the last element of the list |
|
891 |
|
*/ |
|
892 |
|
t->min_hash_value = t->head->kw->hash_value; |
|
893 |
|
tmp = t->head; |
|
894 |
|
loop { |
|
895 |
|
if (tmp->next == 0) |
|
896 |
|
break; |
|
897 |
|
tmp = tmp->next; |
|
898 |
|
} |
|
899 |
|
t->max_hash_value = tmp->kw->hash_value; |
|
900 |
|
}/*}}}*/ |
|
901 |
|
/*{{{ output_constants_defines */ |
|
902 |
|
static void output_constants_defines(struct Output *t) |
|
903 |
|
{ |
|
904 |
|
printf("\n"); |
|
905 |
|
output_constant_define("TOTAL_KEYWORDS", t->total_keys); |
|
906 |
|
output_constant_define("MIN_WORD_LENGTH", t->min_key_len); |
|
907 |
|
output_constant_define("MAX_WORD_LENGTH", t->max_key_len); |
|
908 |
|
output_constant_define("MIN_HASH_VALUE", t->min_hash_value); |
|
909 |
|
output_constant_define("MAX_HASH_VALUE", t->max_hash_value); |
|
910 |
|
}/*}}}*/ |
|
911 |
|
/*{{{ output_constants_enum */ |
|
912 |
|
static void output_constants_enum(struct Output *t, u8 *indentation) |
|
913 |
|
{ |
|
914 |
|
bool pending_comma; |
|
915 |
|
|
|
916 |
|
printf("%senum\n" |
|
917 |
|
"%s {\n", indentation, indentation); |
|
918 |
|
pending_comma = false; |
|
919 |
|
output_constant_enum("TOTAL_KEYWORDS", t->total_keys, indentation, &pending_comma); |
|
920 |
|
output_constant_enum("MIN_WORD_LENGTH", t->min_key_len, indentation, &pending_comma); |
|
921 |
|
output_constant_enum("MAX_WORD_LENGTH", t->max_key_len, indentation, &pending_comma); |
|
922 |
|
output_constant_enum("MIN_HASH_VALUE", t->min_hash_value, indentation, &pending_comma); |
|
923 |
|
output_constant_enum("MAX_HASH_VALUE", t->max_hash_value, indentation, &pending_comma); |
|
924 |
|
if (pending_comma) |
|
925 |
|
printf("\n"); |
|
926 |
|
printf("%s };\n\n", indentation); |
|
927 |
|
}/*}}}*/ |
|
928 |
|
/*{{{ output_hash_function */ |
|
929 |
|
/* Generates C code for the hash function that returns the |
|
930 |
|
proper encoding for each keyword. |
|
931 |
|
The hash function has the signature |
|
932 |
|
unsigned int <hash> (const char *str, size_t len). */ |
|
933 |
|
static void output_hash_function(struct Output *t) |
|
934 |
|
{ |
|
935 |
|
/* output the function's head */ |
|
936 |
|
if (OPTS(CPLUSPLUS)) |
|
937 |
|
printf("inline "); |
|
938 |
|
else if (OPTS(KRC) || OPTS(C) || OPTS(ANSIC)) |
|
939 |
|
printf( |
|
940 |
|
"#ifdef __GNUC__\n" |
|
941 |
|
"__inline\n" |
|
942 |
|
"#else\n" |
|
943 |
|
"#ifdef __cplusplus\n" |
|
944 |
|
"inline\n" |
|
945 |
|
"#endif\n" |
|
946 |
|
"#endif\n"); |
|
947 |
|
if (/* the function does not use the 'str' argument? */ |
|
948 |
|
(t->key_positions->size == 0) |
|
949 |
|
|| /* the function uses 'str', but not the 'len' argument? */ |
|
950 |
|
(!t->hash_includes_len |
|
951 |
|
&& t->key_positions->positions[0] < t->min_key_len) |
|
952 |
|
&& t->key_positions->positions[t->key_positions->size - 1] != POS_LASTCHAR) |
|
953 |
|
/* pacify lint */ |
|
954 |
|
printf("/*ARGSUSED*/\n"); |
|
955 |
|
if (OPTS(KRC) || OPTS(C) || OPTS(ANSIC)) |
|
956 |
|
printf("static "); |
|
957 |
|
printf("unsigned int\n"); |
|
958 |
|
if (OPTS(CPLUSPLUS)) |
|
959 |
|
printf("%s::", options->class_name); |
|
960 |
|
printf("%s ", options->hash_name); |
|
961 |
|
printf(OPTS(KRC) ? |
|
962 |
|
"(str, len)\n" |
|
963 |
|
" %schar *str;\n" |
|
964 |
|
" %ssize_t len;\n" : |
|
965 |
|
OPTS(C) ? |
|
966 |
|
"(str, len)\n" |
|
967 |
|
" %sconst char *str;\n" |
|
968 |
|
" %ssize_t len;\n" : |
|
969 |
|
OPTS(ANSIC) || OPTS(CPLUSPLUS) ? |
|
970 |
|
"(%sconst char *str, %ssize_t len)\n" : |
|
971 |
|
"", register_scs, register_scs); |
|
972 |
|
|
|
973 |
|
/* |
|
974 |
|
* note that when the hash function is called, it has already been verified that |
|
975 |
|
* min_key_len <= len <= max_key_len |
|
976 |
|
*/ |
|
977 |
|
/* output the function's body */ |
|
978 |
|
printf( |
|
979 |
|
"{\n"); |
|
980 |
|
/* first the asso_values array */ |
|
981 |
|
if (t->key_positions->size > 0) { |
|
982 |
|
s32 columns; |
|
983 |
|
s32 field_width; |
|
984 |
|
s32 trunc; |
|
985 |
|
u32 count; |
|
986 |
|
/* |
|
987 |
|
* the values in the asso_values array are all unsigned integers <= MAX_HASH_VALUE + |
|
988 |
|
* 1 |
|
989 |
|
*/ |
|
990 |
|
printf( |
|
991 |
|
" static %s%s asso_values[] =\n" |
|
992 |
|
" {", const_readonly_array, smallest_integral_type(t->max_hash_value + 1)); |
|
993 |
|
columns = 10; |
|
994 |
|
/* calculate maximum number of digits required for MAX_HASH_VALUE + 1 */ |
|
995 |
|
field_width = 2; |
|
996 |
|
trunc = t->max_hash_value + 1; |
|
997 |
|
loop { |
|
998 |
|
trunc /= 10; |
|
999 |
|
if (trunc <= 0) |
|
1000 |
|
break; |
|
1001 |
|
++field_width; |
|
1002 |
|
} |
|
1003 |
|
count = 0; |
|
1004 |
|
loop { |
|
1005 |
|
if (count >= t->alpha_size) |
|
1006 |
|
break; |
|
1007 |
|
if (count > 0) |
|
1008 |
|
printf(","); |
|
1009 |
|
if ((count % columns) == 0) |
|
1010 |
|
printf("\n "); |
|
1011 |
|
printf("%*d", field_width, t->asso_values[count]); |
|
1012 |
|
++count; |
|
1013 |
|
} |
|
1014 |
|
printf( |
|
1015 |
|
"\n" |
|
1016 |
|
" };\n"); |
|
1017 |
|
} |
|
1018 |
|
if (t->key_positions->size == 0) { |
|
1019 |
|
/* trivial case: No key positions at all */ |
|
1020 |
|
printf( |
|
1021 |
|
" return %s;\n", t->hash_includes_len ? "len" : "0"); |
|
1022 |
|
} else { |
|
1023 |
|
struct PositionIterator *iter; |
|
1024 |
|
s32 key_pos; |
|
1025 |
|
/* |
|
1026 |
|
* Iterate through the key positions. Remember that Positions::sort() has sorted |
|
1027 |
|
* them in decreasing order, with Positions::LASTCHAR coming last. |
|
1028 |
|
*/ |
|
1029 |
|
iter = pos_iterator(t->key_positions, t->max_key_len); |
|
1030 |
|
/* get the highest key position */ |
|
1031 |
|
key_pos = positer_next(iter); |
|
1032 |
|
if (key_pos == POS_LASTCHAR || key_pos < t->min_key_len) { |
|
1033 |
|
/* |
|
1034 |
|
* We can perform additional optimizations here: Write it out as a single |
|
1035 |
|
* expression. Note that the values are added as 'int's even though the |
|
1036 |
|
* asso_values array may contain 'unsigned char's or 'unsigned short's. |
|
1037 |
|
*/ |
|
1038 |
|
printf( |
|
1039 |
|
" return %s", t->hash_includes_len ? "len + " : ""); |
|
1040 |
|
if (t->key_positions->size == 2 |
|
1041 |
|
&& t->key_positions->positions[0] == 0 |
|
1042 |
|
&& t->key_positions->positions[1] == POS_LASTCHAR) { |
|
1043 |
|
/* optimize special case of "-k 1,$" */ |
|
1044 |
|
output_asso_values_ref(t, POS_LASTCHAR); |
|
1045 |
|
printf(" + "); |
|
1046 |
|
output_asso_values_ref(t, 0); |
|
1047 |
|
} else { |
|
1048 |
|
loop { |
|
1049 |
|
if (key_pos == POS_LASTCHAR) |
|
1050 |
|
break; |
|
1051 |
|
output_asso_values_ref(t, key_pos); |
|
1052 |
|
key_pos = positer_next(iter); |
|
1053 |
|
if (key_pos != POSITER_EOS) |
|
1054 |
|
printf(" + "); |
|
1055 |
|
else |
|
1056 |
|
break; |
|
1057 |
|
} |
|
1058 |
|
if (key_pos == POS_LASTCHAR) |
|
1059 |
|
output_asso_values_ref(t, POS_LASTCHAR); |
|
1060 |
|
} |
|
1061 |
|
printf(";\n"); |
|
1062 |
|
} else { |
|
1063 |
|
u8 *fallthrough_marker; |
|
1064 |
|
/* we've got to use the correct, but brute force, technique */ |
|
1065 |
|
/* |
|
1066 |
|
* pseudo-statement or comment that avoids a compiler warning or lint |
|
1067 |
|
* warning |
|
1068 |
|
*/ |
|
1069 |
|
fallthrough_marker = |
|
1070 |
|
"#if defined __cplusplus && (__cplusplus >= 201703L || (__cplusplus >= 201103L && defined __clang_major__ && defined __clang_minor__ && __clang_major__ + (__clang_minor__ >= 9) > 3))\n" |
|
1071 |
|
" [[fallthrough]];\n" |
|
1072 |
|
"#elif defined __GNUC__ && __GNUC__ >= 7\n" |
|
1073 |
|
" __attribute__ ((__fallthrough__));\n" |
|
1074 |
|
"#endif\n" |
|
1075 |
|
" /*FALLTHROUGH*/\n"; |
|
1076 |
|
/* |
|
1077 |
|
* it doesn't really matter whether hval is an 'int' or 'unsigned int', but |
|
1078 |
|
* 'unsigned int' gives fewer warnings |
|
1079 |
|
*/ |
|
1080 |
|
printf( |
|
1081 |
|
" %sunsigned int hval = %s;\n\n" |
|
1082 |
|
" switch (%s)\n" |
|
1083 |
|
" {\n" |
|
1084 |
|
" default:\n", register_scs, t->hash_includes_len ? "len" : "0", |
|
1085 |
|
t->hash_includes_len ? "hval" : "len"); |
|
1086 |
|
loop { |
|
1087 |
|
if (key_pos == POS_LASTCHAR || key_pos < t->max_key_len) |
|
1088 |
|
break; |
|
1089 |
|
key_pos = positer_next(iter); |
|
1090 |
|
if (key_pos == POSITER_EOS) |
|
1091 |
|
break; |
|
1092 |
|
} |
|
1093 |
|
if (key_pos != POSITER_EOS && key_pos != POS_LASTCHAR) { |
|
1094 |
|
s32 i; |
|
1095 |
|
|
|
1096 |
|
i = key_pos; |
|
1097 |
|
loop { |
|
1098 |
|
if (i > key_pos) |
|
1099 |
|
printf("%s", fallthrough_marker); |
|
1100 |
|
loop { |
|
1101 |
|
if (i <= key_pos) |
|
1102 |
|
break; |
|
1103 |
|
printf(" case %d:\n", i); |
|
1104 |
|
i--; |
|
1105 |
|
} |
|
1106 |
|
printf(" hval += "); |
|
1107 |
|
output_asso_values_ref(t, key_pos); |
|
1108 |
|
printf(";\n"); |
|
1109 |
|
key_pos = positer_next(iter); |
|
1110 |
|
if (key_pos == POSITER_EOS || key_pos == POS_LASTCHAR) |
|
1111 |
|
break; |
|
1112 |
|
} |
|
1113 |
|
if (i >= t->min_key_len) |
|
1114 |
|
printf("%s", fallthrough_marker); |
|
1115 |
|
loop { |
|
1116 |
|
if (i < t->min_key_len) |
|
1117 |
|
break; |
|
1118 |
|
printf(" case %d:\n", i); |
|
1119 |
|
i--; |
|
1120 |
|
} |
|
1121 |
|
} |
|
1122 |
|
printf( |
|
1123 |
|
" break;\n" |
|
1124 |
|
" }\n" |
|
1125 |
|
" return hval"); |
|
1126 |
|
if (key_pos == POS_LASTCHAR) { |
|
1127 |
|
printf(" + "); |
|
1128 |
|
output_asso_values_ref(t, POS_LASTCHAR); |
|
1129 |
|
} |
|
1130 |
|
printf (";\n"); |
|
1131 |
|
} |
|
1132 |
|
positer_del(iter); |
|
1133 |
|
} |
|
1134 |
|
printf ("}\n\n"); |
|
1135 |
|
}/*}}}*/ |
|
1136 |
|
/*{{{ output_asso_values_ref */ |
|
1137 |
|
/* Generates a C expression for an asso_values[] reference. */ |
|
1138 |
|
static void output_asso_values_ref(struct Output *t, s32 pos) |
|
1139 |
|
{ |
|
1140 |
|
printf("asso_values["); |
|
1141 |
|
/* |
|
1142 |
|
* Always cast to unsigned char. This is necessary when the alpha_inc is nonzero, and also |
|
1143 |
|
* avoids a gcc warning "subscript has type 'char'". |
|
1144 |
|
*/ |
|
1145 |
|
if (OPTS(CPLUSPLUS)) { |
|
1146 |
|
/* |
|
1147 |
|
* In C++, a C style cast may lead to a 'warning: use of old-style cast'. |
|
1148 |
|
* Therefore prefer the C++ style cast syntax. |
|
1149 |
|
*/ |
|
1150 |
|
printf("static_cast<unsigned char>("); |
|
1151 |
|
output_asso_values_index(t, pos); |
|
1152 |
|
printf(")"); |
|
1153 |
|
} else { |
|
1154 |
|
printf("(unsigned char)"); |
|
1155 |
|
output_asso_values_index(t, pos); |
|
1156 |
|
} |
|
1157 |
|
printf("]"); |
|
1158 |
|
}/*}}}*/ |
|
1159 |
|
/*{{{ output_asso_values_index */ |
|
1160 |
|
/* generates a C expression for an asso_values[] index */ |
|
1161 |
|
static void output_asso_values_index(struct Output *t, s32 pos) |
|
1162 |
|
{ |
|
1163 |
|
if (pos == POS_LASTCHAR) |
|
1164 |
|
printf("str[len - 1]"); |
|
1165 |
|
else { |
|
1166 |
|
printf("str[%d]", pos); |
|
1167 |
|
if (t->alpha_inc[pos]) |
|
1168 |
|
printf("+%u", t->alpha_inc[pos]); |
|
1169 |
|
} |
|
1170 |
|
}/*}}}*/ |
|
1171 |
|
/*{{{ output_lookup_pools */ |
|
1172 |
|
/* generate all pools needed for the lookup function */ |
|
1173 |
|
static void output_lookup_pools(struct Output *t) |
|
1174 |
|
{ |
|
1175 |
|
if (OPTS(SWITCH)) { |
|
1176 |
|
if (OPTS(TYPE) || (OPTS(DUP) && t->total_duplicates > 0)) |
|
1177 |
|
output_string_pool(t); |
|
1178 |
|
} else |
|
1179 |
|
output_string_pool(t); |
|
1180 |
|
}/*}}}*/ |
|
1181 |
|
/*{{{ output_string_pool */ |
|
1182 |
|
/* |
|
1183 |
|
* Prints out the string pool, containing the strings of the keyword table. |
|
1184 |
|
* Only called if option[SHAREDLIB] |
|
1185 |
|
*/ |
|
1186 |
|
static void output_string_pool(struct Output *t) |
|
1187 |
|
{ |
|
1188 |
|
u8 *indent; |
|
1189 |
|
s32 index; |
|
1190 |
|
struct Keyword_List *tmp; |
|
1191 |
|
|
|
1192 |
|
indent = OPTS(TYPE) || OPTS(GLOBAL) ? "" : " "; |
|
1193 |
|
|
|
1194 |
|
printf( |
|
1195 |
|
"%sstruct %s_t\n" |
|
1196 |
|
"%s {\n", indent, options->stringpool_name, indent); |
|
1197 |
|
tmp = t->head; |
|
1198 |
|
index = 0; |
|
1199 |
|
loop { |
|
1200 |
|
struct Keyword *kw; |
|
1201 |
|
|
|
1202 |
|
if (tmp == 0) |
|
1203 |
|
break; |
|
1204 |
|
kw = tmp->kw; |
|
1205 |
|
/* |
|
1206 |
|
* If generating a switch statement, and there is no user defined type, we generate |
|
1207 |
|
* non-duplicates directly in the code. Only duplicates go into the table. |
|
1208 |
|
*/ |
|
1209 |
|
if (OPTS(SWITCH) && !OPTS(TYPE) && kw->duplicate_link == 0) |
|
1210 |
|
continue; |
|
1211 |
|
if (!OPTS(SWITCH) && !OPTS(DUP)) |
|
1212 |
|
index = kw->hash_value; |
|
1213 |
|
printf("%s char %s_str%d[sizeof(", indent, options->stringpool_name, index); |
|
1214 |
|
output_string(kw->allchars, kw->allchars_length); |
|
1215 |
|
printf(")];\n"); |
|
1216 |
|
/* deal with duplicates specially */ |
|
1217 |
|
if (kw->duplicate_link) {/* implies option[DUP] */ |
|
1218 |
|
struct Keyword *links; |
|
1219 |
|
|
|
1220 |
|
links = kw->duplicate_link; |
|
1221 |
|
loop { |
|
1222 |
|
if (links == 0) |
|
1223 |
|
break; |
|
1224 |
|
if (!(links->allchars_length == kw->allchars_length |
|
1225 |
|
&& memcmp(links->allchars, kw->allchars, |
|
1226 |
|
kw->allchars_length) == 0)) { |
|
1227 |
|
++index; |
|
1228 |
|
printf("%s char %s_str%d[sizeof(", indent, |
|
1229 |
|
options->stringpool_name, index); |
|
1230 |
|
output_string(links->allchars, links->allchars_length); |
|
1231 |
|
printf(")];\n"); |
|
1232 |
|
} |
|
1233 |
|
links = links->duplicate_link; |
|
1234 |
|
} |
|
1235 |
|
} |
|
1236 |
|
++index; |
|
1237 |
|
tmp = tmp->next; |
|
1238 |
|
} |
|
1239 |
|
printf( |
|
1240 |
|
"%s };\n", indent); |
|
1241 |
|
printf( |
|
1242 |
|
"%sstatic %sstruct %s_t %s_contents =\n" |
|
1243 |
|
"%s {\n", indent, const_readonly_array, options->stringpool_name, options->stringpool_name, |
|
1244 |
|
indent); |
|
1245 |
|
tmp = t->head; |
|
1246 |
|
index = 0; |
|
1247 |
|
loop { |
|
1248 |
|
struct Keyword *kw; |
|
1249 |
|
|
|
1250 |
|
if (tmp == 0) |
|
1251 |
|
break; |
|
1252 |
|
kw = tmp->kw; |
|
1253 |
|
/* |
|
1254 |
|
* If generating a switch statement, and there is no user defined type, we generate |
|
1255 |
|
* non-duplicates directly in the code. Only duplicates go into the table. |
|
1256 |
|
*/ |
|
1257 |
|
if (OPTS(SWITCH) && !OPTS(TYPE) && kw->duplicate_link == 0) |
|
1258 |
|
continue; |
|
1259 |
|
if (index > 0) |
|
1260 |
|
printf(",\n"); |
|
1261 |
|
|
|
1262 |
|
if (!OPTS(SWITCH) && !OPTS(DUP)) |
|
1263 |
|
index = kw->hash_value; |
|
1264 |
|
printf( |
|
1265 |
|
"%s ", indent); |
|
1266 |
|
output_string(kw->allchars, kw->allchars_length); |
|
1267 |
|
/* deal with duplicates specially */ |
|
1268 |
|
if (kw->duplicate_link != 0) {/* implies option[DUP] */ |
|
1269 |
|
struct Keyword *links; |
|
1270 |
|
|
|
1271 |
|
links = kw->duplicate_link; |
|
1272 |
|
loop { |
|
1273 |
|
if (links == 0) |
|
1274 |
|
break; |
|
1275 |
|
if (!(links->allchars_length == kw->allchars_length |
|
1276 |
|
&& memcmp(links->allchars, kw->allchars, |
|
1277 |
|
kw->allchars_length) == 0)) { |
|
1278 |
|
++index; |
|
1279 |
|
printf(",\n"); |
|
1280 |
|
printf( |
|
1281 |
|
"%s ", indent); |
|
1282 |
|
output_string(links->allchars, links->allchars_length); |
|
1283 |
|
} |
|
1284 |
|
links = links->duplicate_link; |
|
1285 |
|
} |
|
1286 |
|
} |
|
1287 |
|
++index; |
|
1288 |
|
tmp = tmp->next; |
|
1289 |
|
} |
|
1290 |
|
if (index > 0) |
|
1291 |
|
printf("\n"); |
|
1292 |
|
printf( |
|
1293 |
|
"%s };\n", indent); |
|
1294 |
|
printf( |
|
1295 |
|
"%s#define %s ((%schar *) &%s_contents)\n", indent, options->stringpool_name, const_always, |
|
1296 |
|
options->stringpool_name); |
|
1297 |
|
if (OPTS(GLOBAL)) |
|
1298 |
|
printf( |
|
1299 |
|
"\n"); |
|
1300 |
|
}/*}}}*/ |
|
1301 |
|
/*{{{ output_lookup_tables */ |
|
1302 |
|
/* generate all the tables needed for the lookup function */ |
|
1303 |
|
static void output_lookup_tables(struct Output *t) |
|
1304 |
|
{ |
|
1305 |
|
if (OPTS(SWITCH)) { |
|
1306 |
|
/* use the switch in place of lookup table */ |
|
1307 |
|
if (OPTS(LENTABLE) && (OPTS(DUP) && t->total_duplicates > 0)) |
|
1308 |
|
output_keylength_table(t); |
|
1309 |
|
if (OPTS(TYPE) || (OPTS(DUP) && t->total_duplicates > 0)) |
|
1310 |
|
output_keyword_table(t); |
|
1311 |
|
} else { |
|
1312 |
|
/* use the lookup table, in place of switch */ |
|
1313 |
|
if (OPTS(LENTABLE)) |
|
1314 |
|
output_keylength_table(t); |
|
1315 |
|
output_keyword_table(t); |
|
1316 |
|
output_lookup_array(t); |
|
1317 |
|
} |
|
1318 |
|
}/*}}}*/ |
|
1319 |
|
/*{{{ output_keylength_table */ |
|
1320 |
|
/* |
|
1321 |
|
* Prints out a table of keyword lengths, for use with the comparison code in generated function |
|
1322 |
|
* 'in_word_set'. Only called if option[LENTABLE]. |
|
1323 |
|
*/ |
|
1324 |
|
static void output_keylength_table(struct Output *t) |
|
1325 |
|
{ |
|
1326 |
|
s32 columns; |
|
1327 |
|
u8 *indent; |
|
1328 |
|
s32 index; |
|
1329 |
|
s32 column; |
|
1330 |
|
struct Keyword_List *tmp; |
|
1331 |
|
|
|
1332 |
|
columns = 14; |
|
1333 |
|
indent = OPTS(GLOBAL) ? "" : " "; |
|
1334 |
|
|
|
1335 |
|
printf( |
|
1336 |
|
"%sstatic %s%s %s[] =\n" |
|
1337 |
|
"%s {", indent, const_readonly_array, smallest_integral_type(t->max_key_len), |
|
1338 |
|
options->lengthtable_name, indent); |
|
1339 |
|
column = 0; |
|
1340 |
|
tmp = t->head; |
|
1341 |
|
index = 0; |
|
1342 |
|
loop { |
|
1343 |
|
struct Keyword *kw; |
|
1344 |
|
|
|
1345 |
|
if (tmp == 0) |
|
1346 |
|
break; |
|
1347 |
|
kw = tmp->kw; |
|
1348 |
|
/* |
|
1349 |
|
* If generating a switch statement, and there is no user defined type, we generate |
|
1350 |
|
* non-duplicates directly in the code. Only duplicates go into the table. |
|
1351 |
|
*/ |
|
1352 |
|
if (OPTS(SWITCH) && !OPTS(TYPE) && kw->duplicate_link == 0) |
|
1353 |
|
continue; |
|
1354 |
|
if (index < kw->hash_value && !OPTS(SWITCH) && !OPTS(DUP)) { |
|
1355 |
|
/* some blank entries */ |
|
1356 |
|
loop { |
|
1357 |
|
if (index >= kw->hash_value) |
|
1358 |
|
break; |
|
1359 |
|
if (index > 0) |
|
1360 |
|
printf(","); |
|
1361 |
|
if ((column % columns) == 0) |
|
1362 |
|
printf( |
|
1363 |
|
"\n%s ", indent); |
|
1364 |
|
++column; |
|
1365 |
|
printf("%3d", 0); |
|
1366 |
|
++index; |
|
1367 |
|
} |
|
1368 |
|
} |
|
1369 |
|
if (index > 0) |
|
1370 |
|
printf(","); |
|
1371 |
|
if ((column % columns) == 0) |
|
1372 |
|
printf( |
|
1373 |
|
"\n%s ", indent); |
|
1374 |
|
++column; |
|
1375 |
|
printf("%3d", kw->allchars_length); |
|
1376 |
|
++index; |
|
1377 |
|
/* deal with duplicates specially */ |
|
1378 |
|
if (kw->duplicate_link != 0) { |
|
1379 |
|
struct Keyword *links; |
|
1380 |
|
|
|
1381 |
|
links = kw->duplicate_link; |
|
1382 |
|
loop { |
|
1383 |
|
if (links == 0) |
|
1384 |
|
break; |
|
1385 |
|
printf(","); |
|
1386 |
|
if ((column % columns) == 0) |
|
1387 |
|
printf( |
|
1388 |
|
"\n%s ", indent); |
|
1389 |
|
++column; |
|
1390 |
|
printf("%3d", links->allchars_length); |
|
1391 |
|
++index; |
|
1392 |
|
links = links->duplicate_link; |
|
1393 |
|
} |
|
1394 |
|
} |
|
1395 |
|
tmp = tmp->next; |
|
1396 |
|
} |
|
1397 |
|
printf( |
|
1398 |
|
"\n%s };\n", indent); |
|
1399 |
|
if (OPTS(GLOBAL)) |
|
1400 |
|
printf( |
|
1401 |
|
"\n"); |
|
1402 |
|
}/*}}}*/ |
|
1403 |
|
/*{{{ output_keyword_table */ |
|
1404 |
|
/* prints out the array containing the keywords for the hash function */ |
|
1405 |
|
static void output_keyword_table(struct Output *t) |
|
1406 |
|
{ |
|
1407 |
|
u8 *indent; |
|
1408 |
|
s32 index; |
|
1409 |
|
struct Keyword_List *tmp; |
|
1410 |
|
|
|
1411 |
|
indent = OPTS(GLOBAL) ? "" : " "; |
|
1412 |
|
printf( |
|
1413 |
|
"%sstatic ", indent); |
|
1414 |
|
output_const_type(const_readonly_array, t->wordlist_eltype); |
|
1415 |
|
printf("%s[] =\n" |
|
1416 |
|
"%s {\n", options->wordlist_name, indent); |
|
1417 |
|
/* generate an array of reserved words at appropriate locations */ |
|
1418 |
|
tmp = t->head; |
|
1419 |
|
index = 0; |
|
1420 |
|
loop { |
|
1421 |
|
struct Keyword *kw; |
|
1422 |
|
|
|
1423 |
|
if (tmp == 0) |
|
1424 |
|
break; |
|
1425 |
|
kw = tmp->kw; |
|
1426 |
|
/* |
|
1427 |
|
* If generating a switch statement, and there is no user defined type, we generate |
|
1428 |
|
* non-duplicates directly in the code. Only duplicates go into the table. |
|
1429 |
|
*/ |
|
1430 |
|
if (OPTS(SWITCH) && !OPTS(TYPE) && kw->duplicate_link == 0) |
|
1431 |
|
continue; |
|
1432 |
|
if (index > 0) |
|
1433 |
|
printf(",\n"); |
|
1434 |
|
if (index < kw->hash_value && !OPTS(SWITCH) && !OPTS(DUP)) { |
|
1435 |
|
/* some blank entries */ |
|
1436 |
|
output_keyword_blank_entries(kw->hash_value - index, indent); |
|
1437 |
|
printf(",\n"); |
|
1438 |
|
index = kw->hash_value; |
|
1439 |
|
} |
|
1440 |
|
kw->final_index = index; |
|
1441 |
|
output_keyword_entry(kw, index, indent, false); |
|
1442 |
|
/* deal with duplicates specially */ |
|
1443 |
|
if (kw->duplicate_link != 0) { /* implies option[DUP] */ |
|
1444 |
|
struct Keyword *links; |
|
1445 |
|
|
|
1446 |
|
links = kw->duplicate_link; |
|
1447 |
|
loop { |
|
1448 |
|
s32 stringpool_index; |
|
1449 |
|
|
|
1450 |
|
if (links == 0) |
|
1451 |
|
break; |
|
1452 |
|
++index; |
|
1453 |
|
links->final_index = index; |
|
1454 |
|
printf(",\n"); |
|
1455 |
|
stringpool_index = |
|
1456 |
|
(links->allchars_length == kw->allchars_length |
|
1457 |
|
&& memcmp(links->allchars, kw->allchars, |
|
1458 |
|
kw->allchars_length) == 0 |
|
1459 |
|
? kw->final_index : links->final_index); |
|
1460 |
|
output_keyword_entry(links, stringpool_index, indent, true); |
|
1461 |
|
links = links->duplicate_link; |
|
1462 |
|
} |
|
1463 |
|
} |
|
1464 |
|
++index; |
|
1465 |
|
tmp = tmp->next; |
|
1466 |
|
} |
|
1467 |
|
if (index > 0) |
|
1468 |
|
printf("\n"); |
|
1469 |
|
printf( |
|
1470 |
|
"%s };\n\n", indent); |
|
1471 |
|
}/*}}}*/ |
|
1472 |
|
/*{{{ output_lookup_array */ |
|
1473 |
|
/* |
|
1474 |
|
* generates the large, sparse table that maps hash values into the smaller, contiguous range of the |
|
1475 |
|
* keyword table |
|
1476 |
|
*/ |
|
1477 |
|
static void output_lookup_array(struct Output *t) |
|
1478 |
|
{ |
|
1479 |
|
s32 DEFAULT_VALUE; |
|
1480 |
|
struct Duplicate_Entry *duplicates; |
|
1481 |
|
s32 *lookup_array; |
|
1482 |
|
s32 lookup_array_size; |
|
1483 |
|
struct Duplicate_Entry *dup_ptr; |
|
1484 |
|
s32 *lookup_ptr; |
|
1485 |
|
struct Keyword_List *tmp; |
|
1486 |
|
s32 min; |
|
1487 |
|
s32 max; |
|
1488 |
|
u8 *indent; |
|
1489 |
|
s32 field_width; |
|
1490 |
|
s32 columns; |
|
1491 |
|
s32 column; |
|
1492 |
|
s32 i; |
|
1493 |
|
if (!OPTS(DUP)) |
|
1494 |
|
return; |
|
1495 |
|
|
|
1496 |
|
DEFAULT_VALUE = -1; |
|
1497 |
|
|
|
1498 |
|
duplicates = calloc(t->total_duplicates, sizeof(*duplicates)); |
|
1499 |
|
lookup_array = calloc(t->max_hash_value + 1 + 2 * t->total_duplicates, |
|
1500 |
|
sizeof(*lookup_array)); |
|
1501 |
|
lookup_array_size = t->max_hash_value + 1; |
|
1502 |
|
dup_ptr = &duplicates[0]; |
|
1503 |
|
lookup_ptr = &lookup_array[t->max_hash_value + 1 + 2 * t->total_duplicates]; |
|
1504 |
|
|
|
1505 |
|
loop { |
|
1506 |
|
if (lookup_ptr <= lookup_array) |
|
1507 |
|
break; |
|
1508 |
|
*--lookup_ptr = DEFAULT_VALUE; |
|
1509 |
|
} |
|
1510 |
|
/* now dup_ptr = &duplicates[0] and lookup_ptr = &lookup_array[0] */ |
|
1511 |
|
tmp = t->head; |
|
1512 |
|
loop { |
|
1513 |
|
s32 hash_value; |
|
1514 |
|
|
|
1515 |
|
if (tmp == 0) |
|
1516 |
|
break; |
|
1517 |
|
hash_value = tmp->kw->hash_value; |
|
1518 |
|
lookup_array[hash_value] = tmp->kw->final_index; |
|
1519 |
|
if (OPTS(DEBUG)) |
|
1520 |
|
fprintf(stderr, "keyword = %.*s, index = %d\n", tmp->kw->allchars_length, tmp->kw->allchars, tmp->kw->final_index); |
|
1521 |
|
if (tmp->kw->duplicate_link != 0) { |
|
1522 |
|
struct Keyword *ptr; |
|
1523 |
|
|
|
1524 |
|
/* start a duplicate entry */ |
|
1525 |
|
dup_ptr->hash_value = hash_value; |
|
1526 |
|
dup_ptr->index = tmp->kw->final_index; |
|
1527 |
|
dup_ptr->count = 1; |
|
1528 |
|
|
|
1529 |
|
ptr = tmp->kw->duplicate_link; |
|
1530 |
|
loop { |
|
1531 |
|
if (ptr != 0) |
|
1532 |
|
break; |
|
1533 |
|
++(dup_ptr->count); |
|
1534 |
|
if (OPTS(DEBUG)) |
|
1535 |
|
fprintf(stderr, "static linked keyword = %.*s, index = %d\n", ptr->allchars_length, ptr->allchars, ptr->final_index); |
|
1536 |
|
ptr = ptr->duplicate_link; |
|
1537 |
|
} |
|
1538 |
|
++dup_ptr; |
|
1539 |
|
} |
|
1540 |
|
tmp = tmp->next; |
|
1541 |
|
} |
|
1542 |
|
loop { |
|
1543 |
|
s32 i; |
|
1544 |
|
|
|
1545 |
|
if (dup_ptr <= duplicates) |
|
1546 |
|
break; |
|
1547 |
|
dup_ptr--; |
|
1548 |
|
if (OPTS(DEBUG)) |
|
1549 |
|
fprintf(stderr, "dup_ptr[%lu]: hash_value = %d, index = %d, count = %d\n", (unsigned long)(dup_ptr - duplicates), dup_ptr->hash_value, dup_ptr->index, dup_ptr->count); |
|
1550 |
|
/* |
|
1551 |
|
* start searching for available space towards the right part of the lookup |
|
1552 |
|
* array |
|
1553 |
|
*/ |
|
1554 |
|
i = dup_ptr->hash_value; |
|
1555 |
|
loop { |
|
1556 |
|
if (i >= lookup_array_size - 1) |
|
1557 |
|
break; |
|
1558 |
|
if (lookup_array[i] == DEFAULT_VALUE && lookup_array[i + 1] |
|
1559 |
|
== DEFAULT_VALUE) |
|
1560 |
|
goto found_i; |
|
1561 |
|
++i; |
|
1562 |
|
} |
|
1563 |
|
/* if we didn't find it to the right look to the left instead... */ |
|
1564 |
|
i = dup_ptr->hash_value - 1; |
|
1565 |
|
loop { |
|
1566 |
|
if (i < 0) |
|
1567 |
|
break; |
|
1568 |
|
if (lookup_array[i] == DEFAULT_VALUE && lookup_array[i + 1] |
|
1569 |
|
== DEFAULT_VALUE) |
|
1570 |
|
goto found_i; |
|
1571 |
|
i--; |
|
1572 |
|
} |
|
1573 |
|
/* append to the end of lookup_array */ |
|
1574 |
|
i = lookup_array_size; |
|
1575 |
|
lookup_array_size += 2; |
|
1576 |
|
found_i: |
|
1577 |
|
/* |
|
1578 |
|
* Put in an indirection from dup_ptr->_hash_value to i. |
|
1579 |
|
* At i and i+1 store dup_ptr->_final_index and dup_ptr->count. |
|
1580 |
|
*/ |
|
1581 |
|
lookup_array[dup_ptr->hash_value] = - 1 - t->total_keys - i; |
|
1582 |
|
lookup_array[i] = - t->total_keys + dup_ptr->index; |
|
1583 |
|
lookup_array[i + 1] = - dup_ptr->count; |
|
1584 |
|
/* All these three values are <= -2, distinct from DEFAULT_VALUE */ |
|
1585 |
|
} |
|
1586 |
|
/* the values of the lookup array are now known */ |
|
1587 |
|
min = S32_MAX; |
|
1588 |
|
max = S32_MIN; |
|
1589 |
|
lookup_ptr = lookup_array + lookup_array_size; |
|
1590 |
|
loop { |
|
1591 |
|
s32 val; |
|
1592 |
|
|
|
1593 |
|
if (lookup_ptr <= lookup_array) |
|
1594 |
|
break; |
|
1595 |
|
val = *--lookup_ptr; |
|
1596 |
|
if (min > val) |
|
1597 |
|
min = val; |
|
1598 |
|
if (max < val) |
|
1599 |
|
max = val; |
|
1600 |
|
} |
|
1601 |
|
indent = OPTS(GLOBAL) ? "" : " "; |
|
1602 |
|
printf( |
|
1603 |
|
"%sstatic %s%s lookup[] =\n" |
|
1604 |
|
"%s {", indent, const_readonly_array, smallest_integral_type_2(min, max), indent); |
|
1605 |
|
/* calculate maximum number of digits required for MIN..MAX */ |
|
1606 |
|
{ |
|
1607 |
|
s32 trunc; |
|
1608 |
|
|
|
1609 |
|
field_width = 2; |
|
1610 |
|
trunc = max; |
|
1611 |
|
loop { |
|
1612 |
|
trunc /= 10; |
|
1613 |
|
if (trunc <= 0) |
|
1614 |
|
break; |
|
1615 |
|
++field_width; |
|
1616 |
|
} |
|
1617 |
|
} |
|
1618 |
|
if (min < 0) { |
|
1619 |
|
s32 neg_field_width; |
|
1620 |
|
s32 trunc; |
|
1621 |
|
|
|
1622 |
|
neg_field_width = 2; |
|
1623 |
|
trunc = -min; |
|
1624 |
|
loop { |
|
1625 |
|
trunc /= 10; |
|
1626 |
|
if (trunc <= 0) |
|
1627 |
|
break; |
|
1628 |
|
++neg_field_width; |
|
1629 |
|
} |
|
1630 |
|
++neg_field_width; /* account for the minus sign */ |
|
1631 |
|
if (field_width < neg_field_width) |
|
1632 |
|
field_width = neg_field_width; |
|
1633 |
|
} |
|
1634 |
|
columns = 42 / field_width; |
|
1635 |
|
column = 0; |
|
1636 |
|
i = 0; |
|
1637 |
|
loop { |
|
1638 |
|
if (i >= lookup_array_size) |
|
1639 |
|
break; |
|
1640 |
|
if (i > 0) |
|
1641 |
|
printf(","); |
|
1642 |
|
if ((column % columns) == 0) |
|
1643 |
|
printf("\n%s ", indent); |
|
1644 |
|
++column; |
|
1645 |
|
printf("%*d", field_width, lookup_array[i]); |
|
1646 |
|
++i; |
|
1647 |
|
} |
|
1648 |
|
printf( |
|
1649 |
|
"\n%s };\n\n", indent); |
|
1650 |
|
free(duplicates); |
|
1651 |
|
free(lookup_array); |
|
1652 |
|
}/*}}}*/ |
|
1653 |
|
/*{{{ output_lookup_function */ |
|
1654 |
|
/* generates C code for the lookup function */ |
|
1655 |
|
static void output_lookup_function(struct Output *t) |
|
1656 |
|
{ |
|
1657 |
|
/* output the function's head */ |
|
1658 |
|
/* |
|
1659 |
|
* We don't declare the lookup function 'static' because we cannot make assumptions about |
|
1660 |
|
* the compilation units of the user. |
|
1661 |
|
* Since we don't make it 'static', it makes no sense to declare it 'inline', because |
|
1662 |
|
* non-static inline functions must not reference static functions or variables, see ISO C |
|
1663 |
|
* 99 section 6.7.4.(3). |
|
1664 |
|
*/ |
|
1665 |
|
printf( |
|
1666 |
|
"%s%s\n", const_for_struct, t->return_type); |
|
1667 |
|
if (OPTS(CPLUSPLUS)) |
|
1668 |
|
printf( |
|
1669 |
|
"%s::", options->class_name); |
|
1670 |
|
printf("%s ", options->function_name); |
|
1671 |
|
printf( |
|
1672 |
|
OPTS(KRC) ? "(str, len)\n" |
|
1673 |
|
" %schar *str;\n" |
|
1674 |
|
" %ssize_t len;\n" : |
|
1675 |
|
OPTS(C) ? "(str, len)\n" |
|
1676 |
|
" %sconst char *str;\n" |
|
1677 |
|
" %ssize_t len;\n" : |
|
1678 |
|
OPTS(ANSIC) || OPTS(CPLUSPLUS) ? "(%sconst char *str, %ssize_t len)\n" : |
|
1679 |
|
"", register_scs, register_scs); |
|
1680 |
|
|
|
1681 |
|
/* output the function's body */ |
|
1682 |
|
printf( |
|
1683 |
|
"{\n"); |
|
1684 |
|
if (OPTS(ENUM) && !OPTS(GLOBAL)) |
|
1685 |
|
output_constants_enum(t, " "); |
|
1686 |
|
if (OPTS(SHAREDLIB) && !(OPTS(GLOBAL) || OPTS(TYPE))) |
|
1687 |
|
output_lookup_pools(t); |
|
1688 |
|
if (!OPTS(GLOBAL)) |
|
1689 |
|
output_lookup_tables(t); |
|
1690 |
|
if (OPTS(LENTABLE)) |
|
1691 |
|
output_lookup_function_body(t, output_comparison_memcmp); |
|
1692 |
|
else { |
|
1693 |
|
if (OPTS(COMP)) |
|
1694 |
|
output_lookup_function_body(t, output_comparison_strncmp); |
|
1695 |
|
else |
|
1696 |
|
output_lookup_function_body(t, output_comparison_strcmp); |
|
1697 |
|
} |
|
1698 |
|
printf( |
|
1699 |
|
"}\n"); |
|
1700 |
|
}/*}}}*/ |
|
1701 |
|
/*{{{ output_lookup_function_body */ |
|
1702 |
|
static void output_lookup_function_body(struct Output *t, |
|
1703 |
|
void (*output_comparison)(u8 *expr1, u8 *expr2)) |
|
1704 |
|
{ |
|
1705 |
|
printf( |
|
1706 |
|
" if (len <= %sMAX_WORD_LENGTH && len >= %sMIN_WORD_LENGTH)\n" |
|
1707 |
|
" {\n" |
|
1708 |
|
" %sunsigned int key = %s (str, len);\n\n", options->constants_prefix, |
|
1709 |
|
options->constants_prefix, register_scs, options->hash_name); |
|
1710 |
|
if (OPTS(SWITCH)) { |
|
1711 |
|
s32 switch_size; |
|
1712 |
|
s32 num_switches; |
|
1713 |
|
|
|
1714 |
|
switch_size = output_num_hash_values(t); |
|
1715 |
|
num_switches = options->total_switches; |
|
1716 |
|
if (num_switches > switch_size) |
|
1717 |
|
num_switches = switch_size; |
|
1718 |
|
printf( |
|
1719 |
|
" if (key <= %sMAX_HASH_VALUE", options->constants_prefix); |
|
1720 |
|
if (t->min_hash_value > 0) |
|
1721 |
|
printf( |
|
1722 |
|
" && key >= %sMIN_HASH_VALUE", options->constants_prefix); |
|
1723 |
|
printf ( |
|
1724 |
|
")\n" |
|
1725 |
|
" {\n"); |
|
1726 |
|
if (OPTS(DUP) && t->total_duplicates > 0) { |
|
1727 |
|
if (OPTS(LENTABLE)) |
|
1728 |
|
printf( |
|
1729 |
|
" %s%s%s *lengthptr;\n", register_scs, const_always, smallest_integral_type( |
|
1730 |
|
t->max_key_len)); |
|
1731 |
|
printf( |
|
1732 |
|
" %s", register_scs); |
|
1733 |
|
output_const_type(const_readonly_array, t->wordlist_eltype); |
|
1734 |
|
printf("*wordptr;\n"); |
|
1735 |
|
printf( |
|
1736 |
|
" %s", register_scs); |
|
1737 |
|
output_const_type(const_readonly_array, t->wordlist_eltype); |
|
1738 |
|
printf("*wordendptr;\n"); |
|
1739 |
|
} |
|
1740 |
|
if (OPTS(TYPE)) { |
|
1741 |
|
printf( |
|
1742 |
|
" %s", register_scs); |
|
1743 |
|
output_const_type(const_readonly_array, t->struct_tag); |
|
1744 |
|
printf("*resword;\n\n"); |
|
1745 |
|
} else |
|
1746 |
|
printf( |
|
1747 |
|
" %s%sresword;\n\n", register_scs, t->struct_tag); |
|
1748 |
|
output_switches(t->head, num_switches, switch_size, t->min_hash_value, |
|
1749 |
|
t->max_hash_value, 10); |
|
1750 |
|
printf( |
|
1751 |
|
" return 0;\n"); |
|
1752 |
|
if (OPTS(DUP) && t->total_duplicates > 0) { |
|
1753 |
|
s32 indent; |
|
1754 |
|
|
|
1755 |
|
indent = 8; |
|
1756 |
|
printf( |
|
1757 |
|
"%*smulticompare:\n" |
|
1758 |
|
"%*s while (wordptr < wordendptr)\n" |
|
1759 |
|
"%*s {\n", indent, "", indent, "", indent, ""); |
|
1760 |
|
if (OPTS(LENTABLE)) { |
|
1761 |
|
printf( |
|
1762 |
|
"%*s if (len == *lengthptr)\n" |
|
1763 |
|
"%*s {\n", indent, "", indent, ""); |
|
1764 |
|
indent += 4; |
|
1765 |
|
} |
|
1766 |
|
printf( |
|
1767 |
|
"%*s %s%schar *s = ", indent, "", register_scs, const_always); |
|
1768 |
|
if (OPTS(TYPE)) |
|
1769 |
|
printf("wordptr->%s", options->slot_name); |
|
1770 |
|
else |
|
1771 |
|
printf("*wordptr"); |
|
1772 |
|
if (OPTS(SHAREDLIB)) |
|
1773 |
|
printf(" + %s", options->stringpool_name); |
|
1774 |
|
printf(";\n\n" |
|
1775 |
|
"%*s if (", indent, ""); |
|
1776 |
|
output_comparison("str", "s"); |
|
1777 |
|
printf(")\n" |
|
1778 |
|
"%*s return %s;\n", indent, "", OPTS(TYPE) ? "wordptr" : "s"); |
|
1779 |
|
if (OPTS(LENTABLE)) { |
|
1780 |
|
indent -= 4; |
|
1781 |
|
printf( |
|
1782 |
|
"%*s }\n", indent, ""); |
|
1783 |
|
} |
|
1784 |
|
if (OPTS(LENTABLE)) |
|
1785 |
|
printf( |
|
1786 |
|
"%*s lengthptr++;\n", indent, ""); |
|
1787 |
|
printf( |
|
1788 |
|
"%*s wordptr++;\n" |
|
1789 |
|
"%*s }\n" |
|
1790 |
|
"%*s return 0;\n", indent, "", indent, "", indent, ""); |
|
1791 |
|
} |
|
1792 |
|
printf( |
|
1793 |
|
" compare:\n"); |
|
1794 |
|
if (OPTS(TYPE)) { |
|
1795 |
|
printf( |
|
1796 |
|
" {\n" |
|
1797 |
|
" %s%schar *s = resword->%s", register_scs, const_always, options->slot_name); |
|
1798 |
|
if (OPTS(SHAREDLIB)) |
|
1799 |
|
printf(" + %s", options->stringpool_name); |
|
1800 |
|
printf(";\n\n" |
|
1801 |
|
" if ("); |
|
1802 |
|
output_comparison("str", "s"); |
|
1803 |
|
printf( |
|
1804 |
|
")\n" |
|
1805 |
|
" return resword;\n" |
|
1806 |
|
" }\n"); |
|
1807 |
|
} else { |
|
1808 |
|
output_comparison("str", "resword"); |
|
1809 |
|
printf( |
|
1810 |
|
")\n" |
|
1811 |
|
" return resword;\n"); |
|
1812 |
|
} |
|
1813 |
|
printf( |
|
1814 |
|
" }\n"); |
|
1815 |
|
} else { |
|
1816 |
|
printf( |
|
1817 |
|
" if (key <= %sMAX_HASH_VALUE)\n", options->constants_prefix); |
|
1818 |
|
if (OPTS(DUP)) { |
|
1819 |
|
s32 indent; |
|
1820 |
|
|
|
1821 |
|
indent = 8; |
|
1822 |
|
printf( |
|
1823 |
|
"%*s{\n" |
|
1824 |
|
"%*s %sint index = lookup[key];\n\n" |
|
1825 |
|
"%*s if (index >= 0)\n", indent, "", indent, "", register_scs, indent, ""); |
|
1826 |
|
if (OPTS(LENTABLE)) { |
|
1827 |
|
printf( |
|
1828 |
|
"%*s {\n" |
|
1829 |
|
"%*s if (len == %s[index])\n", indent, "", indent, "", options->lengthtable_name); |
|
1830 |
|
indent += 4; |
|
1831 |
|
} |
|
1832 |
|
printf( |
|
1833 |
|
"%*s {\n" |
|
1834 |
|
"%*s %s%schar *s = %s[index]", indent, "", indent, "", register_scs, const_always, |
|
1835 |
|
options->wordlist_name); |
|
1836 |
|
if (OPTS(TYPE)) |
|
1837 |
|
printf(".%s", options->slot_name); |
|
1838 |
|
if (OPTS(SHAREDLIB)) |
|
1839 |
|
printf (" + %s", options->stringpool_name); |
|
1840 |
|
printf(";\n\n" |
|
1841 |
|
"%*s if (", indent, ""); |
|
1842 |
|
output_comparison("str", "s"); |
|
1843 |
|
printf (")\n" |
|
1844 |
|
"%*s return ", indent, ""); |
|
1845 |
|
if (OPTS(TYPE)) |
|
1846 |
|
printf("&%s[index]", options->wordlist_name); |
|
1847 |
|
else |
|
1848 |
|
printf("s"); |
|
1849 |
|
printf(";\n" |
|
1850 |
|
"%*s }\n", indent, ""); |
|
1851 |
|
if (OPTS(LENTABLE)) { |
|
1852 |
|
indent -= 4; |
|
1853 |
|
printf( |
|
1854 |
|
"%*s }\n", indent, ""); |
|
1855 |
|
} |
|
1856 |
|
if (t->total_duplicates > 0) { |
|
1857 |
|
printf( |
|
1858 |
|
"%*s else if (index < -%sTOTAL_KEYWORDS)\n" |
|
1859 |
|
"%*s {\n" |
|
1860 |
|
"%*s %sint offset = - 1 - %sTOTAL_KEYWORDS - index;\n", indent, "", options->constants_prefix, |
|
1861 |
|
indent, "", indent, "", register_scs, |
|
1862 |
|
options->constants_prefix); |
|
1863 |
|
if (OPTS(LENTABLE)) |
|
1864 |
|
printf( |
|
1865 |
|
"%*s %s%s%s *lengthptr = &%s[%sTOTAL_KEYWORDS + lookup[offset]];\n", indent, "", |
|
1866 |
|
register_scs, const_always, |
|
1867 |
|
smallest_integral_type(t->max_key_len), |
|
1868 |
|
options->lengthtable_name, |
|
1869 |
|
options->constants_prefix); |
|
1870 |
|
printf( |
|
1871 |
|
"%*s %s", indent, "", register_scs); |
|
1872 |
|
output_const_type(const_readonly_array, t->wordlist_eltype); |
|
1873 |
|
printf("*wordptr = &%s[%sTOTAL_KEYWORDS + lookup[offset]];\n", |
|
1874 |
|
options->wordlist_name, options->constants_prefix); |
|
1875 |
|
printf( |
|
1876 |
|
"%*s %s", indent, "", register_scs); |
|
1877 |
|
output_const_type(const_readonly_array, t->wordlist_eltype); |
|
1878 |
|
printf("*wordendptr = wordptr + -lookup[offset + 1];\n\n"); |
|
1879 |
|
printf( |
|
1880 |
|
"%*s while (wordptr < wordendptr)\n" |
|
1881 |
|
"%*s {\n", indent, "", indent, ""); |
|
1882 |
|
if (OPTS(LENTABLE)) { |
|
1883 |
|
printf( |
|
1884 |
|
"%*s if (len == *lengthptr)\n" |
|
1885 |
|
"%*s {\n", indent, "", indent, ""); |
|
1886 |
|
indent += 4; |
|
1887 |
|
} |
|
1888 |
|
printf( |
|
1889 |
|
"%*s %s%schar *s = ", indent, "", register_scs, const_always); |
|
1890 |
|
if (OPTS(TYPE)) |
|
1891 |
|
printf("wordptr->%s", options->slot_name); |
|
1892 |
|
else |
|
1893 |
|
printf("*wordptr"); |
|
1894 |
|
if (OPTS(SHAREDLIB)) |
|
1895 |
|
printf(" + %s", options->stringpool_name); |
|
1896 |
|
printf (";\n\n" |
|
1897 |
|
"%*s if (", indent, ""); |
|
1898 |
|
output_comparison("str", "s"); |
|
1899 |
|
printf (")\n" |
|
1900 |
|
"%*s return %s;\n", indent, "", OPTS(TYPE) ? "wordptr" : "s"); |
|
1901 |
|
if (OPTS(LENTABLE)) { |
|
1902 |
|
indent -= 4; |
|
1903 |
|
printf( |
|
1904 |
|
"%*s }\n", indent, ""); |
|
1905 |
|
} |
|
1906 |
|
if (OPTS(LENTABLE)) |
|
1907 |
|
printf( |
|
1908 |
|
"%*s lengthptr++;\n", indent, ""); |
|
1909 |
|
printf( |
|
1910 |
|
"%*s wordptr++;\n" |
|
1911 |
|
"%*s }\n" |
|
1912 |
|
"%*s }\n", indent, "", indent, "", indent, ""); |
|
1913 |
|
} |
|
1914 |
|
printf( |
|
1915 |
|
"%*s}\n", indent, ""); |
|
1916 |
|
} else { |
|
1917 |
|
s32 indent; |
|
1918 |
|
|
|
1919 |
|
indent = 8; |
|
1920 |
|
if (OPTS(LENTABLE)) { |
|
1921 |
|
printf( |
|
1922 |
|
"%*sif (len == %s[key])\n", indent, "", options->lengthtable_name); |
|
1923 |
|
indent += 2; |
|
1924 |
|
} |
|
1925 |
|
if (OPTS(SHAREDLIB)) { |
|
1926 |
|
if (!OPTS(LENTABLE)) { |
|
1927 |
|
printf( |
|
1928 |
|
"%*s{\n" |
|
1929 |
|
"%*s %sint o = %s[key]", indent, "", indent, "", register_scs, |
|
1930 |
|
options->wordlist_name); |
|
1931 |
|
if (OPTS(TYPE)) |
|
1932 |
|
printf(".%s", options->slot_name); |
|
1933 |
|
printf (";\n" |
|
1934 |
|
"%*s if (o >= 0)\n" |
|
1935 |
|
"%*s {\n", indent, "", indent, ""); |
|
1936 |
|
indent += 4; |
|
1937 |
|
printf( |
|
1938 |
|
"%*s %s%schar *s = o", indent, "", register_scs, const_always); |
|
1939 |
|
} else { |
|
1940 |
|
/* |
|
1941 |
|
* no need for the (o >= 0) test, because the |
|
1942 |
|
* (len == lengthtable[key]) test already guarantees that |
|
1943 |
|
* key points to nonempty table entry |
|
1944 |
|
*/ |
|
1945 |
|
printf ( |
|
1946 |
|
"%*s{\n" |
|
1947 |
|
"%*s %s%schar *s = %s[key]", indent, "", indent, "", register_scs, const_always, |
|
1948 |
|
options->wordlist_name); |
|
1949 |
|
if (OPTS(TYPE)) |
|
1950 |
|
printf(".%s", options->slot_name); |
|
1951 |
|
} |
|
1952 |
|
printf (" + %s", options->stringpool_name); |
|
1953 |
|
} else { |
|
1954 |
|
printf( |
|
1955 |
|
"%*s{\n" |
|
1956 |
|
"%*s %s%schar *s = %s[key]", indent, "", indent, "", register_scs, const_always, |
|
1957 |
|
options->wordlist_name); |
|
1958 |
|
if (OPTS(TYPE)) |
|
1959 |
|
printf(".%s", options->slot_name); |
|
1960 |
|
} |
|
1961 |
|
printf (";\n\n" |
|
1962 |
|
"%*s if (", indent, ""); |
|
1963 |
|
if (!OPTS(SHAREDLIB) && OPTS(NULLSTRINGS)) |
|
1964 |
|
printf ("s && "); |
|
1965 |
|
output_comparison("str", "s"); |
|
1966 |
|
printf (")\n" |
|
1967 |
|
"%*s return ", indent, ""); |
|
1968 |
|
if (OPTS(TYPE)) |
|
1969 |
|
printf("&%s[key]", options->wordlist_name); |
|
1970 |
|
else |
|
1971 |
|
printf("s"); |
|
1972 |
|
printf(";\n"); |
|
1973 |
|
if (OPTS(SHAREDLIB) && !OPTS(LENTABLE)) { |
|
1974 |
|
indent -= 4; |
|
1975 |
|
printf( |
|
1976 |
|
"%*s }\n", indent, ""); |
|
1977 |
|
} |
|
1978 |
|
printf( |
|
1979 |
|
"%*s}\n", indent, ""); |
|
1980 |
|
} |
|
1981 |
|
} |
|
1982 |
|
printf( |
|
1983 |
|
" }\n" |
|
1984 |
|
" return 0;\n"); |
|
1985 |
|
}/*}}}*/ |
|
1986 |
|
/*{{{ output_num_hash_values */ |
|
1987 |
|
/* Returns the number of different hash values. */ |
|
1988 |
|
static s32 output_num_hash_values(struct Output *t) |
|
1989 |
|
{ |
|
1990 |
|
s32 count; |
|
1991 |
|
struct Keyword_List *tmp; |
|
1992 |
|
/* |
|
1993 |
|
* since the list is already sorted by hash value and doesn't contain duplicates, we can |
|
1994 |
|
* simply count the number of keywords on the list |
|
1995 |
|
*/ |
|
1996 |
|
count = 0; |
|
1997 |
|
tmp = t->head; |
|
1998 |
|
loop { |
|
1999 |
|
if (tmp == 0) |
|
2000 |
|
break; |
|
2001 |
|
++count; |
|
2002 |
|
tmp = tmp->next; |
|
2003 |
|
} |
|
2004 |
|
return count; |
|
2005 |
|
}/*}}}*/ |
|
2006 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
2007 |
|
#undef USE_DOWNCASE_TABLE |
|
2008 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
2009 |
|
#define EPILOG |
|
2010 |
|
#include "namespace/globals.h" |
|
2011 |
|
#include "namespace/options.h" |
|
2012 |
|
#include "namespace/output.h" |
|
2013 |
|
#include "namespace/output.c" |
|
2014 |
|
#include "namespace/keyword.h" |
|
2015 |
|
#include "namespace/keyword_list.h" |
|
2016 |
|
#include "namespace/positions.h" |
|
2017 |
|
#undef EPILOG |
|
2018 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
2019 |
|
#endif |
File search.c added (mode: 100644) (index 0000000..294deac) |
|
1 |
|
#ifndef CGPERF_SEARCH_C |
|
2 |
|
#define CGPERF_SEARCH_C |
|
3 |
|
#include <stdlib.h> |
|
4 |
|
#include <string.h> |
|
5 |
|
#include <time.h> |
|
6 |
|
#include <math.h> |
|
7 |
|
#include "c_fixing.h" |
|
8 |
|
#include "globals.h" |
|
9 |
|
#include "search.h" |
|
10 |
|
#include "keyword.h" |
|
11 |
|
#include "keyword_list.h" |
|
12 |
|
#include "options.h" |
|
13 |
|
#include "positions.h" |
|
14 |
|
#include "hash-table.h" |
|
15 |
|
#include "bool-array.h" |
|
16 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
17 |
|
#include "namespace/globals.h" |
|
18 |
|
#include "namespace/search.h" |
|
19 |
|
#include "namespace/keyword.h" |
|
20 |
|
#include "namespace/keyword_list.h" |
|
21 |
|
#include "namespace/options.h" |
|
22 |
|
#include "namespace/positions.h" |
|
23 |
|
#include "namespace/hash-table.h" |
|
24 |
|
#include "namespace/bool-array.h" |
|
25 |
|
#include "namespace/search.c" |
|
26 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
27 |
|
/*{{{ THEORY */ |
|
28 |
|
/* The general form of the hash function is |
|
29 |
|
|
|
30 |
|
hash (keyword) = sum (asso_values[keyword[i] + alpha_inc[i]] : i in Pos) |
|
31 |
|
+ len (keyword) |
|
32 |
|
|
|
33 |
|
where Pos is a set of byte positions, |
|
34 |
|
each alpha_inc[i] is a nonnegative integer, |
|
35 |
|
each asso_values[c] is a nonnegative integer, |
|
36 |
|
len (keyword) is the keyword's length if _hash_includes_len, or 0 otherwise. |
|
37 |
|
|
|
38 |
|
Theorem 1: If all keywords are different, there is a set Pos such that |
|
39 |
|
all tuples (keyword[i] : i in Pos) are different. |
|
40 |
|
|
|
41 |
|
Theorem 2: If all tuples (keyword[i] : i in Pos) are different, there |
|
42 |
|
are nonnegative integers alpha_inc[i] such that all multisets |
|
43 |
|
{keyword[i] + alpha_inc[i] : i in Pos} are different. |
|
44 |
|
|
|
45 |
|
Define selchars[keyword] := {keyword[i] + alpha_inc[i] : i in Pos}. |
|
46 |
|
|
|
47 |
|
Theorem 3: If all multisets selchars[keyword] are different, there are |
|
48 |
|
nonnegative integers asso_values[c] such that all hash values |
|
49 |
|
sum (asso_values[c] : c in selchars[keyword]) are different. |
|
50 |
|
|
|
51 |
|
Based on these three facts, we find the hash function in three steps: |
|
52 |
|
|
|
53 |
|
Step 1 (Finding good byte positions): |
|
54 |
|
Find a set Pos, as small as possible, such that all tuples |
|
55 |
|
(keyword[i] : i in Pos) are different. |
|
56 |
|
|
|
57 |
|
Step 2 (Finding good alpha increments): |
|
58 |
|
Find nonnegative integers alpha_inc[i], as many of them as possible being |
|
59 |
|
zero, and the others being as small as possible, such that all multisets |
|
60 |
|
{keyword[i] + alpha_inc[i] : i in Pos} are different. |
|
61 |
|
|
|
62 |
|
Step 3 (Finding good asso_values): |
|
63 |
|
Find asso_values[c] such that all hash (keyword) are different. |
|
64 |
|
|
|
65 |
|
In other words, each step finds a projection that is injective on the |
|
66 |
|
given finite set: |
|
67 |
|
proj1 : String --> Map (Pos --> N) |
|
68 |
|
proj2 : Map (Pos --> N) --> Map (Pos --> N) / S(Pos) |
|
69 |
|
proj3 : Map (Pos --> N) / S(Pos) --> N |
|
70 |
|
where |
|
71 |
|
N denotes the set of nonnegative integers, |
|
72 |
|
Map (A --> B) := Hom_Set (A, B) is the set of maps from A to B, and |
|
73 |
|
S(Pos) is the symmetric group over Pos. |
|
74 |
|
|
|
75 |
|
This was the theory for !_hash_includes_len; if _hash_includes_len, slight |
|
76 |
|
modifications apply: |
|
77 |
|
proj1 : String --> Map (Pos --> N) x N |
|
78 |
|
proj2 : Map (Pos --> N) x N --> Map (Pos --> N) / S(Pos) x N |
|
79 |
|
proj3 : Map (Pos --> N) / S(Pos) x N --> N |
|
80 |
|
|
|
81 |
|
For a case-insensitive hash function, the general form is |
|
82 |
|
|
|
83 |
|
hash (keyword) = |
|
84 |
|
sum (asso_values[alpha_unify[keyword[i] + alpha_inc[i]]] : i in Pos) |
|
85 |
|
+ len (keyword) |
|
86 |
|
|
|
87 |
|
where alpha_unify[c] is chosen so that an upper/lower case change in |
|
88 |
|
keyword[i] doesn't change alpha_unify[keyword[i] + alpha_inc[i]]. |
|
89 |
|
*//*}}} THEORY -- END */ |
|
90 |
|
/*{{{ finding asso_values[] that fit |
|
91 |
|
The idea is to choose the _asso_values[] one by one, in a way that |
|
92 |
|
a choice that has been made never needs to be undone later. This |
|
93 |
|
means that we split the work into several steps. Each step chooses |
|
94 |
|
one or more _asso_values[c]. The result of choosing one or more |
|
95 |
|
_asso_values[c] is that the partitioning of the keyword set gets |
|
96 |
|
broader. |
|
97 |
|
Look at this partitioning: After every step, the _asso_values[] of a |
|
98 |
|
certain set C of characters are undetermined. (At the beginning, C |
|
99 |
|
is the set of characters c with _occurrences[c] > 0. At the end, C |
|
100 |
|
is empty.) To each keyword K, we associate the multiset of _selchars |
|
101 |
|
for which the _asso_values[] are undetermined: |
|
102 |
|
K --> K->_selchars intersect C. |
|
103 |
|
Consider two keywords equivalent if their value under this mapping is |
|
104 |
|
the same. This introduces an equivalence relation on the set of |
|
105 |
|
keywords. The equivalence classes partition the keyword set. (At the |
|
106 |
|
beginning, the partition is the finest possible: each K is an equivalence |
|
107 |
|
class by itself, because all K have a different _selchars. At the end, |
|
108 |
|
all K have been merged into a single equivalence class.) |
|
109 |
|
The partition before a step is always a refinement of the partition |
|
110 |
|
after the step. |
|
111 |
|
We choose the steps in such a way that the partition really becomes |
|
112 |
|
broader at each step. (A step that only chooses an _asso_values[c] |
|
113 |
|
without changing the partition is better merged with the previous step, |
|
114 |
|
to avoid useless backtracking.) }}}*/ |
|
115 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
116 |
|
/*{{{ local */ |
|
117 |
|
/*{{{ types */ |
|
118 |
|
struct EquivalenceClass |
|
119 |
|
{ |
|
120 |
|
/* the keywords in this equivalence class */ |
|
121 |
|
struct Keyword_List *keywords; |
|
122 |
|
struct Keyword_List *keywords_last; |
|
123 |
|
/* the number of keywords in this equivalence class */ |
|
124 |
|
u32 cardinality; |
|
125 |
|
/* |
|
126 |
|
* the undetermined selected characters for the keywords in this equivalence class, as a |
|
127 |
|
* canonically reordered multiset |
|
128 |
|
*/ |
|
129 |
|
u32 *undetermined_chars; |
|
130 |
|
u32 undetermined_chars_length; |
|
131 |
|
|
|
132 |
|
struct EquivalenceClass *next; |
|
133 |
|
}; |
|
134 |
|
|
|
135 |
|
struct Step |
|
136 |
|
{ |
|
137 |
|
/* the characters whose values are being determined in this step */ |
|
138 |
|
u32 changing_count; |
|
139 |
|
u32 *changing; |
|
140 |
|
/* |
|
141 |
|
* Exclusive upper bound for the _asso_values[c] of this step. A power |
|
142 |
|
* of 2. |
|
143 |
|
*/ |
|
144 |
|
u32 asso_value_max; |
|
145 |
|
/* the characters whose values will be determined after this step */ |
|
146 |
|
bool *undetermined; |
|
147 |
|
/* the keyword set partition after this step */ |
|
148 |
|
struct EquivalenceClass *partition; |
|
149 |
|
/* the expected number of iterations in this step */ |
|
150 |
|
f64 expected_lower; |
|
151 |
|
f64 expected_upper; |
|
152 |
|
|
|
153 |
|
struct Step *next; |
|
154 |
|
}; |
|
155 |
|
/*}}} types -- END */ |
|
156 |
|
/*{{{ code */ |
|
157 |
|
/*{{{ equals */ |
|
158 |
|
static bool equals(u32 *ptr1, u32 *ptr2, u32 len) |
|
159 |
|
{ |
|
160 |
|
loop { |
|
161 |
|
if (len == 0) |
|
162 |
|
break; |
|
163 |
|
if (*ptr1 != *ptr2) |
|
164 |
|
return false; |
|
165 |
|
++ptr1; |
|
166 |
|
++ptr2; |
|
167 |
|
len--; |
|
168 |
|
} |
|
169 |
|
return true; |
|
170 |
|
}/*}}}*/ |
|
171 |
|
static void delete_partition(struct EquivalenceClass *partition) |
|
172 |
|
{ |
|
173 |
|
loop { |
|
174 |
|
struct EquivalenceClass *equclass; |
|
175 |
|
|
|
176 |
|
if (partition == 0) |
|
177 |
|
break; |
|
178 |
|
equclass = partition; |
|
179 |
|
partition = equclass->next; |
|
180 |
|
delete_list(equclass->keywords); |
|
181 |
|
free(equclass); |
|
182 |
|
} |
|
183 |
|
} |
|
184 |
|
static bool less_by_hash_value(struct Keyword *kw1, struct Keyword *kw2) |
|
185 |
|
{ |
|
186 |
|
return kw1->hash_value < kw2->hash_value; |
|
187 |
|
} |
|
188 |
|
/*}}} code -- END */ |
|
189 |
|
/*}}} local -- END */ |
|
190 |
|
/*------------------------------------------------------------------------------------------------*/ |
|
191 |
|
/*{{{ schr_new */ |
|
192 |
|
static struct Search *schr_new(struct Keyword_List *list) |
|
193 |
|
{ |
|
194 |
|
struct Search *t; |
|
195 |
|
|
|
196 |
|
t = calloc(1, sizeof(*t)); |
|
197 |
|
t->head = list; |
|
198 |
|
t->key_positions = pos_new(); |
|
199 |
|
return t; |
|
200 |
|
}/*}}}*/ |
|
201 |
|
/*{{{ schr_del */ |
|
202 |
|
static void schr_del(struct Search *t) |
|
203 |
|
{ |
|
204 |
|
ba_del(t->collision_detector); |
|
205 |
|
if (OPTS(DEBUG)) { |
|
206 |
|
u32 i; |
|
207 |
|
s32 field_width; |
|
208 |
|
struct Keyword_List *ptr; |
|
209 |
|
|
|
210 |
|
fprintf(stderr, "\ndumping occurrence and associated values tables\n"); |
|
211 |
|
i = 0; |
|
212 |
|
loop { |
|
213 |
|
if (i >= t->alpha_size) |
|
214 |
|
break; |
|
215 |
|
if (t->occurrences[i]) |
|
216 |
|
fprintf (stderr, "asso_values[%c] = %6d, occurrences[%c] = %6d\n", i, t->asso_values[i], i, t->occurrences[i]); |
|
217 |
|
++i; |
|
218 |
|
} |
|
219 |
|
fprintf(stderr, "end table dumping\n"); |
|
220 |
|
fprintf(stderr, "\nDumping key list information:\ntotal non-static linked keywords = %d\ntotal keywords = %d\ntotal duplicates = %d\nmaximum key length = %d\n", t->list_len, t->total_keys, t->total_duplicates, t->max_key_len); |
|
221 |
|
field_width = t->max_selchars_length; |
|
222 |
|
fprintf(stderr, "\nList contents are:\n(hash value, key length, index, %*s, keyword):\n", field_width, "selchars"); |
|
223 |
|
ptr = t->head; |
|
224 |
|
loop { |
|
225 |
|
s32 j; |
|
226 |
|
|
|
227 |
|
if (ptr == 0) |
|
228 |
|
break; |
|
229 |
|
fprintf(stderr, "%11d,%11d,%6d, ", ptr->kw->hash_value, ptr->kw->allchars_length, ptr->kw->final_index); |
|
230 |
|
if (field_width > ptr->kw->selchars_length) |
|
231 |
|
fprintf(stderr, "%*s", field_width - ptr->kw->selchars_length, ""); |
|
232 |
|
j = 0; |
|
233 |
|
loop { |
|
234 |
|
if (j >= ptr->kw->selchars_length) |
|
235 |
|
break; |
|
236 |
|
putc(ptr->kw->selchars[j], stderr); |
|
237 |
|
++j; |
|
238 |
|
} |
|
239 |
|
fprintf(stderr, ", %.*s\n", ptr->kw->allchars_length, ptr->kw->allchars); |
|
240 |
|
ptr = ptr->next; |
|
241 |
|
} |
|
242 |
|
fprintf(stderr, "End dumping list.\n\n"); |
|
243 |
|
} |
|
244 |
|
pos_del(t->key_positions); |
|
245 |
|
free(t->asso_values); |
|
246 |
|
free(t->occurrences); |
|
247 |
|
free(t->alpha_unify); |
|
248 |
|
free(t->alpha_inc); |
|
249 |
|
free(t); |
|
250 |
|
}/*}}}*/ |
|
251 |
|
/*{{{ schr_optimize */ |
|
252 |
|
static void schr_optimize(struct Search *t) |
|
253 |
|
{ |
|
254 |
|
struct Keyword_List *curr_ptr; |
|
255 |
|
s32 max_hash_value; |
|
256 |
|
u32 c; |
|
257 |
|
|
|
258 |
|
/* preparations */ |
|
259 |
|
schr_prepare(t); |
|
260 |
|
|
|
261 |
|
/* Step 1: Finding good byte positions. */ |
|
262 |
|
schr_find_positions(t); |
|
263 |
|
|
|
264 |
|
/* Step 2: Finding good alpha increments. */ |
|
265 |
|
schr_find_alpha_inc(t); |
|
266 |
|
|
|
267 |
|
/* Step 3: Finding good asso_values. */ |
|
268 |
|
schr_find_good_asso_values(t); |
|
269 |
|
/* Make one final check, just to make sure nothing weird happened.... */ |
|
270 |
|
ba_clear(t->collision_detector); |
|
271 |
|
curr_ptr = t->head; |
|
272 |
|
loop { |
|
273 |
|
struct Keyword *curr; |
|
274 |
|
u32 hashcode; |
|
275 |
|
|
|
276 |
|
if (curr_ptr == 0) |
|
277 |
|
break; |
|
278 |
|
curr = curr_ptr->kw; |
|
279 |
|
hashcode = schr_compute_hash(t, curr); |
|
280 |
|
if (ba_set_bit(t->collision_detector, hashcode)) { |
|
281 |
|
/* |
|
282 |
|
* This shouldn't happen. proj1, proj2, proj3 must have been computed to be |
|
283 |
|
* injective on the given keyword set. |
|
284 |
|
*/ |
|
285 |
|
fprintf(stderr, "\nInternal error, unexpected duplicate hash code\n"); |
|
286 |
|
if (OPTS(POSITIONS)) |
|
287 |
|
fprintf(stderr, "try options -m or -r, or use new key positions.\n\n"); |
|
288 |
|
else |
|
289 |
|
fprintf(stderr, "try options -m or -r.\n\n"); |
|
290 |
|
exit(1); |
|
291 |
|
} |
|
292 |
|
curr_ptr = curr_ptr->next; |
|
293 |
|
} |
|
294 |
|
/* sorts the keyword list by hash value */ |
|
295 |
|
schr_sort(t); |
|
296 |
|
/* |
|
297 |
|
* Set unused asso_values[c] to max_hash_value + 1. This is not absolutely necessary, but |
|
298 |
|
* speeds up the lookup function in many cases of lookup failure: no string comparison is |
|
299 |
|
* needed once the hash value of a string is larger than the hash value of any keyword. |
|
300 |
|
*/ |
|
301 |
|
{ |
|
302 |
|
struct Keyword_List *tmp; |
|
303 |
|
|
|
304 |
|
tmp = t->head; |
|
305 |
|
loop { |
|
306 |
|
if (tmp->next == 0) |
|
307 |
|
break; |
|
308 |
|
tmp = tmp->next; |
|
309 |
|
} |
|
310 |
|
max_hash_value = tmp->kw->hash_value; |
|
311 |
|
} |
|
312 |
|
c = 0; |
|
313 |
|
loop { |
|
314 |
|
if (c >= t->alpha_size) |
|
315 |
|
break; |
|
316 |
|
if (t->occurrences[c] == 0) |
|
317 |
|
t->asso_values[c] = max_hash_value + 1; |
|
318 |
|
++c; |
|
319 |
|
} |
|
320 |
|
/* propagate unified asso_values */ |
|
321 |
|
if (t->alpha_unify) { |
|
322 |
|
u32 c; |
|
323 |
|
|
|
324 |
|
c = 0; |
|
325 |
|
loop { |
|
326 |
|
if (c >= t->alpha_size) |
|
327 |
|
break; |
|
328 |
|
if (t->alpha_unify[c] != c) |
|
329 |
|
t->asso_values[c] = t->asso_values[t->alpha_unify[c]]; |
|
330 |
|
++c; |
|
331 |
|
} |
|
332 |
|
} |
|
333 |
|
}/*}}}*/ |
|
334 |
|
/*{{{ schr_prepare */ |
|
335 |
|
static void schr_prepare(struct Search *t) |
|
336 |
|
{ |
|
337 |
|
struct Keyword_List *tmp; |
|
338 |
|
|
|
339 |
|
t->total_keys = 0; |
|
340 |
|
tmp = t->head; |
|
341 |
|
loop { |
|
342 |
|
if (tmp == 0) |
|
343 |
|
break; |
|
344 |
|
++(t->total_keys); |
|
345 |
|
tmp = tmp->next; |
|
346 |
|
} |
|
347 |
|
/* compute the minimum and maximum keyword length */ |
|
348 |
|
t->max_key_len = S32_MIN; |
|
349 |
|
t->min_key_len = S32_MAX; |
|
350 |
|
tmp = t->head; |
|
351 |
|
loop { |
|
352 |
|
struct Keyword *kw; |
|
353 |
|
|
|
354 |
|
if (tmp == 0) |
|
355 |
|
break; |
|
356 |
|
kw = tmp->kw; |
|
357 |
|
if (t->max_key_len < kw->allchars_length) |
|
358 |
|
t->max_key_len = kw->allchars_length; |
|
359 |
|
if (t->min_key_len > kw->allchars_length) |
|
360 |
|
t->min_key_len = kw->allchars_length; |
|
361 |
|
tmp = tmp->next; |
|
362 |
|
} |
|
363 |
|
/* |
|
364 |
|
* exit program if an empty string is used as keyword, since the comparison expressions |
|
365 |
|
* don't work correctly for looking up an empty string |
|
366 |
|
*/ |
|
367 |
|
if (t->min_key_len == 0) { |
|
368 |
|
fprintf (stderr, "Empty input keyword is not allowed.\nTo recognize an empty input keyword, your code should check for\nlen == 0 before calling the gperf generated lookup function.\n"); |
|
369 |
|
exit(1); |
|
370 |
|
} |
|
371 |
|
/* exit program if the characters in the keywords are not in the required range */ |
|
372 |
|
if (OPTS(SEVENBIT)) { |
|
373 |
|
tmp = t->head; |
|
374 |
|
loop { |
|
375 |
|
struct Keyword *kw; |
|
376 |
|
u8 *k; |
|
377 |
|
s32 i; |
|
378 |
|
|
|
379 |
|
if (tmp == 0) |
|
380 |
|
break; |
|
381 |
|
kw = tmp->kw; |
|
382 |
|
k = kw->allchars; |
|
383 |
|
i = kw->allchars_length; |
|
384 |
|
loop { |
|
385 |
|
if (i <= 0) |
|
386 |
|
break; |
|
387 |
|
if (!(*k < 128)) { |
|
388 |
|
fprintf(stderr, "Option --seven-bit has been specified,\nbut keyword \"%.*s\" contains non-ASCII characters.\nTry removing option --seven-bit.\n", kw->allchars_length, kw->allchars); |
|
389 |
|
exit(1); |
|
390 |
|
} |
|
391 |
|
i--; |
|
392 |
|
++k; |
|
393 |
|
} |
|
394 |
|
tmp = tmp->next; |
|
395 |
|
} |
|
396 |
|
} |
|
397 |
|
/* determine whether the hash function shall include the length */ |
|
398 |
|
t->hash_includes_len = !(OPTS(NOLENGTH) || (t->min_key_len == t->max_key_len)); |
|
399 |
|
}/*}}}*/ |
|
400 |
|
/*{{{ schr_find_positions */ |
|
401 |
|
/* find good key positions */ |
|
402 |
|
static void schr_find_positions(struct Search *t) |
|
403 |
|
{ |
|
404 |
|
u32 *alpha_unify; |
|
405 |
|
s32 imax; |
|
406 |
|
struct Positions *mandatory; |
|
407 |
|
struct Positions *current; |
|
408 |
|
u32 current_duplicates_count; |
|
409 |
|
/* if the user gave the key positions, we use them */ |
|
410 |
|
if (OPTS(POSITIONS)) { |
|
411 |
|
pos_cpy(t->key_positions, options->key_positions); |
|
412 |
|
return; |
|
413 |
|
} |
|
414 |
|
/* compute preliminary alpha_unify table */ |
|
415 |
|
alpha_unify = schr_compute_alpha_unify(t); |
|
416 |
|
|
|
417 |
|
/* 1. find positions that must occur in order to distinguish duplicates */ |
|
418 |
|
mandatory = pos_new(); |
|
419 |
|
if (!OPTS(DUP)) { |
|
420 |
|
struct Keyword_List *l1; |
|
421 |
|
|
|
422 |
|
l1 = t->head; |
|
423 |
|
loop { |
|
424 |
|
struct Keyword *kw1; |
|
425 |
|
struct Keyword_List *l2; |
|
426 |
|
|
|
427 |
|
if (l1 == 0 || l1->next == 0) |
|
428 |
|
break; |
|
429 |
|
kw1 = l1->kw; |
|
430 |
|
l2 = l1->next; |
|
431 |
|
loop { |
|
432 |
|
struct Keyword *kw2; |
|
433 |
|
|
|
434 |
|
if (l2 == 0) |
|
435 |
|
break; |
|
436 |
|
kw2 = l2->kw; |
|
437 |
|
/* |
|
438 |
|
* if keyword1 and keyword2 have the same length and differ |
|
439 |
|
* in just one position, and it is not the last character, |
|
440 |
|
* this position is mandatory |
|
441 |
|
*/ |
|
442 |
|
if (kw1->allchars_length == kw2->allchars_length) { |
|
443 |
|
s32 n; |
|
444 |
|
s32 i; |
|
445 |
|
|
|
446 |
|
n = kw1->allchars_length; |
|
447 |
|
i = 0; |
|
448 |
|
loop { |
|
449 |
|
u32 c1; |
|
450 |
|
u32 c2; |
|
451 |
|
|
|
452 |
|
if (i >= (n - 1)) |
|
453 |
|
break; |
|
454 |
|
c1 = kw1->allchars[i]; |
|
455 |
|
c2 = kw2->allchars[i]; |
|
456 |
|
if (OPTS(UPPERLOWER)) { |
|
457 |
|
if (c1 >= 'A' && c1 <= 'Z') |
|
458 |
|
c1 += 'a' - 'A'; |
|
459 |
|
if (c2 >= 'A' && c2 <= 'Z') |
|
460 |
|
c2 += 'a' - 'A'; |
|
461 |
|
} |
|
462 |
|
if (c1 != c2) |
|
463 |
|
break; |
|
464 |
|
++i; |
|
465 |
|
} |
|
466 |
|
if (i < (n - 1)) { |
|
467 |
|
s32 j; |
|
468 |
|
|
|
469 |
|
j = i + 1; |
|
470 |
|
loop { |
|
471 |
|
u32 c1; |
|
472 |
|
u32 c2; |
|
473 |
|
|
|
474 |
|
if (j >= n) |
|
475 |
|
break; |
|
476 |
|
c1 = kw1->allchars[j]; |
|
477 |
|
c2 = kw2->allchars[j]; |
|
478 |
|
if (OPTS(UPPERLOWER)) { |
|
479 |
|
if (c1 >= 'A' && c1 <= 'Z') |
|
480 |
|
c1 += 'a' - 'A'; |
|
481 |
|
if (c2 >= 'A' && c2 <= 'Z') |
|
482 |
|
c2 += 'a' - 'A'; |
|
483 |
|
} |
|
484 |
|
if (c1 != c2) |
|
485 |
|
break; |
|
486 |
|
++j; |
|
487 |
|
} |
|
488 |
|
if (j >= n) { |
|
489 |
|
/* position i is mandatory */ |
|
490 |
|
if (!pos_contains(mandatory, i)) |
|
491 |
|
pos_add(mandatory, i); |
|
492 |
|
} |
|
493 |
|
} |
|
494 |
|
} |
|
495 |
|
l2 = l2->next; |
|
496 |
|
} |
|
497 |
|
l1 = l1->next; |
|
498 |
|
} |
|
499 |
|
} |
|
500 |
|
/* 2. add positions, as long as this decreases the duplicates count */ |
|
501 |
|
imax = (t->max_key_len - 1 < (s32)POS_MAX_KEY_POS - 1 ? t->max_key_len - 1 |
|
502 |
|
: (s32)POS_MAX_KEY_POS - 1); |
|
503 |
|
current = pos_new(); |
|
504 |
|
pos_cpy(current, mandatory); |
|
505 |
|
current_duplicates_count = schr_count_duplicates_tuple_do(t, current, alpha_unify); |
|
506 |
|
loop { |
|
507 |
|
struct Positions *best; |
|
508 |
|
u32 best_duplicates_count; |
|
509 |
|
s32 i; |
|
510 |
|
|
|
511 |
|
best = pos_new(); |
|
512 |
|
best_duplicates_count = U32_MAX; |
|
513 |
|
i = imax; |
|
514 |
|
loop { |
|
515 |
|
if (i < -1) |
|
516 |
|
break; |
|
517 |
|
if (!pos_contains(current, i)) { |
|
518 |
|
struct Positions *tryal; |
|
519 |
|
u32 try_duplicates_count; |
|
520 |
|
|
|
521 |
|
tryal = pos_new(); |
|
522 |
|
pos_cpy(tryal, current); |
|
523 |
|
pos_add(tryal, i); |
|
524 |
|
try_duplicates_count = schr_count_duplicates_tuple_do(t, tryal, |
|
525 |
|
alpha_unify); |
|
526 |
|
/* |
|
527 |
|
* We prefer 'try' to 'best' if it produces less duplicates, or if |
|
528 |
|
* it produces the same number of duplicates but with a more |
|
529 |
|
* efficient hash function. |
|
530 |
|
*/ |
|
531 |
|
if (try_duplicates_count < best_duplicates_count |
|
532 |
|
|| (try_duplicates_count == best_duplicates_count |
|
533 |
|
&& i >=0)) { |
|
534 |
|
pos_cpy(best, tryal); |
|
535 |
|
best_duplicates_count = try_duplicates_count; |
|
536 |
|
} |
|
537 |
|
pos_del(tryal); |
|
538 |
|
} |
|
539 |
|
i--; |
|
540 |
|
} |
|
541 |
|
/* stop adding positions when it gives no improvement */ |
|
542 |
|
if (best_duplicates_count >= current_duplicates_count) |
|
543 |
|
break; |
|
544 |
|
pos_cpy(current, best); |
|
545 |
|
pos_del(best); |
|
546 |
|
current_duplicates_count = best_duplicates_count; |
|
547 |
|
} |
|
548 |
|
/* 3. remove positions, as long as this doesn't increase the duplicates count */ |
|
549 |
|
loop { |
|
550 |
|
struct Positions *best; |
|
551 |
|
u32 best_duplicates_count; |
|
552 |
|
s32 i; |
|
553 |
|
|
|
554 |
|
best = pos_new(); |