1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set sw=4 ts=8 et tw=80:
3 : *
4 : * ***** BEGIN LICENSE BLOCK *****
5 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 : *
7 : * The contents of this file are subject to the Mozilla Public License Version
8 : * 1.1 (the "License"); you may not use this file except in compliance with
9 : * the License. You may obtain a copy of the License at
10 : * http://www.mozilla.org/MPL/
11 : *
12 : * Software distributed under the License is distributed on an "AS IS" basis,
13 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 : * for the specific language governing rights and limitations under the
15 : * License.
16 : *
17 : * The Original Code is String Switch Generator for JavaScript Keywords,
18 : * released 2005-12-09.
19 : *
20 : * The Initial Developer of the Original Code is
21 : * Igor Bukanov.
22 : * Portions created by the Initial Developer are Copyright (C) 2005-2006
23 : * the Initial Developer. All Rights Reserved.
24 : *
25 : * Contributor(s):
26 : *
27 : * Alternatively, the contents of this file may be used under the terms of
28 : * either of the GNU General Public License Version 2 or later (the "GPL"),
29 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 : * in which case the provisions of the GPL or the LGPL are applicable instead
31 : * of those above. If you wish to allow use of your version of this file only
32 : * under the terms of either the GPL or the LGPL, and not to allow others to
33 : * use your version of this file under the terms of the MPL, indicate your
34 : * decision by deleting the provisions above and replace them with the notice
35 : * and other provisions required by the GPL or the LGPL. If you do not delete
36 : * the provisions above, a recipient may use your version of this file under
37 : * the terms of any one of the MPL, the GPL or the LGPL.
38 : *
39 : * ***** END LICENSE BLOCK ***** */
40 :
41 : #include <stddef.h>
42 : #include <assert.h>
43 : #include <stdio.h>
44 : #include <stdlib.h>
45 : #include <string.h>
46 : #include <stdarg.h>
47 : #include <ctype.h>
48 :
49 : #include "jsversion.h"
50 :
51 : const char * const keyword_list[] = {
52 : #define JS_KEYWORD(keyword, type, op, version) #keyword,
53 : #include "jskeyword.tbl"
54 : #undef JS_KEYWORD
55 : };
56 :
/* Generator settings plus the mutable state shared by the emit helpers. */
struct gen_opt {
    /* Stream that receives the generated source. */
    FILE *output;

    /*
     * Largest number of choices for which an "if" selector is generated
     * instead of a "switch".
     */
    unsigned use_if_threshold;

    /*
     * Largest number of unprocessed columns for which the remaining chars
     * are tested with inlined char compares rather than the generic string
     * compare code.
     */
    unsigned char_tail_test_threshold;

    /* Current indentation level of the emitted source. */
    unsigned indent_level;
};
67 :
/*
 * Character column examined by column_comparator.  qsort offers no user
 * argument, so the column is handed over through this file-scope variable,
 * which must be set before each sort.
 */
static unsigned int column_to_compare;
69 :
70 : static int
71 190 : length_comparator(const void *a, const void *b)
72 : {
73 190 : const char *str1 = keyword_list[*(unsigned *)a];
74 190 : const char *str2 = keyword_list[*(unsigned *)b];
75 190 : return (int)strlen(str1) - (int)strlen(str2);
76 : }
77 :
78 : static int
79 478 : column_comparator(const void *a, const void *b)
80 : {
81 478 : const char *str1 = keyword_list[*(unsigned *)a];
82 478 : const char *str2 = keyword_list[*(unsigned *)b];
83 478 : return (int)str1[column_to_compare] - (int)str2[column_to_compare];
84 : }
85 :
86 : static unsigned
87 1 : count_different_lengths(unsigned indexes[], unsigned nelem)
88 : {
89 : unsigned nlength, current_length, i, l;
90 :
91 1 : current_length = 0;
92 1 : nlength = 0;
93 46 : for (i = 0; i != nelem; ++i) {
94 45 : l = (unsigned)strlen(keyword_list[indexes[i]]);
95 45 : assert(l != 0);
96 45 : if (current_length != l) {
97 9 : ++nlength;
98 9 : current_length = l;
99 : }
100 : }
101 1 : return nlength;
102 : }
103 :
104 : static void
105 80 : find_char_span_and_count(unsigned indexes[], unsigned nelem, unsigned column,
106 : unsigned *span_result, unsigned *count_result)
107 : {
108 : unsigned i, count;
109 : unsigned char c, prev, minc, maxc;
110 :
111 80 : assert(nelem != 0);
112 80 : minc = maxc = prev = (unsigned char)keyword_list[indexes[0]][column];
113 80 : count = 1;
114 299 : for (i = 1; i != nelem; ++i) {
115 219 : c = (unsigned char)keyword_list[indexes[i]][column];
116 219 : if (prev != c) {
117 177 : prev = c;
118 177 : ++count;
119 177 : if (minc > c) {
120 0 : minc = c;
121 177 : } else if (maxc < c) {
122 177 : maxc = c;
123 : }
124 : }
125 : }
126 :
127 80 : *span_result = maxc - minc + 1;
128 80 : *count_result = count;
129 80 : }
130 :
131 : static unsigned
132 20 : find_optimal_switch_column(struct gen_opt *opt,
133 : unsigned indexes[], unsigned nelem,
134 : unsigned columns[], unsigned unprocessed_columns,
135 : int *use_if_result)
136 : {
137 : unsigned i;
138 : unsigned span, min_span, min_span_index;
139 : unsigned nchar, min_nchar, min_nchar_index;
140 :
141 20 : assert(unprocessed_columns != 0);
142 20 : i = 0;
143 20 : min_nchar = min_span = (unsigned)-1;
144 20 : min_nchar_index = min_span_index = 0;
145 76 : do {
146 80 : column_to_compare = columns[i];
147 80 : qsort(indexes, nelem, sizeof(indexes[0]), column_comparator);
148 : find_char_span_and_count(indexes, nelem, column_to_compare,
149 80 : &span, &nchar);
150 80 : assert(span != 0);
151 80 : if (span == 1) {
152 4 : assert(nchar == 1);
153 4 : *use_if_result = 1;
154 4 : return 1;
155 : }
156 76 : assert(nchar != 1);
157 76 : if (min_span > span) {
158 31 : min_span = span;
159 31 : min_span_index = i;
160 : }
161 76 : if (min_nchar > nchar) {
162 24 : min_nchar = nchar;
163 24 : min_nchar_index = i;
164 : }
165 : } while (++i != unprocessed_columns);
166 :
167 16 : if (min_nchar <= opt->use_if_threshold) {
168 11 : *use_if_result = 1;
169 11 : i = min_nchar_index;
170 : } else {
171 5 : *use_if_result = 0;
172 5 : i = min_span_index;
173 : }
174 :
175 : /*
176 : * Restore order corresponding to i if it was destroyed by
177 : * subsequent sort.
178 : */
179 16 : if (i != unprocessed_columns - 1) {
180 14 : column_to_compare = columns[i];
181 14 : qsort(indexes, nelem, sizeof(indexes[0]), column_comparator);
182 : }
183 :
184 16 : return i;
185 : }
186 :
187 :
188 : static void
189 114 : p(struct gen_opt *opt, const char *format, ...)
190 : {
191 : va_list ap;
192 :
193 114 : va_start(ap, format);
194 114 : vfprintf(opt->output, format, ap);
195 114 : va_end(ap);
196 114 : }
197 :
/*
 * Buffer size needed by qchar for its longest result, '\ooo' with ooo an
 * octal escape: two quotes, a backslash, three digits and the NUL.
 */
#define MIN_QUOTED_CHAR_BUFFER 7

/*
 * Write the C character literal denoting c, quotes included, into
 * quoted_buffer and NUL-terminate it.
 *
 * c              the character to quote.
 * quoted_buffer  destination of at least MIN_QUOTED_CHAR_BUFFER bytes.
 *
 * Returns quoted_buffer.
 *
 * Newline, carriage return, tab, form feed, NUL, the single quote and the
 * backslash get their one-character escapes; any other non-printable
 * character is written as a three-digit octal escape.
 */
static char *
qchar(char c, char *quoted_buffer)
{
    char *s;

    s = quoted_buffer;
    *s++ = '\'';
    switch (c) {
      case '\n': c = 'n'; goto one_char_escape;
      case '\r': c = 'r'; goto one_char_escape;
      case '\t': c = 't'; goto one_char_escape;
      case '\f': c = 'f'; goto one_char_escape;
      case '\0': c = '0'; goto one_char_escape;
      case '\'': goto one_char_escape;
      case '\\': goto one_char_escape;
      one_char_escape:
        *s++ = '\\';
        break;
      default:
        /* <ctype.h> functions require a value representable as unsigned char. */
        if (!isprint((unsigned char)c)) {
            *s++ = '\\';
            *s++ = (char)('0' + (0x3 & (((unsigned char)c) >> 6)));
            *s++ = (char)('0' + (0x7 & (((unsigned char)c) >> 3)));
            c = (char)('0' + (0x7 & ((unsigned char)c)));
        }
    }
    *s++ = c;
    *s++ = '\'';
    *s = '\0';
    assert(s + 1 <= quoted_buffer + MIN_QUOTED_CHAR_BUFFER);
    return quoted_buffer;
}
232 :
233 : static void
234 296 : nl(struct gen_opt *opt)
235 : {
236 296 : putc('\n', opt->output);
237 296 : }
238 :
239 : static void
240 296 : indent(struct gen_opt *opt)
241 : {
242 296 : unsigned n = opt->indent_level;
243 1403 : while (n != 0) {
244 811 : --n;
245 811 : fputs(" ", opt->output);
246 : }
247 296 : }
248 :
249 : static void
250 271 : line(struct gen_opt *opt, const char *format, ...)
251 : {
252 : va_list ap;
253 :
254 271 : indent(opt);
255 271 : va_start(ap, format);
256 271 : vfprintf(opt->output, format, ap);
257 271 : va_end(ap);
258 271 : nl(opt);
259 271 : }
260 :
261 : static void
262 65 : generate_letter_switch_r(struct gen_opt *opt,
263 : unsigned indexes[], unsigned nelem,
264 : unsigned columns[], unsigned unprocessed_columns)
265 : {
266 : char qbuf[MIN_QUOTED_CHAR_BUFFER];
267 :
268 65 : assert(nelem != 0);
269 65 : if (nelem == 1) {
270 45 : unsigned kw_index = indexes[0];
271 45 : const char *keyword = keyword_list[kw_index];
272 :
273 45 : if (unprocessed_columns == 0) {
274 2 : line(opt, "JSKW_GOT_MATCH(%u) /* %s */", kw_index, keyword);
275 43 : } else if (unprocessed_columns > opt->char_tail_test_threshold) {
276 18 : line(opt, "JSKW_TEST_GUESS(%u) /* %s */", kw_index, keyword);
277 : } else {
278 : unsigned i, column;
279 :
280 25 : indent(opt); p(opt, "if (");
281 89 : for (i = 0; i != unprocessed_columns; ++i) {
282 64 : column = columns[i];
283 64 : qchar(keyword[column], qbuf);
284 : p(opt, "%sJSKW_AT(%u)==%s", (i == 0) ? "" : " && ",
285 64 : column, qbuf);
286 : }
287 25 : p(opt, ") {"); nl(opt);
288 25 : ++opt->indent_level;
289 25 : line(opt, "JSKW_GOT_MATCH(%u) /* %s */", kw_index, keyword);
290 25 : --opt->indent_level;
291 25 : line(opt, "}");
292 25 : line(opt, "JSKW_NO_MATCH()");
293 : }
294 : } else {
295 : unsigned optimal_column_index, optimal_column;
296 : unsigned i;
297 : int use_if;
298 : char current;
299 :
300 20 : assert(unprocessed_columns != 0);
301 : optimal_column_index = find_optimal_switch_column(opt, indexes, nelem,
302 : columns,
303 : unprocessed_columns,
304 20 : &use_if);
305 20 : optimal_column = columns[optimal_column_index];
306 20 : columns[optimal_column_index] = columns[unprocessed_columns - 1];
307 :
308 20 : if (!use_if)
309 5 : line(opt, "switch (JSKW_AT(%u)) {", optimal_column);
310 :
311 20 : current = keyword_list[indexes[0]][optimal_column];
312 96 : for (i = 0; i != nelem;) {
313 56 : unsigned same_char_begin = i;
314 56 : char next = current;
315 :
316 68 : for (++i; i != nelem; ++i) {
317 48 : next = keyword_list[indexes[i]][optimal_column];
318 48 : if (next != current)
319 36 : break;
320 : }
321 56 : qchar(current, qbuf);
322 56 : if (use_if) {
323 30 : line(opt, "if (JSKW_AT(%u) == %s) {", optimal_column, qbuf);
324 : } else {
325 26 : line(opt, " case %s:", qbuf);
326 : }
327 56 : ++opt->indent_level;
328 : generate_letter_switch_r(opt, indexes + same_char_begin,
329 : i - same_char_begin,
330 56 : columns, unprocessed_columns - 1);
331 56 : --opt->indent_level;
332 56 : if (use_if) {
333 30 : line(opt, "}");
334 : }
335 56 : current = next;
336 : }
337 :
338 20 : if (!use_if) {
339 5 : line(opt, "}");
340 : }
341 :
342 20 : columns[optimal_column_index] = optimal_column;
343 :
344 20 : line(opt, "JSKW_NO_MATCH()");
345 : }
346 65 : }
347 :
/*
 * Emit the character tests for the keywords selected by indexes[0..nelem),
 * all of which have current_length characters.  Builds the initial list of
 * columns 0 .. current_length - 1 and hands it to the recursive generator.
 * Exits the program if the column list cannot be allocated.
 */
static void
generate_letter_switch(struct gen_opt *opt,
                       unsigned indexes[], unsigned nelem,
                       unsigned current_length)
{
    unsigned *columns;
    unsigned col;

    columns = (unsigned *) malloc(current_length * sizeof(*columns));
    if (columns == NULL) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }

    for (col = 0; col < current_length; col++)
        columns[col] = col;

    generate_letter_switch_r(opt, indexes, nelem, columns, current_length);
    free(columns);
}
367 :
368 :
369 : static void
370 1 : generate_switch(struct gen_opt *opt)
371 : {
372 : unsigned *indexes;
373 : unsigned nlength;
374 : unsigned i, current;
375 : int use_if;
376 : unsigned nelem;
377 :
378 1 : nelem = sizeof(keyword_list)/sizeof(keyword_list[0]);
379 :
380 1 : line(opt, "/*");
381 1 : line(opt, " * Generating switch for the list of %u entries:", nelem);
382 46 : for (i = 0; i != nelem; ++i) {
383 45 : line(opt, " * %s", keyword_list[i]);
384 : }
385 1 : line(opt, " */");
386 :
387 1 : indexes = (unsigned *) malloc(sizeof(indexes[0]) * nelem);
388 1 : if (!indexes) {
389 0 : perror("malloc");
390 0 : exit(EXIT_FAILURE);
391 : }
392 46 : for (i = 0; i != nelem; ++i)
393 45 : indexes[i] = i;
394 1 : qsort(indexes, nelem, sizeof(indexes[i]), length_comparator);
395 1 : nlength = count_different_lengths(indexes, nelem);
396 :
397 1 : use_if = (nlength <= opt->use_if_threshold);
398 :
399 1 : if (!use_if)
400 1 : line(opt, "switch (JSKW_LENGTH()) {");
401 :
402 1 : current = (unsigned)strlen(keyword_list[indexes[0]]);
403 11 : for (i = 0; i != nelem;) {
404 9 : unsigned same_length_begin = i;
405 9 : unsigned next = current;
406 :
407 45 : for (++i; i != nelem; ++i) {
408 44 : next = (unsigned)strlen(keyword_list[indexes[i]]);
409 44 : if (next != current)
410 8 : break;
411 : }
412 9 : if (use_if) {
413 0 : line(opt, "if (JSKW_LENGTH() == %u) {", current);
414 : } else {
415 9 : line(opt, " case %u:", current);
416 : }
417 9 : ++opt->indent_level;
418 : generate_letter_switch(opt, indexes + same_length_begin,
419 : i - same_length_begin,
420 9 : current);
421 9 : --opt->indent_level;
422 9 : if (use_if) {
423 0 : line(opt, "}");
424 : }
425 9 : current = next;
426 : }
427 1 : if (!use_if)
428 1 : line(opt, "}");
429 1 : line(opt, "JSKW_NO_MATCH()");
430 1 : free(indexes);
431 1 : }
432 :
433 1 : int main(int argc, char **argv)
434 : {
435 : struct gen_opt opt;
436 :
437 1 : if (argc < 2) {
438 0 : opt.output = stdout;
439 : } else {
440 1 : opt.output = fopen(argv[1], "w");
441 1 : if (!opt.output) {
442 0 : perror("fopen");
443 0 : exit(EXIT_FAILURE);
444 : }
445 : }
446 1 : opt.indent_level = 1;
447 1 : opt.use_if_threshold = 3;
448 1 : opt.char_tail_test_threshold = 4;
449 :
450 1 : generate_switch(&opt);
451 :
452 1 : if (opt.output != stdout) {
453 1 : if (fclose(opt.output)) {
454 0 : perror("fclose");
455 0 : exit(EXIT_FAILURE);
456 : }
457 : }
458 :
459 1 : return EXIT_SUCCESS;
460 : }
|