1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set ts=8 sw=4 et tw=99:
3 : *
4 : * ***** BEGIN LICENSE BLOCK *****
5 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 : *
7 : * The contents of this file are subject to the Mozilla Public License Version
8 : * 1.1 (the "License"); you may not use this file except in compliance with
9 : * the License. You may obtain a copy of the License at
10 : * http://www.mozilla.org/MPL/
11 : *
12 : * Software distributed under the License is distributed on an "AS IS" basis,
13 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 : * for the specific language governing rights and limitations under the
15 : * License.
16 : *
17 : * The Original Code is Mozilla Communicator client code, released
18 : * March 31, 1998.
19 : *
20 : * The Initial Developer of the Original Code is
21 : * Netscape Communications Corporation.
22 : * Portions created by the Initial Developer are Copyright (C) 1998
23 : * the Initial Developer. All Rights Reserved.
24 : *
25 : * Contributor(s):
26 : * Nick Fitzgerald <nfitzgerald@mozilla.com>
27 : *
28 : * Alternatively, the contents of this file may be used under the terms of
29 : * either of the GNU General Public License Version 2 or later (the "GPL"),
30 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
31 : * in which case the provisions of the GPL or the LGPL are applicable instead
32 : * of those above. If you wish to allow use of your version of this file only
33 : * under the terms of either the GPL or the LGPL, and not to allow others to
34 : * use your version of this file under the terms of the MPL, indicate your
35 : * decision by deleting the provisions above and replace them with the notice
36 : * and other provisions required by the GPL or the LGPL. If you do not delete
37 : * the provisions above, a recipient may use your version of this file under
38 : * the terms of any one of the MPL, the GPL or the LGPL.
39 : *
40 : * ***** END LICENSE BLOCK ***** */
41 :
42 : /*
43 : * JS lexical scanner.
44 : */
45 : #include <stdio.h> /* first to avoid trouble on some systems */
46 : #include <errno.h>
47 : #include <limits.h>
48 : #include <math.h>
49 : #ifdef HAVE_MEMORY_H
50 : #include <memory.h>
51 : #endif
52 : #include <stdarg.h>
53 : #include <stdlib.h>
54 : #include <string.h>
55 : #include "jstypes.h"
56 : #include "jsutil.h"
57 : #include "jsprf.h"
58 : #include "jsapi.h"
59 : #include "jsatom.h"
60 : #include "jscntxt.h"
61 : #include "jsversion.h"
62 : #include "jsexn.h"
63 : #include "jsnum.h"
64 : #include "jsopcode.h"
65 : #include "jsscript.h"
66 :
67 : #include "frontend/BytecodeEmitter.h"
68 : #include "frontend/Parser.h"
69 : #include "frontend/TokenStream.h"
70 : #include "vm/RegExpObject.h"
71 :
72 : #include "jsscriptinlines.h"
73 :
74 : #if JS_HAS_XML_SUPPORT
75 : #include "jsxml.h"
76 : #endif
77 :
78 : using namespace js;
79 : using namespace js::unicode;
80 :
/*
 * For every JS_KEYWORD(...) entry listed in jskeyword.tbl, define a constant
 * character array named js_<keyword>_str whose contents are the stringified
 * keyword. Only the |keyword| argument is used by this expansion.
 */
81 : #define JS_KEYWORD(keyword, type, op, version) \
82 : const char js_##keyword##_str[] = #keyword;
83 : #include "jskeyword.tbl"
84 : #undef JS_KEYWORD
85 :
/*
 * Keyword table: jskeyword.tbl is included a second time, now expanding each
 * JS_KEYWORD(...) entry to one KeywordInfo initializer built from the
 * matching js_<keyword>_str array and the entry's type, op and version
 * arguments. FindKeyword() returns pointers into this array.
 */
86 : static const KeywordInfo keywords[] = {
87 : #define JS_KEYWORD(keyword, type, op, version) \
88 : {js_##keyword##_str, type, op, version},
89 : #include "jskeyword.tbl"
90 : #undef JS_KEYWORD
91 : };
92 :
93 : const KeywordInfo *
94 26548174 : js::FindKeyword(const jschar *s, size_t length)
95 : {
96 26548174 : JS_ASSERT(length != 0);
97 :
98 : register size_t i;
99 : const struct KeywordInfo *kw;
100 : const char *chars;
101 :
102 : #define JSKW_LENGTH() length
103 : #define JSKW_AT(column) s[column]
104 : #define JSKW_GOT_MATCH(index) i = (index); goto got_match;
105 : #define JSKW_TEST_GUESS(index) i = (index); goto test_guess;
106 : #define JSKW_NO_MATCH() goto no_match;
107 : #include "jsautokw.h"
108 : #undef JSKW_NO_MATCH
109 : #undef JSKW_TEST_GUESS
110 : #undef JSKW_GOT_MATCH
111 : #undef JSKW_AT
112 : #undef JSKW_LENGTH
113 :
114 : got_match:
115 6044825 : return &keywords[i];
116 :
117 : test_guess:
118 2071393 : kw = &keywords[i];
119 2071393 : chars = kw->chars;
120 12867979 : do {
121 13223862 : if (*s++ != (unsigned char)(*chars++))
122 355883 : goto no_match;
123 : } while (--length != 0);
124 1715510 : return kw;
125 :
126 : no_match:
127 18787839 : return NULL;
128 : }
129 :
130 : JSBool
131 391165 : js::IsIdentifier(JSLinearString *str)
132 : {
133 391165 : const jschar *chars = str->chars();
134 391165 : size_t length = str->length();
135 :
136 391165 : if (length == 0)
137 27 : return JS_FALSE;
138 391138 : jschar c = *chars;
139 391138 : if (!IsIdentifierStart(c))
140 84 : return JS_FALSE;
141 391054 : const jschar *end = chars + length;
142 2233816 : while (++chars != end) {
143 1451737 : c = *chars;
144 1451737 : if (!IsIdentifierPart(c))
145 29 : return JS_FALSE;
146 : }
147 391025 : return JS_TRUE;
148 : }
149 :
/*
 * Silence MSVC warning 4351 around the constructor only; the push/pop pair
 * restores the previous warning state afterwards.
 * NOTE(review): C4351 is presumably triggered by the value-initialized
 * members (tokens(), ...) in the initializer list below -- confirm against
 * the class declaration.
 */
150 : #ifdef _MSC_VER
151 : #pragma warning(push)
152 : #pragma warning(disable:4351)
153 : #endif
154 :
155 : /* Initialize members that aren't initialized in |init|. */
/*
 * |prin| and |originPrin| are combined by JSScript::normalizeOriginPrincipals()
 * into the stream's originPrincipals. When that result is non-null it is held
 * here; the destructor drops it again.
 */
156 138998 : TokenStream::TokenStream(JSContext *cx, JSPrincipals *prin, JSPrincipals *originPrin)
157 : : tokens(), cursor(), lookahead(), flags(), listenerTSData(), tokenbuf(cx),
158 138998 : cx(cx), originPrincipals(JSScript::normalizeOriginPrincipals(prin, originPrin))
159 : {
160 138998 : if (originPrincipals)
161 27342 : JS_HoldPrincipals(originPrincipals);
162 138998 : }
163 :
164 : #ifdef _MSC_VER
165 : #pragma warning(pop)
166 : #endif
167 :
168 : bool
169 138998 : TokenStream::init(const jschar *base, size_t length, const char *fn, unsigned ln, JSVersion v)
170 : {
171 138998 : filename = fn;
172 138998 : lineno = ln;
173 138998 : version = v;
174 138998 : xml = VersionHasXML(v);
175 :
176 138998 : userbuf.init(base, length);
177 138998 : linebase = base;
178 138998 : prevLinebase = NULL;
179 138998 : sourceMap = NULL;
180 :
181 138998 : JSSourceHandler listener = cx->runtime->debugHooks.sourceHandler;
182 138998 : void *listenerData = cx->runtime->debugHooks.sourceHandlerData;
183 :
184 138998 : if (listener)
185 0 : listener(fn, ln, base, length, &listenerTSData, listenerData);
186 :
187 : /*
188 : * This table holds all the token kinds that satisfy these properties:
189 : * - A single char long.
190 : * - Cannot be a prefix of any longer token (eg. '+' is excluded because
191 : * '+=' is a valid token).
192 : * - Doesn't need tp->t_op set (eg. this excludes '~').
193 : *
194 : * The few token kinds satisfying these properties cover roughly 35--45%
195 : * of the tokens seen in practice.
196 : *
197 : * Nb: oneCharTokens, maybeEOL and maybeStrSpecial could be static, but
198 : * initializing them this way is a bit easier. Don't worry, the time to
199 : * initialize them for each TokenStream is trivial. See bug 639420.
200 : */
201 138998 : memset(oneCharTokens, 0, sizeof(oneCharTokens));
202 138998 : oneCharTokens[unsigned(';')] = TOK_SEMI;
203 138998 : oneCharTokens[unsigned(',')] = TOK_COMMA;
204 138998 : oneCharTokens[unsigned('?')] = TOK_HOOK;
205 138998 : oneCharTokens[unsigned('[')] = TOK_LB;
206 138998 : oneCharTokens[unsigned(']')] = TOK_RB;
207 138998 : oneCharTokens[unsigned('{')] = TOK_LC;
208 138998 : oneCharTokens[unsigned('}')] = TOK_RC;
209 138998 : oneCharTokens[unsigned('(')] = TOK_LP;
210 138998 : oneCharTokens[unsigned(')')] = TOK_RP;
211 :
212 : /* See getChar() for an explanation of maybeEOL[]. */
213 138998 : memset(maybeEOL, 0, sizeof(maybeEOL));
214 138998 : maybeEOL[unsigned('\n')] = true;
215 138998 : maybeEOL[unsigned('\r')] = true;
216 138998 : maybeEOL[unsigned(LINE_SEPARATOR & 0xff)] = true;
217 138998 : maybeEOL[unsigned(PARA_SEPARATOR & 0xff)] = true;
218 :
219 : /* See getTokenInternal() for an explanation of maybeStrSpecial[]. */
220 138998 : memset(maybeStrSpecial, 0, sizeof(maybeStrSpecial));
221 138998 : maybeStrSpecial[unsigned('"')] = true;
222 138998 : maybeStrSpecial[unsigned('\'')] = true;
223 138998 : maybeStrSpecial[unsigned('\\')] = true;
224 138998 : maybeStrSpecial[unsigned('\n')] = true;
225 138998 : maybeStrSpecial[unsigned('\r')] = true;
226 138998 : maybeStrSpecial[unsigned(LINE_SEPARATOR & 0xff)] = true;
227 138998 : maybeStrSpecial[unsigned(PARA_SEPARATOR & 0xff)] = true;
228 138998 : maybeStrSpecial[unsigned(EOF & 0xff)] = true;
229 :
230 : /*
231 : * Set |ln| as the beginning line number of the ungot "current token", so
232 : * that js::Parser::statements (and potentially other such methods, in the
233 : * future) can create parse nodes with good source coordinates before they
234 : * explicitly get any tokens.
235 : *
236 : * Switching the parser/lexer so we always get the next token ahead of the
237 : * parser needing it (the so-called "pump-priming" model) might be a better
238 : * way to address the dependency from statements on the current token.
239 : */
240 138998 : tokens[0].pos.begin.lineno = tokens[0].pos.end.lineno = ln;
241 138998 : return true;
242 : }
243 :
244 277996 : TokenStream::~TokenStream()
245 : {
246 138998 : if (flags & TSF_OWNFILENAME)
247 1 : cx->free_((void *) filename);
248 138998 : if (sourceMap)
249 0 : cx->free_(sourceMap);
250 138998 : if (originPrincipals)
251 27342 : JS_DropPrincipals(cx->runtime, originPrincipals);
252 138998 : }
253 :
254 : /* Use the fastest available getc. */
/*
 * fast_getc expands to getc_unlocked when HAVE_GETC_UNLOCKED is defined, to
 * _getc_nolock when HAVE__GETC_NOLOCK is defined, and to plain getc
 * otherwise.
 */
255 : #if defined(HAVE_GETC_UNLOCKED)
256 : # define fast_getc getc_unlocked
257 : #elif defined(HAVE__GETC_NOLOCK)
258 : # define fast_getc _getc_nolock
259 : #else
260 : # define fast_getc getc
261 : #endif
262 :
263 : JS_ALWAYS_INLINE void
264 19657212 : TokenStream::updateLineInfoForEOL()
265 : {
266 19657212 : prevLinebase = linebase;
267 19657212 : linebase = userbuf.addressOfNextRawChar();
268 19657212 : lineno++;
269 19657212 : }
270 :
271 : JS_ALWAYS_INLINE void
272 16297084 : TokenStream::updateFlagsForEOL()
273 : {
274 16297084 : flags &= ~TSF_DIRTYLINE;
275 16297084 : flags |= TSF_EOL;
276 16297084 : }
277 :
278 : /* This gets the next char, normalizing all EOL sequences to '\n' as it goes. */
279 : int32_t
280 163253124 : TokenStream::getChar()
281 : {
282 : int32_t c;
283 163253124 : if (JS_LIKELY(userbuf.hasRawChars())) {
284 163253052 : c = userbuf.getRawChar();
285 :
286 : /*
287 : * Normalize the jschar if it was a newline. We need to detect any of
288 : * these four characters: '\n' (0x000a), '\r' (0x000d),
289 : * LINE_SEPARATOR (0x2028), PARA_SEPARATOR (0x2029). Testing for each
290 : * one in turn is slow, so we use a single probabilistic check, and if
291 : * that succeeds, test for them individually.
292 : *
293 : * We use the bottom 8 bits to index into a lookup table, succeeding
294 : * when d&0xff is 0xa, 0xd, 0x28 or 0x29. Among ASCII chars (which
295 : * are by the far the most common) this gives false positives for '('
296 : * (0x0028) and ')' (0x0029). We could avoid those by incorporating
297 : * the 13th bit of d into the lookup, but that requires extra shifting
298 : * and masking and isn't worthwhile. See TokenStream::init() for the
299 : * initialization of the relevant entries in the table.
300 : */
301 163253052 : if (JS_UNLIKELY(maybeEOL[c & 0xff])) {
302 4021952 : if (c == '\n')
303 3607919 : goto eol;
304 414033 : if (c == '\r') {
305 : /* if it's a \r\n sequence: treat as a single EOL, skip over the \n */
306 17205 : if (userbuf.hasRawChars())
307 17205 : userbuf.matchRawChar('\n');
308 17205 : goto eol;
309 : }
310 396828 : if (c == LINE_SEPARATOR || c == PARA_SEPARATOR)
311 : goto eol;
312 : }
313 159627928 : return c;
314 : }
315 :
316 72 : flags |= TSF_EOF;
317 72 : return EOF;
318 :
319 : eol:
320 3625124 : updateLineInfoForEOL();
321 3625124 : return '\n';
322 : }
323 :
324 : /*
325 : * This gets the next char. It does nothing special with EOL sequences, not
326 : * even updating the line counters. It can be used safely if (a) the
327 : * resulting char is guaranteed to be ungotten (by ungetCharIgnoreEOL()) if
328 : * it's an EOL, and (b) the line-related state (lineno, linebase) is not used
329 : * before it's ungotten.
330 : */
331 : int32_t
332 878253802 : TokenStream::getCharIgnoreEOL()
333 : {
334 878253802 : if (JS_LIKELY(userbuf.hasRawChars()))
335 878240524 : return userbuf.getRawChar();
336 :
337 13278 : flags |= TSF_EOF;
338 13278 : return EOF;
339 : }
340 :
341 : void
342 16362311 : TokenStream::ungetChar(int32_t c)
343 : {
344 16362311 : if (c == EOF)
345 71 : return;
346 16362240 : JS_ASSERT(!userbuf.atStart());
347 16362240 : userbuf.ungetRawChar();
348 16362240 : if (c == '\n') {
349 : #ifdef DEBUG
350 1550754 : int32_t c2 = userbuf.peekRawChar();
351 1550754 : JS_ASSERT(TokenBuf::isRawEOLChar(c2));
352 : #endif
353 :
354 : /* if it's a \r\n sequence, also unget the \r */
355 1550754 : if (!userbuf.atStart())
356 1550754 : userbuf.matchRawCharBackwards('\r');
357 :
358 1550754 : JS_ASSERT(prevLinebase); /* we should never get more than one EOL char */
359 1550754 : linebase = prevLinebase;
360 1550754 : prevLinebase = NULL;
361 1550754 : lineno--;
362 : } else {
363 14811486 : JS_ASSERT(userbuf.peekRawChar() == c);
364 : }
365 : }
366 :
367 : void
368 56611876 : TokenStream::ungetCharIgnoreEOL(int32_t c)
369 : {
370 56611876 : if (c == EOF)
371 13260 : return;
372 56598616 : JS_ASSERT(!userbuf.atStart());
373 56598616 : userbuf.ungetRawChar();
374 : }
375 :
376 : /*
377 : * Return true iff |n| raw characters can be read from this without reading past
378 : * EOF or a newline, and copy those characters into |cp| if so. The characters
379 : * are not consumed: use skipChars(n) to do so after checking that the consumed
380 : * characters had appropriate values.
381 : */
382 : bool
383 738404 : TokenStream::peekChars(int n, jschar *cp)
384 : {
385 : int i, j;
386 : int32_t c;
387 :
388 12266851 : for (i = 0; i < n; i++) {
389 11578674 : c = getCharIgnoreEOL();
390 11578674 : if (c == EOF)
391 18 : break;
392 11578656 : if (c == '\n') {
393 50209 : ungetCharIgnoreEOL(c);
394 50209 : break;
395 : }
396 11528447 : cp[i] = (jschar)c;
397 : }
398 12266851 : for (j = i - 1; j >= 0; j--)
399 11528447 : ungetCharIgnoreEOL(cp[j]);
400 738404 : return i == n;
401 : }
402 :
403 : const jschar *
404 5443 : TokenStream::TokenBuf::findEOL()
405 : {
406 5443 : const jschar *tmp = ptr;
407 : #ifdef DEBUG
408 : /*
409 : * This is the one exception to the "TokenBuf isn't accessed after
410 : * poisoning" rule -- we may end up calling findEOL() in order to set up
411 : * an error.
412 : */
413 5443 : if (!tmp)
414 0 : tmp = ptrWhenPoisoned;
415 : #endif
416 :
417 19732 : while (true) {
418 25175 : if (tmp >= limit)
419 581 : break;
420 24594 : if (TokenBuf::isRawEOLChar(*tmp++))
421 4862 : break;
422 : }
423 5443 : return tmp;
424 : }
425 :
426 : bool
427 15775 : TokenStream::reportCompileErrorNumberVA(ParseNode *pn, unsigned flags, unsigned errorNumber, va_list ap)
428 : {
429 : JSErrorReport report;
430 : char *message;
431 : jschar *linechars;
432 : char *linebytes;
433 : bool warning;
434 : JSBool ok;
435 : const TokenPos *tp;
436 : unsigned i;
437 :
438 15775 : if (JSREPORT_IS_STRICT(flags) && !cx->hasStrictOption())
439 9870 : return true;
440 :
441 5905 : warning = JSREPORT_IS_WARNING(flags);
442 5905 : if (warning && cx->hasWErrorOption()) {
443 0 : flags &= ~JSREPORT_WARNING;
444 0 : warning = false;
445 : }
446 :
447 5905 : PodZero(&report);
448 5905 : report.flags = flags;
449 5905 : report.errorNumber = errorNumber;
450 5905 : message = NULL;
451 5905 : linechars = NULL;
452 5905 : linebytes = NULL;
453 :
454 : MUST_FLOW_THROUGH("out");
455 : ok = js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL,
456 : errorNumber, &message, &report,
457 5905 : !(flags & JSREPORT_UC), ap);
458 5905 : if (!ok) {
459 0 : warning = false;
460 0 : goto out;
461 : }
462 :
463 5905 : report.filename = filename;
464 5905 : report.originPrincipals = originPrincipals;
465 :
466 5905 : tp = pn ? &pn->pn_pos : ¤tToken().pos;
467 5905 : report.lineno = tp->begin.lineno;
468 :
469 : /*
470 : * Given a token, T, that we want to complain about: if T's (starting)
471 : * lineno doesn't match TokenStream's lineno, that means we've scanned past
472 : * the line that T starts on, which makes it hard to print some or all of
473 : * T's (starting) line for context.
474 : *
475 : * So we don't even try, leaving report.linebuf and friends zeroed. This
476 : * means that any error involving a multi-line token (eg. an unterminated
477 : * multi-line string literal) won't have a context printed.
478 : */
479 5905 : if (report.lineno == lineno) {
480 5443 : size_t linelength = userbuf.findEOL() - linebase;
481 :
482 5443 : linechars = (jschar *)cx->malloc_((linelength + 1) * sizeof(jschar));
483 5443 : if (!linechars) {
484 0 : warning = false;
485 0 : goto out;
486 : }
487 5443 : PodCopy(linechars, linebase, linelength);
488 5443 : linechars[linelength] = 0;
489 5443 : linebytes = DeflateString(cx, linechars, linelength);
490 5443 : if (!linebytes) {
491 0 : warning = false;
492 0 : goto out;
493 : }
494 :
495 : /* Unicode and char versions of the offending source line, without final \n */
496 5443 : report.linebuf = linebytes;
497 5443 : report.uclinebuf = linechars;
498 :
499 : /* The lineno check above means we should only see single-line tokens here. */
500 5443 : JS_ASSERT(tp->begin.lineno == tp->end.lineno);
501 5443 : report.tokenptr = report.linebuf + tp->begin.index;
502 5443 : report.uctokenptr = report.uclinebuf + tp->begin.index;
503 : }
504 :
505 : /*
506 : * If there's a runtime exception type associated with this error
507 : * number, set that as the pending exception. For errors occuring at
508 : * compile time, this is very likely to be a JSEXN_SYNTAXERR.
509 : *
510 : * If an exception is thrown but not caught, the JSREPORT_EXCEPTION
511 : * flag will be set in report.flags. Proper behavior for an error
512 : * reporter is to ignore a report with this flag for all but top-level
513 : * compilation errors. The exception will remain pending, and so long
514 : * as the non-top-level "load", "eval", or "compile" native function
515 : * returns false, the top-level reporter will eventually receive the
516 : * uncaught exception report.
517 : */
518 5905 : if (!js_ErrorToException(cx, message, &report, NULL, NULL)) {
519 : /*
520 : * If debugErrorHook is present then we give it a chance to veto
521 : * sending the error on to the regular error reporter.
522 : */
523 5303 : bool reportError = true;
524 5303 : if (JSDebugErrorHook hook = cx->runtime->debugHooks.debugErrorHook)
525 1245 : reportError = hook(cx, message, &report, cx->runtime->debugHooks.debugErrorHookData);
526 :
527 : /* Report the error */
528 5303 : if (reportError && cx->errorReporter)
529 5303 : cx->errorReporter(cx, message, &report);
530 : }
531 :
532 : out:
533 5905 : if (linebytes)
534 5443 : cx->free_(linebytes);
535 5905 : if (linechars)
536 5443 : cx->free_(linechars);
537 5905 : if (message)
538 5905 : cx->free_(message);
539 5905 : if (report.ucmessage)
540 5905 : cx->free_((void *)report.ucmessage);
541 :
542 5905 : if (report.messageArgs) {
543 1508 : if (!(flags & JSREPORT_UC)) {
544 1508 : i = 0;
545 4718 : while (report.messageArgs[i])
546 1702 : cx->free_((void *)report.messageArgs[i++]);
547 : }
548 1508 : cx->free_((void *)report.messageArgs);
549 : }
550 :
551 5905 : return warning;
552 : }
553 :
554 : bool
555 18113 : js::ReportStrictModeError(JSContext *cx, TokenStream *ts, TreeContext *tc, ParseNode *pn,
556 : unsigned errorNumber, ...)
557 : {
558 18113 : JS_ASSERT(ts || tc);
559 18113 : JS_ASSERT(cx == ts->getContext());
560 :
561 : /* In strict mode code, this is an error, not merely a warning. */
562 : unsigned flags;
563 18113 : if ((ts && ts->isStrictMode()) || (tc && (tc->flags & TCF_STRICT_MODE_CODE))) {
564 0 : flags = JSREPORT_ERROR;
565 : } else {
566 18113 : if (!cx->hasStrictOption())
567 14786 : return true;
568 3327 : flags = JSREPORT_WARNING;
569 : }
570 :
571 : va_list ap;
572 3327 : va_start(ap, errorNumber);
573 3327 : bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
574 3327 : va_end(ap);
575 :
576 3327 : return result;
577 : }
578 :
579 : bool
580 3405 : js::ReportCompileErrorNumber(JSContext *cx, TokenStream *ts, ParseNode *pn, unsigned flags,
581 : unsigned errorNumber, ...)
582 : {
583 : va_list ap;
584 :
585 : /*
586 : * We don't accept a TreeContext argument, so we can't implement
587 : * JSREPORT_STRICT_MODE_ERROR here. Use ReportStrictModeError instead,
588 : * or do the checks in the caller and pass plain old JSREPORT_ERROR.
589 : */
590 3405 : JS_ASSERT(!(flags & JSREPORT_STRICT_MODE_ERROR));
591 :
592 3405 : va_start(ap, errorNumber);
593 3405 : JS_ASSERT(cx == ts->getContext());
594 3405 : bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
595 3405 : va_end(ap);
596 :
597 3405 : return result;
598 : }
599 :
600 : #if JS_HAS_XML_SUPPORT
601 :
602 : bool
603 0 : TokenStream::getXMLEntity()
604 : {
605 : ptrdiff_t offset, length, i;
606 : int c, d;
607 : JSBool ispair;
608 : jschar *bp, digit;
609 : char *bytes;
610 : JSErrNum msg;
611 :
612 0 : CharBuffer &tb = tokenbuf;
613 :
614 : /* Put the entity, including the '&' already scanned, in tokenbuf. */
615 0 : offset = tb.length();
616 0 : if (!tb.append('&'))
617 0 : return false;
618 0 : while ((c = getChar()) != ';') {
619 0 : if (c == EOF || c == '\n') {
620 0 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_END_OF_XML_ENTITY);
621 0 : return false;
622 : }
623 0 : if (!tb.append(c))
624 0 : return false;
625 : }
626 :
627 : /* Let length be the number of jschars after the '&', including the ';'. */
628 0 : length = tb.length() - offset;
629 0 : bp = tb.begin() + offset;
630 0 : c = d = 0;
631 0 : ispair = false;
632 0 : if (length > 2 && bp[1] == '#') {
633 : /* Match a well-formed XML Character Reference. */
634 0 : i = 2;
635 0 : if (length > 3 && (bp[i] == 'x' || bp[i] == 'X')) {
636 0 : if (length > 9) /* at most 6 hex digits allowed */
637 0 : goto badncr;
638 0 : while (++i < length) {
639 0 : digit = bp[i];
640 0 : if (!JS7_ISHEX(digit))
641 : goto badncr;
642 0 : c = (c << 4) + JS7_UNHEX(digit);
643 : }
644 : } else {
645 0 : while (i < length) {
646 0 : digit = bp[i++];
647 0 : if (!JS7_ISDEC(digit))
648 0 : goto badncr;
649 0 : c = (c * 10) + JS7_UNDEC(digit);
650 0 : if (c < 0)
651 0 : goto badncr;
652 : }
653 : }
654 :
655 0 : if (0x10000 <= c && c <= 0x10FFFF) {
656 : /* Form a surrogate pair (c, d) -- c is the high surrogate. */
657 0 : d = 0xDC00 + (c & 0x3FF);
658 0 : c = 0xD7C0 + (c >> 10);
659 0 : ispair = true;
660 : } else {
661 : /* Enforce the http://www.w3.org/TR/REC-xml/#wf-Legalchar WFC. */
662 0 : if (c != 0x9 && c != 0xA && c != 0xD &&
663 0 : !(0x20 <= c && c <= 0xD7FF) &&
664 0 : !(0xE000 <= c && c <= 0xFFFD)) {
665 : goto badncr;
666 : }
667 : }
668 : } else {
669 : /* Try to match one of the five XML 1.0 predefined entities. */
670 0 : switch (length) {
671 : case 3:
672 0 : if (bp[2] == 't') {
673 0 : if (bp[1] == 'l')
674 0 : c = '<';
675 0 : else if (bp[1] == 'g')
676 0 : c = '>';
677 : }
678 0 : break;
679 : case 4:
680 0 : if (bp[1] == 'a' && bp[2] == 'm' && bp[3] == 'p')
681 0 : c = '&';
682 0 : break;
683 : case 5:
684 0 : if (bp[3] == 'o') {
685 0 : if (bp[1] == 'a' && bp[2] == 'p' && bp[4] == 's')
686 0 : c = '\'';
687 0 : else if (bp[1] == 'q' && bp[2] == 'u' && bp[4] == 't')
688 0 : c = '"';
689 : }
690 0 : break;
691 : }
692 0 : if (c == 0) {
693 0 : msg = JSMSG_UNKNOWN_XML_ENTITY;
694 0 : goto bad;
695 : }
696 : }
697 :
698 : /* If we matched, retract tokenbuf and store the entity's value. */
699 0 : *bp++ = (jschar) c;
700 0 : if (ispair)
701 0 : *bp++ = (jschar) d;
702 0 : tb.shrinkBy(tb.end() - bp);
703 0 : return true;
704 :
705 : badncr:
706 0 : msg = JSMSG_BAD_XML_NCR;
707 : bad:
708 : /* No match: throw a TypeError per ECMA-357 10.3.2.1 step 8(a). */
709 0 : JS_ASSERT((tb.end() - bp) >= 1);
710 0 : bytes = DeflateString(cx, bp + 1, (tb.end() - bp) - 1);
711 0 : if (bytes) {
712 0 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, msg, bytes);
713 0 : cx->free_(bytes);
714 : }
715 0 : return false;
716 : }
717 :
/*
 * Scan the next token while the stream is in XML text mode (TSF_XMLTEXTMODE)
 * or, otherwise, XML tag mode (TSF_XMLTAGMODE, asserted).
 *
 * On success returns true with the token kind stored in *ttp and the token in
 * *tpp. On failure returns false with *ttp set to TOK_ERROR; *tpp still
 * receives the token that was being filled in.
 *
 * Text mode yields TOK_XMLSPACE or TOK_XMLTEXT. Tag mode yields one of
 * TOK_XMLSPACE, TOK_EOF, TOK_XMLNAME, TOK_LC, TOK_ASSIGN, TOK_XMLATTR,
 * TOK_XMLTAGC or TOK_XMLPTAGC.
 */
718 : bool
719 10078 : TokenStream::getXMLTextOrTag(TokenKind *ttp, Token **tpp)
720 : {
721 : TokenKind tt;
722 : int c, qc;
723 : Token *tp;
724 : JSAtom *atom;
725 :
726 : /*
727 : * Look for XML text.
728 : */
729 10078 : if (flags & TSF_XMLTEXTMODE) {
730 1884 : tt = TOK_XMLSPACE; /* veto if non-space, return TOK_XMLTEXT */
731 1884 : tp = newToken(0);
732 1884 : tokenbuf.clear();
/* Text runs up to '<'; outside XML-only mode a '{' also ends it. */
733 1884 : qc = (flags & TSF_XMLONLYMODE) ? '<' : '{';
734 :
735 3989 : while ((c = getChar()) != qc && c != '<' && c != EOF) {
/* Entities are only expanded here when qc is '<', i.e. in XML-only mode. */
736 221 : if (c == '&' && qc == '<') {
737 0 : if (!getXMLEntity())
738 0 : goto error;
739 0 : tt = TOK_XMLTEXT;
740 0 : continue;
741 : }
742 :
743 221 : if (!IsXMLSpace(c))
744 221 : tt = TOK_XMLTEXT;
745 221 : if (!tokenbuf.append(c))
746 0 : goto error;
747 : }
/* Give back the terminating char so the next scan sees it. */
748 1884 : ungetChar(c);
749 :
750 1884 : if (tokenbuf.empty()) {
751 1729 : atom = NULL;
752 : } else {
753 155 : atom = atomize(cx, tokenbuf);
754 155 : if (!atom)
755 0 : goto error;
756 : }
757 1884 : tp->pos.end.lineno = lineno;
758 1884 : tp->setAtom(JSOP_STRING, atom);
759 1884 : goto out;
760 : }
761 :
762 : /*
763 : * XML tags.
764 : */
765 : else {
766 8194 : JS_ASSERT(flags & TSF_XMLTAGMODE);
767 8194 : tp = newToken(0);
768 8194 : c = getChar();
/* A run of XML whitespace becomes a single TOK_XMLSPACE token. */
769 8194 : if (c != EOF && IsXMLSpace(c)) {
770 641 : do {
771 641 : c = getChar();
772 641 : if (c == EOF)
773 0 : break;
774 641 : } while (IsXMLSpace(c));
775 641 : ungetChar(c);
776 641 : tp->pos.end.lineno = lineno;
777 641 : tt = TOK_XMLSPACE;
778 641 : goto out;
779 : }
780 :
781 7553 : if (c == EOF) {
782 0 : tt = TOK_EOF;
783 0 : goto out;
784 : }
785 :
786 7553 : tokenbuf.clear();
/* An XML name, optionally containing a single ':'. */
787 7553 : if (IsXMLNamespaceStart(c)) {
788 3438 : JSBool sawColon = JS_FALSE;
789 :
790 3438 : if (!tokenbuf.append(c))
791 0 : goto error;
792 17038 : while ((c = getChar()) != EOF && IsXMLNamePart(c)) {
793 10162 : if (c == ':') {
794 : int nextc;
795 :
/*
 * Reject a second ':' and a ':' that is not followed by a name-part char;
 * outside XML-only mode a following '{' is also accepted.
 */
796 0 : if (sawColon ||
797 : (nextc = peekChar(),
798 : ((flags & TSF_XMLONLYMODE) || nextc != '{') &&
799 0 : !IsXMLNamePart(nextc))) {
800 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
801 0 : JSMSG_BAD_XML_QNAME);
802 0 : goto error;
803 : }
804 0 : sawColon = JS_TRUE;
805 : }
806 :
807 10162 : if (!tokenbuf.append(c))
808 0 : goto error;
809 : }
810 :
811 3438 : ungetChar(c);
812 3438 : atom = atomize(cx, tokenbuf);
813 3438 : if (!atom)
814 0 : goto error;
815 3438 : tp->setAtom(JSOP_STRING, atom);
816 3438 : tt = TOK_XMLNAME;
817 3438 : goto out;
818 : }
819 :
820 4115 : switch (c) {
821 : case '{':
822 0 : if (flags & TSF_XMLONLYMODE)
823 0 : goto bad_xml_char;
824 0 : tt = TOK_LC;
825 0 : goto out;
826 :
827 : case '=':
828 641 : tt = TOK_ASSIGN;
829 641 : goto out;
830 :
831 : case '"':
832 : case '\'':
/* Quoted attribute value: collect chars up to the matching quote. */
833 641 : qc = c;
834 1300 : while ((c = getChar()) != qc) {
835 18 : if (c == EOF) {
836 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
837 0 : JSMSG_UNTERMINATED_STRING);
838 0 : goto error;
839 : }
840 :
841 : /*
842 : * XML attribute values are double-quoted when pretty-printed,
843 : * so escape " if it is expressed directly in a single-quoted
844 : * attribute value.
845 : */
846 18 : if (c == '"' && !(flags & TSF_XMLONLYMODE)) {
847 0 : JS_ASSERT(qc == '\'');
848 0 : if (!tokenbuf.append(js_quot_entity_str,
849 0 : strlen(js_quot_entity_str)))
850 0 : goto error;
851 0 : continue;
852 : }
853 :
854 18 : if (c == '&' && (flags & TSF_XMLONLYMODE)) {
855 0 : if (!getXMLEntity())
856 0 : goto error;
857 0 : continue;
858 : }
859 :
860 18 : if (!tokenbuf.append(c))
861 0 : goto error;
862 : }
863 641 : atom = atomize(cx, tokenbuf);
864 641 : if (!atom)
865 0 : goto error;
866 641 : tp->pos.end.lineno = lineno;
867 641 : tp->setAtom(JSOP_STRING, atom);
868 641 : tt = TOK_XMLATTR;
869 641 : goto out;
870 :
871 : case '>':
872 2356 : tt = TOK_XMLTAGC;
873 2356 : goto out;
874 :
875 : case '/':
876 477 : if (matchChar('>')) {
877 477 : tt = TOK_XMLPTAGC;
878 477 : goto out;
879 : }
880 : /* FALL THROUGH */
881 :
882 : bad_xml_char:
883 : default:
884 0 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_CHARACTER);
885 0 : goto error;
886 : }
887 : JS_NOT_REACHED("getXMLTextOrTag 1");
888 : }
889 : JS_NOT_REACHED("getXMLTextOrTag 2");
890 :
891 : out:
892 10078 : *ttp = tt;
893 10078 : *tpp = tp;
894 10078 : return true;
895 :
896 : error:
897 0 : *ttp = TOK_ERROR;
898 0 : *tpp = tp;
899 0 : return false;
900 : }
901 :
902 : /*
903 : * After much testing, it's clear that Postel's advice to protocol designers
904 : * ("be liberal in what you accept, and conservative in what you send") invites
905 : * a natural-law repercussion for JS as "protocol":
906 : *
907 : * "If you are liberal in what you accept, others will utterly fail to be
908 : * conservative in what they send."
909 : *
910 : * Which means you will get <!-- comments to end of line in the middle of .js
911 : * files, and after if conditions whose then statements are on the next line,
912 : * and other wonders. See at least the following bugs:
913 : * - https://bugzilla.mozilla.org/show_bug.cgi?id=309242
914 : * - https://bugzilla.mozilla.org/show_bug.cgi?id=309712
915 : * - https://bugzilla.mozilla.org/show_bug.cgi?id=310993
916 : *
917 : * So without JSOPTION_XML, we changed around Firefox 1.5 never to scan an XML
918 : * comment or CDATA literal. Instead, we always scan <! as the start of an
919 : * HTML comment hack to end of line, used since Netscape 2 to hide script tag
920 : * content from script-unaware browsers.
921 : *
922 : * But this still leaves XML resources with certain internal structure
923 : * vulnerable to being loaded as script cross-origin, and some internal data
924 : * stolen, so for Firefox 3.5 and beyond, we reject programs whose source
925 : * consists only of XML literals. See:
926 : *
927 : * https://bugzilla.mozilla.org/show_bug.cgi?id=336551
928 : *
929 : * The check for this is in js::frontend::CompileScript.
930 : */
/*
 * Scan XML markup: a comment ("!--" ... "-->"), a CDATA section
 * ("![CDATA[" ... "]]>"), a processing instruction ("?" ... "?>"), or the
 * start of a start/end tag.
 * NOTE(review): presumably called right after the caller has consumed the
 * opening '<' -- confirm against the call site.
 *
 * *tpp is read on entry as the token to fill in. On success returns true with
 * the kind in *ttp (TOK_XMLCOMMENT, TOK_XMLCDATA, TOK_XMLPI, TOK_XMLETAGO or
 * TOK_XMLSTAGO). On failure returns false with *ttp set to TOK_ERROR;
 * malformed markup is reported as JSMSG_BAD_XML_MARKUP first.
 */
931 : bool
932 2860 : TokenStream::getXMLMarkup(TokenKind *ttp, Token **tpp)
933 : {
934 : TokenKind tt;
935 : int c;
936 2860 : Token *tp = *tpp;
937 :
938 : /* Check for XML comment or CDATA section. */
939 2860 : if (matchChar('!')) {
940 27 : tokenbuf.clear();
941 :
942 : /* Scan XML comment. */
943 27 : if (matchChar('-')) {
944 0 : if (!matchChar('-'))
945 0 : goto bad_xml_markup;
/* Collect the comment body up to the first "--", which must be followed by '>'. */
946 0 : while ((c = getChar()) != '-' || !matchChar('-')) {
947 0 : if (c == EOF)
948 0 : goto bad_xml_markup;
949 0 : if (!tokenbuf.append(c))
950 0 : goto error;
951 : }
952 0 : if (!matchChar('>'))
953 0 : goto bad_xml_markup;
954 :
955 0 : JSAtom *commentText = atomize(cx, tokenbuf);
956 0 : if (!commentText)
957 0 : goto error;
958 0 : tp->setAtom(JSOP_XMLCOMMENT, commentText);
959 0 : tp->pos.end.lineno = lineno;
960 0 : tt = TOK_XMLCOMMENT;
961 0 : goto out;
962 : }
963 :
964 : /* Scan CDATA section. */
965 27 : if (matchChar('[')) {
966 : jschar cp[6];
967 189 : if (peekChars(6, cp) &&
968 27 : cp[0] == 'C' &&
969 27 : cp[1] == 'D' &&
970 27 : cp[2] == 'A' &&
971 27 : cp[3] == 'T' &&
972 27 : cp[4] == 'A' &&
973 27 : cp[5] == '[') {
974 27 : skipChars(6);
/* Collect content until a ']' that is immediately followed by "]>". */
975 108 : while ((c = getChar()) != ']' ||
976 27 : !peekChars(2, cp) ||
977 27 : cp[0] != ']' ||
978 27 : cp[1] != '>') {
979 270 : if (c == EOF)
980 0 : goto bad_xml_markup;
981 270 : if (!tokenbuf.append(c))
982 0 : goto error;
983 : }
984 27 : consumeKnownChar(']');
985 27 : consumeKnownChar('>');
986 :
987 27 : JSAtom *cdataContent = atomize(cx, tokenbuf);
988 27 : if (!cdataContent)
989 0 : goto error;
990 :
991 27 : tp->setAtom(JSOP_XMLCDATA, cdataContent);
992 27 : tp->pos.end.lineno = lineno;
993 27 : tt = TOK_XMLCDATA;
994 27 : goto out;
995 : }
996 0 : goto bad_xml_markup;
997 : }
998 : }
999 :
1000 : /* Check for processing instruction. */
1001 2833 : if (matchChar('?')) {
/*
 * tokenbuf receives the target name followed by the rest of the
 * instruction. targetLength counts the target's chars; contentIndex is
 * the tokenbuf offset of the first non-space char after the target, or
 * -1 while none has been seen.
 */
1002 0 : bool inTarget = true;
1003 0 : size_t targetLength = 0;
1004 0 : ptrdiff_t contentIndex = -1;
1005 :
1006 0 : tokenbuf.clear();
1007 0 : while ((c = getChar()) != '?' || peekChar() != '>') {
1008 0 : if (c == EOF)
1009 0 : goto bad_xml_markup;
1010 0 : if (inTarget) {
1011 0 : if (IsXMLSpace(c)) {
1012 0 : if (tokenbuf.empty())
1013 0 : goto bad_xml_markup;
1014 0 : inTarget = false;
1015 : } else {
1016 0 : if (!(tokenbuf.empty()
1017 0 : ? IsXMLNamespaceStart(c)
1018 0 : : IsXMLNamespacePart(c))) {
1019 0 : goto bad_xml_markup;
1020 : }
1021 0 : ++targetLength;
1022 : }
1023 : } else {
1024 0 : if (contentIndex < 0 && !IsXMLSpace(c))
1025 0 : contentIndex = tokenbuf.length();
1026 : }
1027 0 : if (!tokenbuf.append(c))
1028 0 : goto error;
1029 : }
1030 0 : if (targetLength == 0)
1031 0 : goto bad_xml_markup;
1032 :
1033 : JSAtom *data;
1034 0 : if (contentIndex < 0) {
1035 0 : data = cx->runtime->atomState.emptyAtom;
1036 : } else {
1037 0 : data = js_AtomizeChars(cx, tokenbuf.begin() + contentIndex,
1038 0 : tokenbuf.length() - contentIndex);
1039 0 : if (!data)
1040 0 : goto error;
1041 : }
/* Trim tokenbuf back to just the target name before atomizing it. */
1042 0 : tokenbuf.shrinkBy(tokenbuf.length() - targetLength);
1043 0 : consumeKnownChar('>');
1044 0 : JSAtom *target = atomize(cx, tokenbuf);
1045 0 : if (!target)
1046 0 : goto error;
1047 0 : tp->setProcessingInstruction(target->asPropertyName(), data);
1048 0 : tp->pos.end.lineno = lineno;
1049 0 : tt = TOK_XMLPI;
1050 0 : goto out;
1051 : }
1052 :
1053 : /* An XML start-of-tag character. */
1054 2833 : tt = matchChar('/') ? TOK_XMLETAGO : TOK_XMLSTAGO;
1055 :
1056 : out:
1057 2860 : *ttp = tt;
1058 2860 : *tpp = tp;
1059 2860 : return true;
1060 :
1061 : bad_xml_markup:
1062 0 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_MARKUP);
1063 : error:
1064 0 : *ttp = TOK_ERROR;
1065 0 : *tpp = tp;
1066 0 : return false;
1067 : }
1068 : #endif /* JS_HAS_XML_SUPPORT */
1069 :
1070 : /*
1071 : * We have encountered a '\': check for a Unicode escape sequence after it.
1072 : * Return 'true' and the character code value (by value) if we found a
1073 : * Unicode escape sequence. Otherwise, return 'false'. In both cases, do not
1074 : * advance along the buffer.
1075 : */
1076 : bool
1077 0 : TokenStream::peekUnicodeEscape(int *result)
1078 : {
1079 : jschar cp[5];
1080 :
1081 0 : if (peekChars(5, cp) && cp[0] == 'u' &&
1082 0 : JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
1083 0 : JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
1084 : {
1085 0 : *result = (((((JS7_UNHEX(cp[1]) << 4)
1086 0 : + JS7_UNHEX(cp[2])) << 4)
1087 0 : + JS7_UNHEX(cp[3])) << 4)
1088 0 : + JS7_UNHEX(cp[4]);
1089 0 : return true;
1090 : }
1091 0 : return false;
1092 : }
1093 :
1094 : bool
1095 0 : TokenStream::matchUnicodeEscapeIdStart(int32_t *cp)
1096 : {
1097 0 : if (peekUnicodeEscape(cp) && IsIdentifierStart(*cp)) {
1098 0 : skipChars(5);
1099 0 : return true;
1100 : }
1101 0 : return false;
1102 : }
1103 :
1104 : bool
1105 0 : TokenStream::matchUnicodeEscapeIdent(int32_t *cp)
1106 : {
1107 0 : if (peekUnicodeEscape(cp) && IsIdentifierPart(*cp)) {
1108 0 : skipChars(5);
1109 0 : return true;
1110 : }
1111 0 : return false;
1112 : }
1113 :
1114 : /*
1115 : * Helper function which returns true if the first length(q) characters in p are
1116 : * the same as the characters in q.
1117 : */
1118 : static bool
1119 587237 : CharsMatch(const jschar *p, const char *q) {
1120 1174831 : while (*q) {
1121 587591 : if (*p++ != *q++)
1122 587234 : return false;
1123 : }
1124 3 : return true;
1125 : }
1126 :
1127 : bool
1128 2 : TokenStream::getAtLine()
1129 : {
1130 : int c;
1131 : jschar cp[5];
1132 : unsigned i, line, temp;
1133 : char filenameBuf[1024];
1134 :
1135 : /*
1136 : * Hack for source filters such as the Mozilla XUL preprocessor:
1137 : * "//@line 123\n" sets the number of the *next* line after the
1138 : * comment to 123. If we reach here, we've already seen "//".
1139 : */
1140 2 : if (peekChars(5, cp) && CharsMatch(cp, "@line")) {
1141 2 : skipChars(5);
1142 6 : while ((c = getChar()) != '\n' && c != EOF && IsSpaceOrBOM2(c))
1143 2 : continue;
1144 2 : if (JS7_ISDEC(c)) {
1145 2 : line = JS7_UNDEC(c);
1146 8 : while ((c = getChar()) != EOF && JS7_ISDEC(c)) {
1147 4 : temp = 10 * line + JS7_UNDEC(c);
1148 4 : if (temp < line) {
1149 : /* Ignore overlarge line numbers. */
1150 0 : return true;
1151 : }
1152 4 : line = temp;
1153 : }
1154 6 : while (c != '\n' && c != EOF && IsSpaceOrBOM2(c))
1155 2 : c = getChar();
1156 2 : i = 0;
1157 2 : if (c == '"') {
1158 10 : while ((c = getChar()) != EOF && c != '"') {
1159 6 : if (c == '\n') {
1160 0 : ungetChar(c);
1161 0 : return true;
1162 : }
1163 6 : if ((c >> 8) != 0 || i >= sizeof filenameBuf - 1)
1164 0 : return true;
1165 6 : filenameBuf[i++] = (char) c;
1166 : }
1167 2 : if (c == '"') {
1168 4 : while ((c = getChar()) != '\n' && c != EOF && IsSpaceOrBOM2(c))
1169 0 : continue;
1170 : }
1171 : }
1172 2 : filenameBuf[i] = '\0';
1173 2 : if (c == EOF || c == '\n') {
1174 2 : if (i > 0) {
1175 2 : if (flags & TSF_OWNFILENAME)
1176 1 : cx->free_((void *) filename);
1177 2 : filename = JS_strdup(cx, filenameBuf);
1178 2 : if (!filename)
1179 0 : return false;
1180 2 : flags |= TSF_OWNFILENAME;
1181 : }
1182 2 : lineno = line;
1183 : }
1184 : }
1185 2 : ungetChar(c);
1186 : }
1187 2 : return true;
1188 : }
1189 :
1190 : bool
1191 637462 : TokenStream::getAtSourceMappingURL()
1192 : {
1193 : jschar peeked[18];
1194 :
1195 : /* Match comments of the form @sourceMappingURL=<url> */
1196 637462 : if (peekChars(18, peeked) && CharsMatch(peeked, "@sourceMappingURL=")) {
1197 1 : skipChars(18);
1198 1 : tokenbuf.clear();
1199 :
1200 : jschar c;
1201 44 : while (!IsSpaceOrBOM2((c = getChar())) &&
1202 : c && c != jschar(EOF))
1203 42 : tokenbuf.append(c);
1204 :
1205 1 : if (tokenbuf.empty())
1206 : /* The source map's URL was missing, but not quite an exception that
1207 : * we should stop and drop everything for, though. */
1208 0 : return true;
1209 :
1210 1 : int len = tokenbuf.length();
1211 :
1212 1 : if (sourceMap)
1213 0 : cx->free_(sourceMap);
1214 1 : sourceMap = (jschar *) cx->malloc_(sizeof(jschar) * (len + 1));
1215 1 : if (!sourceMap)
1216 0 : return false;
1217 :
1218 43 : for (int i = 0; i < len; i++)
1219 42 : sourceMap[i] = tokenbuf[i];
1220 1 : sourceMap[len] = '\0';
1221 : }
1222 637462 : return true;
1223 : }
1224 :
1225 : Token *
1226 116252497 : TokenStream::newToken(ptrdiff_t adjust)
1227 : {
1228 116252497 : cursor = (cursor + 1) & ntokensMask;
1229 116252497 : Token *tp = &tokens[cursor];
1230 116252497 : tp->ptr = userbuf.addressOfNextRawChar() + adjust;
1231 116252497 : tp->pos.begin.index = tp->ptr - linebase;
1232 116252497 : tp->pos.begin.lineno = tp->pos.end.lineno = lineno;
1233 116252497 : return tp;
1234 : }
1235 :
1236 : JS_ALWAYS_INLINE JSAtom *
1237 10502012 : TokenStream::atomize(JSContext *cx, CharBuffer &cb)
1238 : {
1239 10502012 : return js_AtomizeChars(cx, cb.begin(), cb.length());
1240 : }
1241 :
1242 : #ifdef DEBUG
1243 : bool
1244 115312137 : IsTokenSane(Token *tp)
1245 : {
1246 : /*
1247 : * Nb: TOK_EOL should never be used in an actual Token; it should only be
1248 : * returned as a TokenKind from peekTokenSameLine().
1249 : */
1250 115312137 : if (tp->type < TOK_ERROR || tp->type >= TOK_LIMIT || tp->type == TOK_EOL)
1251 0 : return false;
1252 :
1253 115312137 : if (tp->pos.begin.lineno == tp->pos.end.lineno) {
1254 115310723 : if (tp->pos.begin.index > tp->pos.end.index)
1255 0 : return false;
1256 : } else {
1257 : /* Only certain token kinds can be multi-line. */
1258 1414 : switch (tp->type) {
1259 : case TOK_STRING:
1260 : case TOK_XMLATTR:
1261 : case TOK_XMLSPACE:
1262 : case TOK_XMLTEXT:
1263 : case TOK_XMLCOMMENT:
1264 : case TOK_XMLCDATA:
1265 : case TOK_XMLPI:
1266 1414 : break;
1267 : default:
1268 0 : return false;
1269 : }
1270 : }
1271 115312137 : return true;
1272 : }
1273 : #endif
1274 :
1275 : bool
1276 0 : TokenStream::putIdentInTokenbuf(const jschar *identStart)
1277 : {
1278 : int32_t c, qc;
1279 0 : const jschar *tmp = userbuf.addressOfNextRawChar();
1280 0 : userbuf.setAddressOfNextRawChar(identStart);
1281 :
1282 0 : tokenbuf.clear();
1283 0 : for (;;) {
1284 0 : c = getCharIgnoreEOL();
1285 0 : if (!IsIdentifierPart(c)) {
1286 0 : if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
1287 : break;
1288 0 : c = qc;
1289 : }
1290 0 : if (!tokenbuf.append(c)) {
1291 0 : userbuf.setAddressOfNextRawChar(tmp);
1292 0 : return false;
1293 : }
1294 : }
1295 0 : userbuf.setAddressOfNextRawChar(tmp);
1296 0 : return true;
1297 : }
1298 :
1299 : bool
1300 24728336 : TokenStream::checkForKeyword(const jschar *s, size_t length, TokenKind *ttp, JSOp *topp)
1301 : {
1302 24728336 : JS_ASSERT(!ttp == !topp);
1303 :
1304 24728336 : const KeywordInfo *kw = FindKeyword(s, length);
1305 24728336 : if (!kw)
1306 16968010 : return true;
1307 :
1308 7760326 : if (kw->tokentype == TOK_RESERVED) {
1309 : return ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1310 0 : JSMSG_RESERVED_ID, kw->chars);
1311 : }
1312 :
1313 7760326 : if (kw->tokentype != TOK_STRICT_RESERVED) {
1314 7760239 : if (kw->version <= versionNumber()) {
1315 : /* Working keyword. */
1316 7760229 : if (ttp) {
1317 7760229 : *ttp = kw->tokentype;
1318 7760229 : *topp = (JSOp) kw->op;
1319 7760229 : return true;
1320 : }
1321 : return ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1322 0 : JSMSG_RESERVED_ID, kw->chars);
1323 : }
1324 :
1325 : /*
1326 : * The keyword is not in this version. Treat it as an identifier,
1327 : * unless it is let or yield which we treat as TOK_STRICT_RESERVED by
1328 : * falling through to the code below (ES5 forbids them in strict mode).
1329 : */
1330 10 : if (kw->tokentype != TOK_LET && kw->tokentype != TOK_YIELD)
1331 0 : return true;
1332 : }
1333 :
1334 : /* Strict reserved word. */
1335 97 : if (isStrictMode())
1336 0 : return ReportStrictModeError(cx, this, NULL, NULL, JSMSG_RESERVED_ID, kw->chars);
1337 : return ReportCompileErrorNumber(cx, this, NULL, JSREPORT_STRICT | JSREPORT_WARNING,
1338 97 : JSMSG_RESERVED_ID, kw->chars);
1339 : }
1340 :
/*
 * Classification of a token's first character. The numeric values are
 * spelled out because the ordering matters to code that compares kinds.
 */
enum FirstCharKind {
    Other   = 0,
    OneChar = 1,
    Ident   = 2,
    Dot     = 3,
    Equals  = 4,
    String  = 5,
    Dec     = 6,
    Colon   = 7,
    Plus    = 8,
    HexOct  = 9,

    /* These two must be last, so that |c >= Space| matches both. */
    Space   = 10,
    EOL     = 11
};
1357 :
1358 : #define _______ Other
1359 :
1360 : /*
1361 : * OneChar: 40, 41, 44, 59, 63, 91, 93, 123, 125: '(', ')', ',', ';', '?', '[', ']', '{', '}'
1362 : * Ident: 36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'
1363 : * Dot: 46: '.'
1364 : * Equals: 61: '='
1365 : * String: 34, 39: '"', '\''
1366 : * Dec: 49..57: '1'..'9'
1367 : * Colon: 58: ':'
1368 : * Plus: 43: '+'
1369 : * HexOct: 48: '0'
1370 : * Space: 9, 11, 12, 32: '\t', '\v', '\f', ' '
1371 : * EOL: 10, 13: '\n', '\r'
1372 : */
/*
 * Lookup table giving the FirstCharKind of each code unit 0..127 (128
 * entries, ten per row; the row label is the index of the row's first
 * entry). getTokenInternal indexes it with a token's first char only after
 * values >= 128 have been dealt with separately. Entries written as _______
 * are the 'Other' kind (see the #define above). Note that 32 (' ') is
 * classed as Space.
 */
1373 : static const uint8_t firstCharKinds[] = {
1374 : /* 0 1 2 3 4 5 6 7 8 9 */
1375 : /* 0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, Space,
1376 : /* 10+ */ EOL, Space, Space, EOL, _______, _______, _______, _______, _______, _______,
1377 : /* 20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
1378 : /* 30+ */ _______, _______, Space, _______, String, _______, Ident, _______, _______, String,
1379 : /* 40+ */ OneChar, OneChar, _______, Plus, OneChar, _______, Dot, _______, HexOct, Dec,
1380 : /* 50+ */ Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, Colon, OneChar,
1381 : /* 60+ */ _______, Equals, _______, OneChar, _______, Ident, Ident, Ident, Ident, Ident,
1382 : /* 70+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1383 : /* 80+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1384 : /* 90+ */ Ident, OneChar, _______, OneChar, _______, Ident, _______, Ident, Ident, Ident,
1385 : /* 100+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1386 : /* 110+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1387 : /* 120+ */ Ident, Ident, Ident, OneChar, _______, OneChar, _______, _______
1388 : };
1389 :
1390 : #undef _______
1391 :
1392 : TokenKind
1393 115312137 : TokenStream::getTokenInternal()
1394 : {
1395 : TokenKind tt;
1396 : int c, qc;
1397 : Token *tp;
1398 : FirstCharKind c1kind;
1399 : const jschar *numStart;
1400 : bool hasFracOrExp;
1401 : const jschar *identStart;
1402 : bool hadUnicodeEscape;
1403 :
1404 : #if JS_HAS_XML_SUPPORT
1405 : /*
1406 : * Look for XML text and tags.
1407 : */
1408 115312137 : if (flags & (TSF_XMLTEXTMODE|TSF_XMLTAGMODE)) {
1409 10078 : if (!getXMLTextOrTag(&tt, &tp))
1410 0 : goto error;
1411 10078 : goto out;
1412 : }
1413 : #endif
1414 :
1415 : retry:
1416 228533860 : if (JS_UNLIKELY(!userbuf.hasRawChars())) {
1417 172172 : tp = newToken(0);
1418 172172 : tt = TOK_EOF;
1419 172172 : flags |= TSF_EOF;
1420 172172 : goto out;
1421 : }
1422 :
1423 228361688 : c = userbuf.getRawChar();
1424 228361688 : JS_ASSERT(c != EOF);
1425 :
1426 : /*
1427 : * Chars not in the range 0..127 are rare. Getting them out of the way
1428 : * early allows subsequent checking to be faster.
1429 : */
1430 228361688 : if (JS_UNLIKELY(c >= 128)) {
1431 1 : if (IsSpaceOrBOM2(c)) {
1432 1 : if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
1433 0 : updateLineInfoForEOL();
1434 0 : updateFlagsForEOL();
1435 : }
1436 :
1437 1 : goto retry;
1438 : }
1439 :
1440 0 : tp = newToken(-1);
1441 :
1442 : /* '$' and '_' don't pass IsLetter, but they're < 128 so never appear here. */
1443 : JS_STATIC_ASSERT('$' < 128 && '_' < 128);
1444 0 : if (IsLetter(c)) {
1445 0 : identStart = userbuf.addressOfNextRawChar() - 1;
1446 0 : hadUnicodeEscape = false;
1447 0 : goto identifier;
1448 : }
1449 :
1450 0 : goto badchar;
1451 : }
1452 :
1453 : /*
1454 : * Get the token kind, based on the first char. The ordering of c1kind
1455 : * comparison is based on the frequency of tokens in real code. Minified
1456 : * and non-minified code have different characteristics, mostly in that
1457 : * whitespace occurs much less in minified code. Token kinds that fall in
1458 : * the 'Other' category typically account for less than 2% of all tokens,
1459 : * so their order doesn't matter much.
1460 : */
1461 228361687 : c1kind = FirstCharKind(firstCharKinds[c]);
1462 :
1463 : /*
1464 : * Skip over whitespace chars; update line state on EOLs. Even though
1465 : * whitespace isn't very common in minified code we have to handle it first
1466 : * (and jump back to 'retry') before calling newToken().
1467 : */
1468 228361687 : if (c1kind >= Space) {
1469 112291440 : if (c1kind == EOL) {
1470 : /* If it's a \r\n sequence: treat as a single EOL, skip over the \n. */
1471 16032088 : if (c == '\r' && userbuf.hasRawChars())
1472 39555 : userbuf.matchRawChar('\n');
1473 16032088 : updateLineInfoForEOL();
1474 16032088 : updateFlagsForEOL();
1475 : }
1476 112291440 : goto retry;
1477 : }
1478 :
1479 116070247 : tp = newToken(-1);
1480 :
1481 : /*
1482 : * Look for an unambiguous single-char token.
1483 : */
1484 116070247 : if (c1kind == OneChar) {
1485 48881274 : tt = (TokenKind)oneCharTokens[c];
1486 48881274 : goto out;
1487 : }
1488 :
1489 : /*
1490 : * Look for an identifier.
1491 : */
1492 67188973 : if (c1kind == Ident) {
1493 32624480 : identStart = userbuf.addressOfNextRawChar() - 1;
1494 32624480 : hadUnicodeEscape = false;
1495 :
1496 : identifier:
1497 209787359 : for (;;) {
1498 242411839 : c = getCharIgnoreEOL();
1499 242411839 : if (c == EOF)
1500 11307 : break;
1501 242400532 : if (!IsIdentifierPart(c)) {
1502 32613173 : if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
1503 32613173 : break;
1504 0 : hadUnicodeEscape = true;
1505 : }
1506 : }
1507 32624480 : ungetCharIgnoreEOL(c);
1508 :
1509 : /* Convert the escapes by putting into tokenbuf. */
1510 32624480 : if (hadUnicodeEscape && !putIdentInTokenbuf(identStart))
1511 0 : goto error;
1512 :
1513 : /* Check for keywords unless parser asks us to ignore keywords. */
1514 32624480 : if (!(flags & TSF_KEYWORD_IS_NAME)) {
1515 : const jschar *chars;
1516 : size_t length;
1517 24728138 : if (hadUnicodeEscape) {
1518 0 : chars = tokenbuf.begin();
1519 0 : length = tokenbuf.length();
1520 : } else {
1521 24728138 : chars = identStart;
1522 24728138 : length = userbuf.addressOfNextRawChar() - identStart;
1523 : }
1524 24728138 : tt = TOK_NAME;
1525 24728138 : if (!checkForKeyword(chars, length, &tt, &tp->t_op))
1526 0 : goto error;
1527 24728138 : if (tt != TOK_NAME)
1528 7760229 : goto out;
1529 : }
1530 :
1531 : /*
1532 : * Identifiers containing no Unicode escapes can be atomized directly
1533 : * from userbuf. The rest must use the escapes converted via
1534 : * tokenbuf before atomizing.
1535 : */
1536 : JSAtom *atom;
1537 24864251 : if (!hadUnicodeEscape)
1538 24864251 : atom = js_AtomizeChars(cx, identStart, userbuf.addressOfNextRawChar() - identStart);
1539 : else
1540 0 : atom = atomize(cx, tokenbuf);
1541 24864251 : if (!atom)
1542 0 : goto error;
1543 24864251 : tp->setName(JSOP_NAME, atom->asPropertyName());
1544 24864251 : tt = TOK_NAME;
1545 24864251 : goto out;
1546 : }
1547 :
1548 34564493 : if (c1kind == Dot) {
1549 6497062 : c = getCharIgnoreEOL();
1550 6497062 : if (JS7_ISDEC(c)) {
1551 201 : numStart = userbuf.addressOfNextRawChar() - 2;
1552 201 : goto decimal_dot;
1553 : }
1554 : #if JS_HAS_XML_SUPPORT
1555 6496861 : if (c == '.') {
1556 9 : tt = TOK_DBLDOT;
1557 9 : goto out;
1558 : }
1559 : #endif
1560 6496852 : ungetCharIgnoreEOL(c);
1561 6496852 : tt = TOK_DOT;
1562 6496852 : goto out;
1563 : }
1564 :
1565 28067431 : if (c1kind == Equals) {
1566 4457199 : if (matchChar('=')) {
1567 259346 : if (matchChar('=')) {
1568 41605 : tp->t_op = JSOP_STRICTEQ;
1569 41605 : tt = TOK_STRICTEQ;
1570 : } else {
1571 217741 : tp->t_op = JSOP_EQ;
1572 217741 : tt = TOK_EQ;
1573 : }
1574 : } else {
1575 4197853 : tp->t_op = JSOP_NOP;
1576 4197853 : tt = TOK_ASSIGN;
1577 : }
1578 4457199 : goto out;
1579 : }
1580 :
1581 : /*
1582 : * Look for a string.
1583 : */
1584 23610232 : if (c1kind == String) {
1585 10497751 : qc = c;
1586 10497751 : tokenbuf.clear();
1587 593392968 : while (true) {
1588 : /*
1589 : * We need to detect any of these chars: " or ', \n (or its
1590 : * equivalents), \\, EOF. We use maybeStrSpecial[] in a manner
1591 : * similar to maybeEOL[], see above. Because we detect EOL
1592 : * sequences here and put them back immediately, we can use
1593 : * getCharIgnoreEOL().
1594 : */
1595 603890719 : c = getCharIgnoreEOL();
1596 603890719 : if (maybeStrSpecial[c & 0xff]) {
1597 42122622 : if (c == qc)
1598 : break;
1599 31624871 : if (c == '\\') {
1600 27772915 : switch (c = getChar()) {
1601 913 : case 'b': c = '\b'; break;
1602 931 : case 'f': c = '\f'; break;
1603 213380 : case 'n': c = '\n'; break;
1604 7858 : case 'r': c = '\r'; break;
1605 6173 : case 't': c = '\t'; break;
1606 913 : case 'v': c = '\v'; break;
1607 :
1608 : default:
1609 27542747 : if ('0' <= c && c < '8') {
1610 775 : int32_t val = JS7_UNDEC(c);
1611 :
1612 775 : c = peekChar();
1613 : /* Strict mode code allows only \0, then a non-digit. */
1614 775 : if (val != 0 || JS7_ISDEC(c)) {
1615 703 : if (!ReportStrictModeError(cx, this, NULL, NULL,
1616 703 : JSMSG_DEPRECATED_OCTAL)) {
1617 0 : goto error;
1618 : }
1619 703 : setOctalCharacterEscape();
1620 : }
1621 775 : if ('0' <= c && c < '8') {
1622 703 : val = 8 * val + JS7_UNDEC(c);
1623 703 : getChar();
1624 703 : c = peekChar();
1625 703 : if ('0' <= c && c < '8') {
1626 703 : int32_t save = val;
1627 703 : val = 8 * val + JS7_UNDEC(c);
1628 703 : if (val <= 0377)
1629 703 : getChar();
1630 : else
1631 0 : val = save;
1632 : }
1633 : }
1634 :
1635 775 : c = (jschar)val;
1636 27541972 : } else if (c == 'u') {
1637 : jschar cp[4];
1638 796446 : if (peekChars(4, cp) &&
1639 353976 : JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
1640 353976 : JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
1641 176988 : c = (((((JS7_UNHEX(cp[0]) << 4)
1642 176988 : + JS7_UNHEX(cp[1])) << 4)
1643 176988 : + JS7_UNHEX(cp[2])) << 4)
1644 530964 : + JS7_UNHEX(cp[3]);
1645 88494 : skipChars(4);
1646 : } else {
1647 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1648 0 : JSMSG_MALFORMED_ESCAPE, "Unicode");
1649 0 : goto error;
1650 : }
1651 27453478 : } else if (c == 'x') {
1652 : jschar cp[2];
1653 61960 : if (peekChars(2, cp) &&
1654 49568 : JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
1655 12392 : c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
1656 12392 : skipChars(2);
1657 : } else {
1658 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1659 0 : JSMSG_MALFORMED_ESCAPE, "hexadecimal");
1660 0 : goto error;
1661 : }
1662 27441086 : } else if (c == '\n') {
1663 : /*
1664 : * ES5 7.8.4: an escaped line terminator represents
1665 : * no character.
1666 : */
1667 23361 : continue;
1668 : }
1669 27519386 : break;
1670 : }
1671 3851956 : } else if (TokenBuf::isRawEOLChar(c) || c == EOF) {
1672 0 : ungetCharIgnoreEOL(c);
1673 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1674 0 : JSMSG_UNTERMINATED_STRING);
1675 0 : goto error;
1676 : }
1677 : }
1678 593369607 : if (!tokenbuf.append(c))
1679 0 : goto error;
1680 : }
1681 10497751 : JSAtom *atom = atomize(cx, tokenbuf);
1682 10497751 : if (!atom)
1683 0 : goto error;
1684 10497751 : tp->pos.end.lineno = lineno;
1685 10497751 : tp->setAtom(JSOP_STRING, atom);
1686 10497751 : tt = TOK_STRING;
1687 10497751 : goto out;
1688 : }
1689 :
1690 : /*
1691 : * Look for a decimal number.
1692 : */
1693 13112481 : if (c1kind == Dec) {
1694 3891069 : numStart = userbuf.addressOfNextRawChar() - 1;
1695 :
1696 : decimal:
1697 5886136 : hasFracOrExp = false;
1698 23498756 : while (JS7_ISDEC(c))
1699 11726484 : c = getCharIgnoreEOL();
1700 :
1701 5886136 : if (c == '.') {
1702 : decimal_dot:
1703 6338 : hasFracOrExp = true;
1704 28015 : do {
1705 28015 : c = getCharIgnoreEOL();
1706 : } while (JS7_ISDEC(c));
1707 : }
1708 5886337 : if (c == 'e' || c == 'E') {
1709 1166 : hasFracOrExp = true;
1710 1166 : c = getCharIgnoreEOL();
1711 1166 : if (c == '+' || c == '-')
1712 402 : c = getCharIgnoreEOL();
1713 1166 : if (!JS7_ISDEC(c)) {
1714 0 : ungetCharIgnoreEOL(c);
1715 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1716 0 : JSMSG_MISSING_EXPONENT);
1717 0 : goto error;
1718 : }
1719 2254 : do {
1720 2254 : c = getCharIgnoreEOL();
1721 : } while (JS7_ISDEC(c));
1722 : }
1723 5886337 : ungetCharIgnoreEOL(c);
1724 :
1725 5886337 : if (c != EOF && IsIdentifierStart(c)) {
1726 0 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_IDSTART_AFTER_NUMBER);
1727 0 : goto error;
1728 : }
1729 :
1730 : /*
1731 : * Unlike identifiers and strings, numbers cannot contain escaped
1732 : * chars, so we don't need to use tokenbuf. Instead we can just
1733 : * convert the jschars in userbuf directly to the numeric value.
1734 : */
1735 : double dval;
1736 : const jschar *dummy;
1737 5886337 : if (!hasFracOrExp) {
1738 5879312 : if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), 10, &dummy, &dval))
1739 0 : goto error;
1740 : } else {
1741 7025 : if (!js_strtod(cx, numStart, userbuf.addressOfNextRawChar(), &dummy, &dval))
1742 0 : goto error;
1743 : }
1744 5886337 : tp->setNumber(dval);
1745 5886337 : tt = TOK_NUMBER;
1746 5886337 : goto out;
1747 : }
1748 :
1749 9221412 : if (c1kind == Colon) {
1750 : #if JS_HAS_XML_SUPPORT
1751 961916 : if (matchChar(':')) {
1752 72 : tt = TOK_DBLCOLON;
1753 72 : goto out;
1754 : }
1755 : #endif
1756 961844 : tp->t_op = JSOP_NOP;
1757 961844 : tt = TOK_COLON;
1758 961844 : goto out;
1759 : }
1760 :
1761 8259496 : if (c1kind == Plus) {
1762 4092336 : if (matchChar('=')) {
1763 66722 : tp->t_op = JSOP_ADD;
1764 66722 : tt = TOK_ADDASSIGN;
1765 4025614 : } else if (matchChar('+')) {
1766 1729766 : tt = TOK_INC;
1767 : } else {
1768 2295848 : tp->t_op = JSOP_POS;
1769 2295848 : tt = TOK_PLUS;
1770 : }
1771 4092336 : goto out;
1772 : }
1773 :
1774 : /*
1775 : * Look for a hexadecimal or octal number.
1776 : */
1777 4167160 : if (c1kind == HexOct) {
1778 : int radix;
1779 2020618 : c = getCharIgnoreEOL();
1780 2020618 : if (c == 'x' || c == 'X') {
1781 20003 : radix = 16;
1782 20003 : c = getCharIgnoreEOL();
1783 20003 : if (!JS7_ISHEX(c)) {
1784 0 : ungetCharIgnoreEOL(c);
1785 0 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_MISSING_HEXDIGITS);
1786 0 : goto error;
1787 : }
1788 20003 : numStart = userbuf.addressOfNextRawChar() - 1; /* one past the '0x' */
1789 99928 : while (JS7_ISHEX(c))
1790 59922 : c = getCharIgnoreEOL();
1791 2000615 : } else if (JS7_ISDEC(c)) {
1792 5548 : radix = 8;
1793 5548 : numStart = userbuf.addressOfNextRawChar() - 1; /* one past the '0' */
1794 27740 : while (JS7_ISDEC(c)) {
1795 : /* Octal integer literals are not permitted in strict mode code. */
1796 16644 : if (!ReportStrictModeError(cx, this, NULL, NULL, JSMSG_DEPRECATED_OCTAL))
1797 0 : goto error;
1798 :
1799 : /*
1800 : * Outside strict mode, we permit 08 and 09 as decimal numbers,
1801 : * which makes our behaviour a superset of the ECMA numeric
1802 : * grammar. We might not always be so permissive, so we warn
1803 : * about it.
1804 : */
1805 16644 : if (c >= '8') {
1806 0 : if (!ReportCompileErrorNumber(cx, this, NULL, JSREPORT_WARNING,
1807 0 : JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
1808 0 : goto error;
1809 : }
1810 0 : goto decimal; /* use the decimal scanner for the rest of the number */
1811 : }
1812 16644 : c = getCharIgnoreEOL();
1813 : }
1814 : } else {
1815 : /* '0' not followed by 'x', 'X' or a digit; scan as a decimal number. */
1816 1995067 : numStart = userbuf.addressOfNextRawChar() - 1;
1817 1995067 : goto decimal;
1818 : }
1819 25551 : ungetCharIgnoreEOL(c);
1820 :
1821 25551 : if (c != EOF && IsIdentifierStart(c)) {
1822 0 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_IDSTART_AFTER_NUMBER);
1823 0 : goto error;
1824 : }
1825 :
1826 : double dval;
1827 : const jschar *dummy;
1828 25551 : if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))
1829 0 : goto error;
1830 25551 : tp->setNumber(dval);
1831 25551 : tt = TOK_NUMBER;
1832 25551 : goto out;
1833 : }
1834 :
1835 : /*
1836 : * This handles everything else.
1837 : */
1838 2146542 : JS_ASSERT(c1kind == Other);
1839 2146542 : switch (c) {
1840 : case '\\':
1841 0 : hadUnicodeEscape = matchUnicodeEscapeIdStart(&qc);
1842 0 : if (hadUnicodeEscape) {
1843 0 : identStart = userbuf.addressOfNextRawChar() - 6;
1844 0 : goto identifier;
1845 : }
1846 0 : goto badchar;
1847 :
1848 : case '|':
1849 162960 : if (matchChar(c)) {
1850 120345 : tt = TOK_OR;
1851 42615 : } else if (matchChar('=')) {
1852 12310 : tp->t_op = JSOP_BITOR;
1853 12310 : tt = TOK_BITORASSIGN;
1854 : } else {
1855 30305 : tt = TOK_BITOR;
1856 : }
1857 162960 : break;
1858 :
1859 : case '^':
1860 2655 : if (matchChar('=')) {
1861 54 : tp->t_op = JSOP_BITXOR;
1862 54 : tt = TOK_BITXORASSIGN;
1863 : } else {
1864 2601 : tt = TOK_BITXOR;
1865 : }
1866 2655 : break;
1867 :
1868 : case '&':
1869 163308 : if (matchChar('&')) {
1870 152461 : tt = TOK_AND;
1871 10847 : } else if (matchChar('=')) {
1872 186 : tp->t_op = JSOP_BITAND;
1873 186 : tt = TOK_BITANDASSIGN;
1874 : } else {
1875 10661 : tt = TOK_BITAND;
1876 : }
1877 163308 : break;
1878 :
1879 : case '!':
1880 514835 : if (matchChar('=')) {
1881 133057 : if (matchChar('=')) {
1882 21207 : tp->t_op = JSOP_STRICTNE;
1883 21207 : tt = TOK_STRICTNE;
1884 : } else {
1885 111850 : tp->t_op = JSOP_NE;
1886 111850 : tt = TOK_NE;
1887 : }
1888 : } else {
1889 381778 : tp->t_op = JSOP_NOT;
1890 381778 : tt = TOK_NOT;
1891 : }
1892 514835 : break;
1893 :
1894 : #if JS_HAS_XML_SUPPORT
1895 : case '@':
1896 18 : tt = TOK_AT;
1897 18 : break;
1898 : #endif
1899 :
1900 : case '<':
1901 : #if JS_HAS_XML_SUPPORT
1902 108418 : if ((flags & TSF_OPERAND) && !isStrictMode() && (hasXML() || peekChar() != '!')) {
1903 2860 : if (!getXMLMarkup(&tt, &tp))
1904 0 : goto error;
1905 2860 : goto out;
1906 : }
1907 : #endif
1908 :
1909 : /* NB: treat HTML begin-comment as comment-till-end-of-line */
1910 105558 : if (matchChar('!')) {
1911 9 : if (matchChar('-')) {
1912 0 : if (matchChar('-')) {
1913 0 : flags |= TSF_IN_HTML_COMMENT;
1914 0 : goto skipline;
1915 : }
1916 0 : ungetChar('-');
1917 : }
1918 9 : ungetChar('!');
1919 : }
1920 105558 : if (matchChar('<')) {
1921 2705 : tp->t_op = JSOP_LSH;
1922 2705 : tt = matchChar('=') ? TOK_LSHASSIGN : TOK_LSH;
1923 : } else {
1924 102853 : if (matchChar('=')) {
1925 9864 : tp->t_op = JSOP_LE;
1926 9864 : tt = TOK_LE;
1927 : } else {
1928 92989 : tp->t_op = JSOP_LT;
1929 92989 : tt = TOK_LT;
1930 : }
1931 : }
1932 105558 : break;
1933 :
1934 : case '>':
1935 62036 : if (matchChar('>')) {
1936 5447 : if (matchChar('>')) {
1937 1434 : tp->t_op = JSOP_URSH;
1938 1434 : tt = matchChar('=') ? TOK_URSHASSIGN : TOK_URSH;
1939 : } else {
1940 4013 : tp->t_op = JSOP_RSH;
1941 4013 : tt = matchChar('=') ? TOK_RSHASSIGN : TOK_RSH;
1942 : }
1943 : } else {
1944 56589 : if (matchChar('=')) {
1945 15692 : tp->t_op = JSOP_GE;
1946 15692 : tt = TOK_GE;
1947 : } else {
1948 40897 : tp->t_op = JSOP_GT;
1949 40897 : tt = TOK_GT;
1950 : }
1951 : }
1952 62036 : break;
1953 :
1954 : case '*':
1955 10941 : tp->t_op = JSOP_MUL;
1956 10941 : tt = matchChar('=') ? TOK_MULASSIGN : TOK_STAR;
1957 10941 : break;
1958 :
1959 : case '/':
1960 : /*
1961 : * Look for a single-line comment.
1962 : */
1963 982859 : if (matchChar('/')) {
1964 637462 : if (cx->hasAtLineOption() && !getAtLine())
1965 0 : goto error;
1966 :
1967 637462 : if (!getAtSourceMappingURL())
1968 0 : goto error;
1969 :
1970 : skipline:
1971 : /* Optimize line skipping if we are not in an HTML comment. */
1972 637462 : if (flags & TSF_IN_HTML_COMMENT) {
1973 0 : while ((c = getChar()) != EOF && c != '\n') {
1974 0 : if (c == '-' && matchChar('-') && matchChar('>'))
1975 0 : flags &= ~TSF_IN_HTML_COMMENT;
1976 : }
1977 : } else {
1978 33229643 : while ((c = getChar()) != EOF && c != '\n')
1979 31954719 : continue;
1980 : }
1981 637462 : ungetChar(c);
1982 637462 : cursor = (cursor - 1) & ntokensMask;
1983 637462 : goto retry;
1984 : }
1985 :
1986 : /*
1987 : * Look for a multi-line comment.
1988 : */
1989 345397 : if (matchChar('*')) {
1990 302898 : unsigned linenoBefore = lineno;
1991 82452485 : while ((c = getChar()) != EOF &&
1992 2543769 : !(c == '*' && matchChar('/'))) {
1993 : /* Ignore all characters until comment close. */
1994 : }
1995 302898 : if (c == EOF) {
1996 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1997 0 : JSMSG_UNTERMINATED_COMMENT);
1998 0 : goto error;
1999 : }
2000 302898 : if (linenoBefore != lineno)
2001 264996 : updateFlagsForEOL();
2002 302898 : cursor = (cursor - 1) & ntokensMask;
2003 302898 : goto retry;
2004 : }
2005 :
2006 : /*
2007 : * Look for a regexp.
2008 : */
2009 42499 : if (flags & TSF_OPERAND) {
2010 34059 : tokenbuf.clear();
2011 :
2012 34059 : bool inCharClass = false;
2013 396682 : for (;;) {
2014 430741 : c = getChar();
2015 430741 : if (c == '\\') {
2016 29306 : if (!tokenbuf.append(c))
2017 0 : goto error;
2018 29306 : c = getChar();
2019 401435 : } else if (c == '[') {
2020 12216 : inCharClass = true;
2021 389219 : } else if (c == ']') {
2022 12207 : inCharClass = false;
2023 377012 : } else if (c == '/' && !inCharClass) {
2024 : /* For compat with IE, allow unescaped / in char classes. */
2025 : break;
2026 : }
2027 396682 : if (c == '\n' || c == EOF) {
2028 0 : ungetChar(c);
2029 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
2030 0 : JSMSG_UNTERMINATED_REGEXP);
2031 0 : goto error;
2032 : }
2033 396682 : if (!tokenbuf.append(c))
2034 0 : goto error;
2035 : }
2036 :
2037 34059 : RegExpFlag reflags = NoFlags;
2038 34059 : unsigned length = tokenbuf.length() + 1;
2039 23534 : while (true) {
2040 57593 : c = peekChar();
2041 57593 : if (c == 'g' && !(reflags & GlobalFlag))
2042 20916 : reflags = RegExpFlag(reflags | GlobalFlag);
2043 36677 : else if (c == 'i' && !(reflags & IgnoreCaseFlag))
2044 2590 : reflags = RegExpFlag(reflags | IgnoreCaseFlag);
2045 34087 : else if (c == 'm' && !(reflags & MultilineFlag))
2046 1 : reflags = RegExpFlag(reflags | MultilineFlag);
2047 34086 : else if (c == 'y' && !(reflags & StickyFlag))
2048 27 : reflags = RegExpFlag(reflags | StickyFlag);
2049 : else
2050 : break;
2051 23534 : getChar();
2052 23534 : length++;
2053 : }
2054 :
2055 34059 : c = peekChar();
2056 68118 : if (JS7_ISLET(c)) {
2057 0 : char buf[2] = { '\0', '\0' };
2058 0 : tp->pos.begin.index += length + 1;
2059 0 : buf[0] = char(c);
2060 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_REGEXP_FLAG,
2061 0 : buf);
2062 0 : (void) getChar();
2063 0 : goto error;
2064 : }
2065 34059 : tp->setRegExpFlags(reflags);
2066 34059 : tt = TOK_REGEXP;
2067 34059 : break;
2068 : }
2069 :
2070 8440 : tp->t_op = JSOP_DIV;
2071 8440 : tt = matchChar('=') ? TOK_DIVASSIGN : TOK_DIV;
2072 8440 : break;
2073 :
2074 : case '%':
2075 3030 : tp->t_op = JSOP_MOD;
2076 3030 : tt = matchChar('=') ? TOK_MODASSIGN : TOK_MOD;
2077 3030 : break;
2078 :
2079 : case '~':
2080 285 : tp->t_op = JSOP_BITNOT;
2081 285 : tt = TOK_BITNOT;
2082 285 : break;
2083 :
2084 : case '-':
2085 135179 : if (matchChar('=')) {
2086 1509 : tp->t_op = JSOP_SUB;
2087 1509 : tt = TOK_SUBASSIGN;
2088 133670 : } else if (matchChar(c)) {
2089 7620 : if (peekChar() == '>' && !(flags & TSF_DIRTYLINE)) {
2090 0 : flags &= ~TSF_IN_HTML_COMMENT;
2091 0 : goto skipline;
2092 : }
2093 7620 : tt = TOK_DEC;
2094 : } else {
2095 126050 : tp->t_op = JSOP_NEG;
2096 126050 : tt = TOK_MINUS;
2097 : }
2098 135179 : break;
2099 :
2100 : badchar:
2101 : default:
2102 18 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_ILLEGAL_CHARACTER);
2103 18 : goto error;
2104 : }
2105 :
2106 : out:
2107 115312119 : flags |= TSF_DIRTYLINE;
2108 115312119 : tp->pos.end.index = userbuf.addressOfNextRawChar() - linebase;
2109 115312119 : tp->type = tt;
2110 115312119 : JS_ASSERT(IsTokenSane(tp));
2111 115312119 : return tt;
2112 :
2113 : error:
2114 : /*
2115 : * For erroneous multi-line tokens we won't have changed end.lineno (it'll
2116 : * still be equal to begin.lineno) so we revert end.index to be equal to
2117 : * begin.index + 1 (as if it's a 1-char token) to avoid having inconsistent
2118 : * begin/end positions. end.index isn't used in error messages anyway.
2119 : */
2120 18 : flags |= TSF_DIRTYLINE;
2121 18 : tp->pos.end.index = tp->pos.begin.index + 1;
2122 18 : tp->type = TOK_ERROR;
2123 18 : JS_ASSERT(IsTokenSane(tp));
2124 : #ifdef DEBUG
2125 : /*
2126 : * Poisoning userbuf on error establishes an invariant: once an erroneous
2127 : * token has been seen, userbuf will not be consulted again. This is true
2128 : * because the parser will either (a) deal with the TOK_ERROR token by
2129 : * aborting parsing immediately; or (b) if the TOK_ERROR token doesn't
2130 : * match what it expected, it will unget the token, and the next getToken()
2131 : * call will immediately return the just-gotten TOK_ERROR token again
2132 : * without consulting userbuf, thanks to the lookahead buffer.
2133 : */
2134 18 : userbuf.poison();
2135 : #endif
2136 18 : return TOK_ERROR;
2137 : }
2138 :
2139 : JS_FRIEND_API(int)
2140 0 : js_fgets(char *buf, int size, FILE *file)
2141 : {
2142 : int n, i, c;
2143 : JSBool crflag;
2144 :
2145 0 : n = size - 1;
2146 0 : if (n < 0)
2147 0 : return -1;
2148 :
2149 0 : crflag = JS_FALSE;
2150 0 : for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) {
2151 0 : buf[i] = c;
2152 0 : if (c == '\n') { /* any \n ends a line */
2153 0 : i++; /* keep the \n; we know there is room for \0 */
2154 0 : break;
2155 : }
2156 0 : if (crflag) { /* \r not followed by \n ends line at the \r */
2157 0 : ungetc(c, file);
2158 0 : break; /* and overwrite c in buf with \0 */
2159 : }
2160 0 : crflag = (c == '\r');
2161 : }
2162 :
2163 0 : buf[i] = '\0';
2164 0 : return i;
2165 : }
2166 :
2167 : #ifdef DEBUG
2168 : const char *
2169 0 : TokenKindToString(TokenKind tt)
2170 : {
2171 0 : switch (tt) {
2172 0 : case TOK_ERROR: return "TOK_ERROR";
2173 0 : case TOK_EOF: return "TOK_EOF";
2174 0 : case TOK_EOL: return "TOK_EOL";
2175 0 : case TOK_SEMI: return "TOK_SEMI";
2176 0 : case TOK_COMMA: return "TOK_COMMA";
2177 0 : case TOK_HOOK: return "TOK_HOOK";
2178 0 : case TOK_COLON: return "TOK_COLON";
2179 0 : case TOK_OR: return "TOK_OR";
2180 0 : case TOK_AND: return "TOK_AND";
2181 0 : case TOK_BITOR: return "TOK_BITOR";
2182 0 : case TOK_BITXOR: return "TOK_BITXOR";
2183 0 : case TOK_BITAND: return "TOK_BITAND";
2184 0 : case TOK_PLUS: return "TOK_PLUS";
2185 0 : case TOK_MINUS: return "TOK_MINUS";
2186 0 : case TOK_STAR: return "TOK_STAR";
2187 0 : case TOK_DIV: return "TOK_DIV";
2188 0 : case TOK_MOD: return "TOK_MOD";
2189 0 : case TOK_INC: return "TOK_INC";
2190 0 : case TOK_DEC: return "TOK_DEC";
2191 0 : case TOK_DOT: return "TOK_DOT";
2192 0 : case TOK_LB: return "TOK_LB";
2193 0 : case TOK_RB: return "TOK_RB";
2194 0 : case TOK_LC: return "TOK_LC";
2195 0 : case TOK_RC: return "TOK_RC";
2196 0 : case TOK_LP: return "TOK_LP";
2197 0 : case TOK_RP: return "TOK_RP";
2198 0 : case TOK_NAME: return "TOK_NAME";
2199 0 : case TOK_NUMBER: return "TOK_NUMBER";
2200 0 : case TOK_STRING: return "TOK_STRING";
2201 0 : case TOK_REGEXP: return "TOK_REGEXP";
2202 0 : case TOK_TRUE: return "TOK_TRUE";
2203 0 : case TOK_FALSE: return "TOK_FALSE";
2204 0 : case TOK_NULL: return "TOK_NULL";
2205 0 : case TOK_THIS: return "TOK_THIS";
2206 0 : case TOK_FUNCTION: return "TOK_FUNCTION";
2207 0 : case TOK_IF: return "TOK_IF";
2208 0 : case TOK_ELSE: return "TOK_ELSE";
2209 0 : case TOK_SWITCH: return "TOK_SWITCH";
2210 0 : case TOK_CASE: return "TOK_CASE";
2211 0 : case TOK_DEFAULT: return "TOK_DEFAULT";
2212 0 : case TOK_WHILE: return "TOK_WHILE";
2213 0 : case TOK_DO: return "TOK_DO";
2214 0 : case TOK_FOR: return "TOK_FOR";
2215 0 : case TOK_BREAK: return "TOK_BREAK";
2216 0 : case TOK_CONTINUE: return "TOK_CONTINUE";
2217 0 : case TOK_IN: return "TOK_IN";
2218 0 : case TOK_VAR: return "TOK_VAR";
2219 0 : case TOK_CONST: return "TOK_CONST";
2220 0 : case TOK_WITH: return "TOK_WITH";
2221 0 : case TOK_RETURN: return "TOK_RETURN";
2222 0 : case TOK_NEW: return "TOK_NEW";
2223 0 : case TOK_DELETE: return "TOK_DELETE";
2224 0 : case TOK_TRY: return "TOK_TRY";
2225 0 : case TOK_CATCH: return "TOK_CATCH";
2226 0 : case TOK_FINALLY: return "TOK_FINALLY";
2227 0 : case TOK_THROW: return "TOK_THROW";
2228 0 : case TOK_INSTANCEOF: return "TOK_INSTANCEOF";
2229 0 : case TOK_DEBUGGER: return "TOK_DEBUGGER";
2230 0 : case TOK_XMLSTAGO: return "TOK_XMLSTAGO";
2231 0 : case TOK_XMLETAGO: return "TOK_XMLETAGO";
2232 0 : case TOK_XMLPTAGC: return "TOK_XMLPTAGC";
2233 0 : case TOK_XMLTAGC: return "TOK_XMLTAGC";
2234 0 : case TOK_XMLNAME: return "TOK_XMLNAME";
2235 0 : case TOK_XMLATTR: return "TOK_XMLATTR";
2236 0 : case TOK_XMLSPACE: return "TOK_XMLSPACE";
2237 0 : case TOK_XMLTEXT: return "TOK_XMLTEXT";
2238 0 : case TOK_XMLCOMMENT: return "TOK_XMLCOMMENT";
2239 0 : case TOK_XMLCDATA: return "TOK_XMLCDATA";
2240 0 : case TOK_XMLPI: return "TOK_XMLPI";
2241 0 : case TOK_AT: return "TOK_AT";
2242 0 : case TOK_DBLCOLON: return "TOK_DBLCOLON";
2243 0 : case TOK_DBLDOT: return "TOK_DBLDOT";
2244 0 : case TOK_FILTER: return "TOK_FILTER";
2245 0 : case TOK_XMLELEM: return "TOK_XMLELEM";
2246 0 : case TOK_XMLLIST: return "TOK_XMLLIST";
2247 0 : case TOK_YIELD: return "TOK_YIELD";
2248 0 : case TOK_LEXICALSCOPE: return "TOK_LEXICALSCOPE";
2249 0 : case TOK_LET: return "TOK_LET";
2250 0 : case TOK_RESERVED: return "TOK_RESERVED";
2251 0 : case TOK_STRICT_RESERVED: return "TOK_STRICT_RESERVED";
2252 0 : case TOK_STRICTEQ: return "TOK_STRICTEQ";
2253 0 : case TOK_EQ: return "TOK_EQ";
2254 0 : case TOK_STRICTNE: return "TOK_STRICTNE";
2255 0 : case TOK_NE: return "TOK_NE";
2256 0 : case TOK_TYPEOF: return "TOK_TYPEOF";
2257 0 : case TOK_VOID: return "TOK_VOID";
2258 0 : case TOK_NOT: return "TOK_NOT";
2259 0 : case TOK_BITNOT: return "TOK_BITNOT";
2260 0 : case TOK_LT: return "TOK_LT";
2261 0 : case TOK_LE: return "TOK_LE";
2262 0 : case TOK_GT: return "TOK_GT";
2263 0 : case TOK_GE: return "TOK_GE";
2264 0 : case TOK_LSH: return "TOK_LSH";
2265 0 : case TOK_RSH: return "TOK_RSH";
2266 0 : case TOK_URSH: return "TOK_URSH";
2267 0 : case TOK_ASSIGN: return "TOK_ASSIGN";
2268 0 : case TOK_ADDASSIGN: return "TOK_ADDASSIGN";
2269 0 : case TOK_SUBASSIGN: return "TOK_SUBASSIGN";
2270 0 : case TOK_BITORASSIGN: return "TOK_BITORASSIGN";
2271 0 : case TOK_BITXORASSIGN: return "TOK_BITXORASSIGN";
2272 0 : case TOK_BITANDASSIGN: return "TOK_BITANDASSIGN";
2273 0 : case TOK_LSHASSIGN: return "TOK_LSHASSIGN";
2274 0 : case TOK_RSHASSIGN: return "TOK_RSHASSIGN";
2275 0 : case TOK_URSHASSIGN: return "TOK_URSHASSIGN";
2276 0 : case TOK_MULASSIGN: return "TOK_MULASSIGN";
2277 0 : case TOK_DIVASSIGN: return "TOK_DIVASSIGN";
2278 0 : case TOK_MODASSIGN: return "TOK_MODASSIGN";
2279 0 : case TOK_LIMIT: break;
2280 : }
2281 :
2282 0 : return "<bad TokenKind>";
2283 : }
2284 : #endif
|