1 : /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : *
3 : * ***** BEGIN LICENSE BLOCK *****
4 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 : *
6 : * The contents of this file are subject to the Mozilla Public License Version
7 : * 1.1 (the "License"); you may not use this file except in compliance with
8 : * the License. You may obtain a copy of the License at
9 : * http://www.mozilla.org/MPL/
10 : *
11 : * Software distributed under the License is distributed on an "AS IS" basis,
12 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 : * for the specific language governing rights and limitations under the
14 : * License.
15 : *
16 : * The Original Code is Mozilla Communicator client code, released
17 : * March 31, 1998.
18 : *
19 : * The Initial Developer of the Original Code is
20 : * Netscape Communications Corporation.
21 : * Portions created by the Initial Developer are Copyright (C) 1998
22 : * the Initial Developer. All Rights Reserved.
23 : *
24 : * Contributor(s):
25 : * Nick Fitzgerald <nfitzgerald@mozilla.com>
26 : *
27 : * Alternatively, the contents of this file may be used under the terms of
28 : * either of the GNU General Public License Version 2 or later (the "GPL"),
29 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 : * in which case the provisions of the GPL or the LGPL are applicable instead
31 : * of those above. If you wish to allow use of your version of this file only
32 : * under the terms of either the GPL or the LGPL, and not to allow others to
33 : * use your version of this file under the terms of the MPL, indicate your
34 : * decision by deleting the provisions above and replace them with the notice
35 : * and other provisions required by the GPL or the LGPL. If you do not delete
36 : * the provisions above, a recipient may use your version of this file under
37 : * the terms of any one of the MPL, the GPL or the LGPL.
38 : *
39 : * ***** END LICENSE BLOCK ***** */
40 :
41 : #ifndef TokenStream_h__
42 : #define TokenStream_h__
43 :
44 : /*
45 : * JS lexical scanner interface.
46 : */
47 : #include <stddef.h>
48 : #include <stdio.h>
49 : #include <stdarg.h>
50 : #include "jscntxt.h"
51 : #include "jsversion.h"
52 : #include "jsopcode.h"
53 : #include "jsprvtd.h"
54 : #include "jspubtd.h"
55 :
56 : #include "js/Vector.h"
57 :
/*
 * X-macro expansion: while jskeyword.tbl is included, each
 * JS_KEYWORD(keyword, type, op, version) entry it contains expands to an
 * extern declaration of a string constant named js_<keyword>_str. The macro
 * is undefined again right after the include so it does not leak further.
 */
#define JS_KEYWORD(keyword, type, op, version) \
    extern const char js_##keyword##_str[];
#include "jskeyword.tbl"
#undef JS_KEYWORD
62 :
63 : namespace js {
64 :
/*
 * Kinds of token produced by the scanner. Numbering starts at TOK_ERROR (-1)
 * and increases by one per enumerator; the *_START / *_LAST names are aliases
 * that bracket contiguous ranges used by the TokenKindIs* predicates.
 */
enum TokenKind {
    TOK_ERROR = -1,                     /* well-known as the only code < EOF */
    TOK_EOF,                            /* end of file */
    TOK_EOL,                            /* end of line; only returned by peekTokenSameLine() */
    TOK_SEMI,                           /* semicolon */
    TOK_COMMA,                          /* comma operator */
    TOK_HOOK,                           /* conditional, first half (?) */
    TOK_COLON,                          /* conditional, second half (:) */
    TOK_OR,                             /* logical or (||) */
    TOK_AND,                            /* logical and (&&) */
    TOK_BITOR,                          /* bitwise-or (|) */
    TOK_BITXOR,                         /* bitwise-xor (^) */
    TOK_BITAND,                         /* bitwise-and (&) */
    TOK_PLUS,                           /* plus */
    TOK_MINUS,                          /* minus */
    TOK_STAR,                           /* multiply */
    TOK_DIV,                            /* divide */
    TOK_MOD,                            /* modulus */
    TOK_INC,                            /* increment (++) */
    TOK_DEC,                            /* decrement (--) */
    TOK_DOT,                            /* member operator (.) */
    TOK_LB,                             /* left bracket */
    TOK_RB,                             /* right bracket */
    TOK_LC,                             /* left curly (brace) */
    TOK_RC,                             /* right curly (brace) */
    TOK_LP,                             /* left parenthesis */
    TOK_RP,                             /* right parenthesis */
    TOK_NAME,                           /* identifier */
    TOK_NUMBER,                         /* numeric constant */
    TOK_STRING,                         /* string constant */
    TOK_REGEXP,                         /* RegExp constant */
    TOK_TRUE,                           /* true */
    TOK_FALSE,                          /* false */
    TOK_NULL,                           /* null */
    TOK_THIS,                           /* this */
    TOK_FUNCTION,                       /* function keyword */
    TOK_IF,                             /* if keyword */
    TOK_ELSE,                           /* else keyword */
    TOK_SWITCH,                         /* switch keyword */
    TOK_CASE,                           /* case keyword */
    TOK_DEFAULT,                        /* default keyword */
    TOK_WHILE,                          /* while keyword */
    TOK_DO,                             /* do keyword */
    TOK_FOR,                            /* for keyword */
    TOK_BREAK,                          /* break keyword */
    TOK_CONTINUE,                       /* continue keyword */
    TOK_IN,                             /* in keyword */
    TOK_VAR,                            /* var keyword */
    TOK_CONST,                          /* const keyword */
    TOK_WITH,                           /* with keyword */
    TOK_RETURN,                         /* return keyword */
    TOK_NEW,                            /* new keyword */
    TOK_DELETE,                         /* delete keyword */
    TOK_TRY,                            /* try keyword */
    TOK_CATCH,                          /* catch keyword */
    TOK_FINALLY,                        /* finally keyword */
    TOK_THROW,                          /* throw keyword */
    TOK_INSTANCEOF,                     /* instanceof keyword */
    TOK_DEBUGGER,                       /* debugger keyword */
    TOK_XMLSTAGO,                       /* XML start tag open (<) */
    TOK_XMLETAGO,                       /* XML end tag open (</) */
    TOK_XMLPTAGC,                       /* XML point tag close (/>) */
    TOK_XMLTAGC,                        /* XML start or end tag close (>) */
    TOK_XMLNAME,                        /* XML start-tag non-final fragment */
    TOK_XMLATTR,                        /* XML quoted attribute value */
    TOK_XMLSPACE,                       /* XML whitespace */
    TOK_XMLTEXT,                        /* XML text */
    TOK_XMLCOMMENT,                     /* XML comment */
    TOK_XMLCDATA,                       /* XML CDATA section */
    TOK_XMLPI,                          /* XML processing instruction */
    TOK_AT,                             /* XML attribute op (@) */
    TOK_DBLCOLON,                       /* namespace qualified name op (::) */
    TOK_DBLDOT,                         /* XML descendant op (..) */
    TOK_FILTER,                         /* XML filtering predicate op (.()) */
    TOK_XMLELEM,                        /* XML element node type (no token) */
    TOK_XMLLIST,                        /* XML list node type (no token) */
    TOK_YIELD,                          /* yield from generator function */
    TOK_LEXICALSCOPE,                   /* block scope AST node label */
    TOK_LET,                            /* let keyword */
    TOK_RESERVED,                       /* reserved keywords */
    TOK_STRICT_RESERVED,                /* reserved keywords in strict mode */

    /*
     * Everything from here to TOK_LIMIT is laid out in contiguous ranges so
     * that membership can be decided with a pair of comparisons.
     */

    /* Equality operation tokens, per TokenKindIsEquality */
    TOK_EQUALITY_START,
    TOK_STRICTEQ = TOK_EQUALITY_START,
    TOK_EQ,
    TOK_STRICTNE,
    TOK_NE,
    TOK_EQUALITY_LAST = TOK_NE,

    /* Unary operation tokens */
    TOK_TYPEOF,
    TOK_VOID,
    TOK_NOT,
    TOK_BITNOT,

    /* Relational ops (< <= > >=), per TokenKindIsRelational */
    TOK_RELOP_START,
    TOK_LT = TOK_RELOP_START,
    TOK_LE,
    TOK_GT,
    TOK_GE,
    TOK_RELOP_LAST = TOK_GE,

    /* Shift ops (<< >> >>>), per TokenKindIsShift */
    TOK_SHIFTOP_START,
    TOK_LSH = TOK_SHIFTOP_START,
    TOK_RSH,
    TOK_URSH,
    TOK_SHIFTOP_LAST = TOK_URSH,

    /* Assignment ops (= += -= etc.), per TokenKindIsAssignment */
    TOK_ASSIGNMENT_START,
    TOK_ASSIGN = TOK_ASSIGNMENT_START,
    TOK_ADDASSIGN,
    TOK_SUBASSIGN,
    TOK_BITORASSIGN,
    TOK_BITXORASSIGN,
    TOK_BITANDASSIGN,
    TOK_LSHASSIGN,
    TOK_RSHASSIGN,
    TOK_URSHASSIGN,
    TOK_MULASSIGN,
    TOK_DIVASSIGN,
    TOK_MODASSIGN,
    TOK_ASSIGNMENT_LAST = TOK_MODASSIGN,

    TOK_LIMIT                           /* domain size */
};
194 :
195 : inline bool
196 31530335 : TokenKindIsEquality(TokenKind tt)
197 : {
198 31530335 : return TOK_EQUALITY_START <= tt && tt <= TOK_EQUALITY_LAST;
199 : }
200 :
201 : inline bool
202 31830518 : TokenKindIsRelational(TokenKind tt)
203 : {
204 31830518 : return TOK_RELOP_START <= tt && tt <= TOK_RELOP_LAST;
205 : }
206 :
207 : inline bool
208 31838494 : TokenKindIsShift(TokenKind tt)
209 : {
210 31838494 : return TOK_SHIFTOP_START <= tt && tt <= TOK_SHIFTOP_LAST;
211 : }
212 :
213 : inline bool
214 27802301 : TokenKindIsAssignment(TokenKind tt)
215 : {
216 27802301 : return TOK_ASSIGNMENT_START <= tt && tt <= TOK_ASSIGNMENT_LAST;
217 : }
218 :
219 : inline bool
220 : TokenKindIsDecl(TokenKind tt)
221 : {
222 : #if JS_HAS_BLOCK_SCOPE
223 : return tt == TOK_VAR || tt == TOK_LET;
224 : #else
225 : return tt == TOK_VAR;
226 : #endif
227 : }
228 :
/*
 * A (line, column) source coordinate. Ordering is by line number first and
 * by index within the line second.
 */
struct TokenPtr {
    uint32_t            index;          /* index of char in physical line */
    uint32_t            lineno;         /* physical line number */

    bool operator==(const TokenPtr& rhs) const {
        return lineno == rhs.lineno && index == rhs.index;
    }

    bool operator!=(const TokenPtr& rhs) const {
        return !(*this == rhs);
    }

    bool operator <(const TokenPtr& rhs) const {
        /* Different lines decide the order outright. */
        if (lineno != rhs.lineno)
            return lineno < rhs.lineno;
        return index < rhs.index;
    }

    bool operator <=(const TokenPtr& rhs) const {
        if (lineno != rhs.lineno)
            return lineno < rhs.lineno;
        return index <= rhs.index;
    }

    bool operator >(const TokenPtr& rhs) const {
        return rhs < *this;
    }

    bool operator >=(const TokenPtr& rhs) const {
        return rhs <= *this;
    }
};
259 :
260 : struct TokenPos {
261 : TokenPtr begin; /* first character and line of token */
262 : TokenPtr end; /* index 1 past last char, last line */
263 :
264 14227238 : static TokenPos make(const TokenPtr &begin, const TokenPtr &end) {
265 14227238 : JS_ASSERT(begin <= end);
266 14227238 : TokenPos pos = {begin, end};
267 : return pos;
268 : }
269 :
270 : /* Return a TokenPos that covers left, right, and anything in between. */
271 4416556 : static TokenPos box(const TokenPos &left, const TokenPos &right) {
272 4416556 : JS_ASSERT(left.begin <= left.end);
273 4416556 : JS_ASSERT(left.end <= right.begin);
274 4416556 : JS_ASSERT(right.begin <= right.end);
275 4416556 : TokenPos pos = {left.begin, right.end};
276 : return pos;
277 : }
278 :
279 : bool operator==(const TokenPos& bpos) const {
280 : return begin == bpos.begin && end == bpos.end;
281 : }
282 :
283 : bool operator!=(const TokenPos& bpos) const {
284 : return begin != bpos.begin || end != bpos.end;
285 : }
286 :
287 65019 : bool operator <(const TokenPos& bpos) const {
288 65019 : return begin < bpos.begin;
289 : }
290 :
291 : bool operator <=(const TokenPos& bpos) const {
292 : return begin <= bpos.begin;
293 : }
294 :
295 : bool operator >(const TokenPos& bpos) const {
296 : return !(*this <= bpos);
297 : }
298 :
299 64733 : bool operator >=(const TokenPos& bpos) const {
300 64733 : return !(*this < bpos);
301 : }
302 : };
303 :
304 : struct Token {
305 : TokenKind type; /* char value or above enumerator */
306 : TokenPos pos; /* token position in file */
307 : const jschar *ptr; /* beginning of token in line buffer */
308 : union {
309 : struct { /* name or string literal */
310 : JSOp op; /* operator, for minimal parser */
311 : union {
312 : private:
313 : friend struct Token;
314 : PropertyName *name; /* non-numeric atom */
315 : JSAtom *atom; /* potentially-numeric atom */
316 : } n;
317 : } s;
318 :
319 : private:
320 : friend struct Token;
321 : struct { /* pair for <?target data?> XML PI */
322 : PropertyName *target; /* non-empty */
323 : JSAtom *data; /* maybe empty, never null */
324 : } xmlpi;
325 : double number; /* floating point number */
326 : RegExpFlag reflags; /* regexp flags, use tokenbuf to access
327 : regexp chars */
328 : } u;
329 :
330 : /* Mutators */
331 :
332 : /*
333 : * FIXME: Init type early enough such that all mutators can assert
334 : * type-safety. See bug 697000.
335 : */
336 :
337 24864251 : void setName(JSOp op, PropertyName *name) {
338 24864251 : JS_ASSERT(op == JSOP_NAME);
339 24864251 : u.s.op = op;
340 24864251 : u.s.n.name = name;
341 24864251 : }
342 :
343 10503741 : void setAtom(JSOp op, JSAtom *atom) {
344 : JS_ASSERT(op == JSOP_STRING || op == JSOP_XMLCOMMENT || JSOP_XMLCDATA);
345 10503741 : u.s.op = op;
346 10503741 : u.s.n.atom = atom;
347 10503741 : }
348 :
349 0 : void setProcessingInstruction(PropertyName *target, JSAtom *data) {
350 0 : JS_ASSERT(target);
351 0 : JS_ASSERT(data);
352 0 : JS_ASSERT(!target->empty());
353 0 : u.xmlpi.target = target;
354 0 : u.xmlpi.data = data;
355 0 : }
356 :
357 34059 : void setRegExpFlags(js::RegExpFlag flags) {
358 34059 : JS_ASSERT((flags & AllFlags) == flags);
359 34059 : u.reflags = flags;
360 34059 : }
361 :
362 5911888 : void setNumber(double n) {
363 5911888 : u.number = n;
364 5911888 : }
365 :
366 : /* Type-safe accessors */
367 :
368 24864178 : PropertyName *name() const {
369 24864178 : JS_ASSERT(type == TOK_NAME);
370 24864178 : return u.s.n.name->asPropertyName(); /* poor-man's type verification */
371 : }
372 :
373 10503896 : JSAtom *atom() const {
374 0 : JS_ASSERT(type == TOK_STRING ||
375 : type == TOK_XMLNAME ||
376 : type == TOK_XMLATTR ||
377 : type == TOK_XMLTEXT ||
378 : type == TOK_XMLCDATA ||
379 : type == TOK_XMLSPACE ||
380 10503896 : type == TOK_XMLCOMMENT);
381 10503896 : return u.s.n.atom;
382 : }
383 :
384 0 : PropertyName *xmlPITarget() const {
385 0 : JS_ASSERT(type == TOK_XMLPI);
386 0 : return u.xmlpi.target;
387 : }
388 0 : JSAtom *xmlPIData() const {
389 0 : JS_ASSERT(type == TOK_XMLPI);
390 0 : return u.xmlpi.data;
391 : }
392 :
393 34059 : js::RegExpFlag regExpFlags() const {
394 34059 : JS_ASSERT(type == TOK_REGEXP);
395 34059 : JS_ASSERT((u.reflags & AllFlags) == u.reflags);
396 34059 : return u.reflags;
397 : }
398 :
399 5911888 : double number() const {
400 5911888 : JS_ASSERT(type == TOK_NUMBER);
401 5911888 : return u.number;
402 : }
403 : };
404 :
/* Member-access shorthand: |tok.t_op| expands to |tok.u.s.op| (the JSOp slot of a Token). */
#define t_op                    u.s.op
406 :
/*
 * Bit flags kept in TokenStream::flags. Each flag occupies its own bit,
 * running from bit 1 through bit 13; bit 0 is not used by this enum.
 */
enum TokenStreamFlags
{
    TSF_EOF              = 1 << 1,      /* hit end of file */
    TSF_EOL              = 1 << 2,      /* an EOL was hit in whitespace or a multi-line comment */
    TSF_OPERAND          = 1 << 3,      /* looking for operand, not operator */
    TSF_UNEXPECTED_EOF   = 1 << 4,      /* unexpected end of input, i.e. TOK_EOF not at top-level. */
    TSF_KEYWORD_IS_NAME  = 1 << 5,      /* Ignore keywords and return TOK_NAME instead to the parser. */
    TSF_STRICT_MODE_CODE = 1 << 6,      /* Tokenize as appropriate for strict mode code. */
    TSF_DIRTYLINE        = 1 << 7,      /* non-whitespace since start of line */
    TSF_OWNFILENAME      = 1 << 8,      /* ts->filename is malloc'd */
    TSF_XMLTAGMODE       = 1 << 9,      /* scanning within an XML tag in E4X */
    TSF_XMLTEXTMODE      = 1 << 10,     /* scanning XMLText terminal from E4X */
    TSF_XMLONLYMODE      = 1 << 11,     /* don't scan {expr} within text/tag */
    TSF_OCTAL_CHAR       = 1 << 12,     /* observed an octal character escape */

    /*
     * To handle the hard case of contiguous HTML comments, we want to clear
     * the TSF_DIRTYINPUT flag at the end of each such comment.  But we'd
     * rather not scan for --> within every //-style comment unless we have
     * to.  So we set TSF_IN_HTML_COMMENT when a <!-- is scanned as an HTML
     * begin-comment, and clear it (and TSF_DIRTYINPUT) when we scan -->
     * either on a clean line, or only if (ts->flags & TSF_IN_HTML_COMMENT),
     * in a //-style comment.
     *
     * NOTE(review): this enum declares no TSF_DIRTYINPUT; presumably
     * TSF_DIRTYLINE is the flag meant above -- confirm against the scanner.
     *
     * This still works as before given a malformed comment hiding hack such
     * as:
     *
     *    <script>
     *      <!-- comment hiding hack #1
     *      code goes here
     *      // --> oops, markup for script-unaware browsers goes here!
     *    </script>
     *
     * It does not cope with malformed comment hiding hacks where --> is
     * hidden by C-style comments, or on a dirty line.  Such cases are
     * already broken.
     */
    TSF_IN_HTML_COMMENT  = 1 << 13
};
443 :
444 : class TokenStream
445 : {
446 : /* Unicode separators that are treated as line terminators, in addition to \n, \r */
447 : enum {
448 : LINE_SEPARATOR = 0x2028,
449 : PARA_SEPARATOR = 0x2029
450 : };
451 :
452 : static const size_t ntokens = 4; /* 1 current + 2 lookahead, rounded
453 : to power of 2 to avoid divmod by 3 */
454 : static const unsigned ntokensMask = ntokens - 1;
455 :
456 : public:
457 : typedef Vector<jschar, 32> CharBuffer;
458 :
459 : /*
460 : * To construct a TokenStream, first call the constructor, which is
461 : * infallible, then call |init|, which can fail. To destroy a TokenStream,
462 : * first call |close| then call the destructor. If |init| fails, do not call
463 : * |close|.
464 : *
465 : * This class uses JSContext.tempLifoAlloc to allocate internal buffers. The
466 : * caller should JS_ARENA_MARK before calling |init| and JS_ARENA_RELEASE
467 : * after calling |close|.
468 : */
469 : TokenStream(JSContext *, JSPrincipals *principals, JSPrincipals *originPrincipals);
470 :
471 : /*
472 : * Create a new token stream from an input buffer.
473 : * Return false on memory-allocation failure.
474 : */
475 : bool init(const jschar *base, size_t length, const char *filename, unsigned lineno,
476 : JSVersion version);
477 : ~TokenStream();
478 :
479 : /* Accessors. */
480 21518 : JSContext *getContext() const { return cx; }
481 77481278 : bool onCurrentLine(const TokenPos &pos) const { return lineno == pos.end.lineno; }
482 893440357 : const Token ¤tToken() const { return tokens[cursor]; }
483 297848286 : bool isCurrentTokenType(TokenKind type) const {
484 297848286 : return currentToken().type == type;
485 : }
486 34170954 : bool isCurrentTokenType(TokenKind type1, TokenKind type2) const {
487 34170954 : TokenKind type = currentToken().type;
488 34170954 : return type == type1 || type == type2;
489 : }
490 68118 : const CharBuffer &getTokenbuf() const { return tokenbuf; }
491 1109121 : const char *getFilename() const { return filename; }
492 131370 : unsigned getLineno() const { return lineno; }
493 : /* Note that the version and hasXML can get out of sync via setXML. */
494 7805222 : JSVersion versionNumber() const { return VersionNumber(version); }
495 1229793 : JSVersion versionWithFlags() const { return version; }
496 3818 : bool hasXML() const { return xml || VersionShouldParseXML(versionNumber()); }
497 1898 : void setXML(bool enabled) { xml = enabled; }
498 :
499 31530335 : bool isCurrentTokenEquality() const {
500 31530335 : return TokenKindIsEquality(currentToken().type);
501 : }
502 :
503 31830518 : bool isCurrentTokenRelational() const {
504 31830518 : return TokenKindIsRelational(currentToken().type);
505 : }
506 :
507 31838494 : bool isCurrentTokenShift() const {
508 31838494 : return TokenKindIsShift(currentToken().type);
509 : }
510 :
511 27802301 : bool isCurrentTokenAssignment() const {
512 27802301 : return TokenKindIsAssignment(currentToken().type);
513 : }
514 :
515 : /* Flag methods. */
516 622856 : void setStrictMode(bool enabled = true) { setFlag(enabled, TSF_STRICT_MODE_CODE); }
517 5666 : void setXMLTagMode(bool enabled = true) { setFlag(enabled, TSF_XMLTAGMODE); }
518 1246 : void setXMLOnlyMode(bool enabled = true) { setFlag(enabled, TSF_XMLONLYMODE); }
519 11 : void setUnexpectedEOF(bool enabled = true) { setFlag(enabled, TSF_UNEXPECTED_EOF); }
520 2444740 : void setOctalCharacterEscape(bool enabled = true) { setFlag(enabled, TSF_OCTAL_CHAR); }
521 :
522 21079 : bool isStrictMode() { return !!(flags & TSF_STRICT_MODE_CODE); }
523 0 : bool isXMLTagMode() { return !!(flags & TSF_XMLTAGMODE); }
524 : bool isXMLOnlyMode() { return !!(flags & TSF_XMLONLYMODE); }
525 0 : bool isUnexpectedEOF() { return !!(flags & TSF_UNEXPECTED_EOF); }
526 371 : bool isEOF() const { return !!(flags & TSF_EOF); }
527 2949 : bool hasOctalCharacterEscape() const { return flags & TSF_OCTAL_CHAR; }
528 :
529 : bool reportCompileErrorNumberVA(ParseNode *pn, unsigned flags, unsigned errorNumber, va_list ap);
530 :
531 : private:
532 : static JSAtom *atomize(JSContext *cx, CharBuffer &cb);
533 : bool putIdentInTokenbuf(const jschar *identStart);
534 :
535 : /*
536 : * Enables flags in the associated tokenstream for the object lifetime.
537 : * Useful for lexically-scoped flag toggles.
538 : */
539 : class Flagger {
540 : TokenStream * const parent;
541 : unsigned flags;
542 : public:
543 157894455 : Flagger(TokenStream *parent, unsigned withFlags) : parent(parent), flags(withFlags) {
544 157894455 : parent->flags |= flags;
545 157894455 : }
546 :
547 157894455 : ~Flagger() { parent->flags &= ~flags; }
548 : };
549 : friend class Flagger;
550 :
551 3074519 : void setFlag(bool enabled, TokenStreamFlags flag) {
552 3074519 : if (enabled)
553 7778 : flags |= flag;
554 : else
555 3066741 : flags &= ~flag;
556 3074519 : }
557 :
558 : public:
559 : /*
560 : * Get the next token from the stream, make it the current token, and
561 : * return its kind.
562 : */
563 292889280 : TokenKind getToken() {
564 : /* Check for a pushed-back token resulting from mismatching lookahead. */
565 292889280 : if (lookahead != 0) {
566 212451184 : JS_ASSERT(!(flags & TSF_XMLTEXTMODE));
567 212451184 : lookahead--;
568 212451184 : cursor = (cursor + 1) & ntokensMask;
569 212451184 : TokenKind tt = currentToken().type;
570 212451184 : JS_ASSERT(tt != TOK_EOL);
571 212451184 : return tt;
572 : }
573 :
574 80438096 : return getTokenInternal();
575 : }
576 :
577 : /* Similar, but also sets flags. */
578 104091328 : TokenKind getToken(unsigned withFlags) {
579 208182656 : Flagger flagger(this, withFlags);
580 104091328 : return getToken();
581 : }
582 :
583 : /*
584 : * Push the last scanned token back into the stream.
585 : */
586 212571865 : void ungetToken() {
587 212571865 : JS_ASSERT(lookahead < ntokensMask);
588 212571865 : lookahead++;
589 212571865 : cursor = (cursor - 1) & ntokensMask;
590 212571865 : }
591 :
592 44947432 : TokenKind peekToken() {
593 44947432 : if (lookahead != 0) {
594 10073391 : JS_ASSERT(lookahead == 1);
595 10073391 : return tokens[(cursor + lookahead) & ntokensMask].type;
596 : }
597 34874041 : TokenKind tt = getTokenInternal();
598 34874041 : ungetToken();
599 34874041 : return tt;
600 : }
601 :
602 18032630 : TokenKind peekToken(unsigned withFlags) {
603 36065260 : Flagger flagger(this, withFlags);
604 18032630 : return peekToken();
605 : }
606 :
607 43317074 : TokenKind peekTokenSameLine(unsigned withFlags = 0) {
608 43317074 : if (!onCurrentLine(currentToken().pos))
609 98240 : return TOK_EOL;
610 :
611 43218834 : if (lookahead != 0) {
612 42332924 : JS_ASSERT(lookahead == 1);
613 42332924 : return tokens[(cursor + lookahead) & ntokensMask].type;
614 : }
615 :
616 : /*
617 : * This is the only place TOK_EOL is produced. No token with TOK_EOL
618 : * is created, just a TOK_EOL TokenKind is returned.
619 : */
620 885910 : flags &= ~TSF_EOL;
621 885910 : TokenKind tt = getToken(withFlags);
622 885910 : if (flags & TSF_EOL) {
623 1685 : tt = TOK_EOL;
624 1685 : flags &= ~TSF_EOL;
625 : }
626 885910 : ungetToken();
627 885910 : return tt;
628 : }
629 :
630 : /*
631 : * Get the next token from the stream if its kind is |tt|.
632 : */
633 93864166 : bool matchToken(TokenKind tt) {
634 93864166 : if (getToken() == tt)
635 20493812 : return true;
636 73370354 : ungetToken();
637 73370354 : return false;
638 : }
639 :
640 35770497 : bool matchToken(TokenKind tt, unsigned withFlags) {
641 71540994 : Flagger flagger(this, withFlags);
642 35770497 : return matchToken(tt);
643 : }
644 :
645 1748337 : void consumeKnownToken(TokenKind tt) {
646 1748337 : JS_ALWAYS_TRUE(matchToken(tt));
647 1748337 : }
648 :
649 : /*
650 : * Give up responsibility for managing the sourceMap filename's memory.
651 : */
652 1109121 : const jschar *releaseSourceMap() {
653 1109121 : const jschar* sm = sourceMap;
654 1109121 : sourceMap = NULL;
655 1109121 : return sm;
656 : }
657 :
658 : /*
659 : * If the name at s[0:length] is not a keyword in this version, return
660 : * true with *ttp and *topp unchanged.
661 : *
662 : * If it is a reserved word in this version and strictness mode, and thus
663 : * can't be present in correct code, report a SyntaxError and return false.
664 : *
665 : * If it is a keyword, like "if", the behavior depends on ttp/topp. If ttp
666 : * and topp are null, report a SyntaxError ("if is a reserved identifier")
667 : * and return false. If ttp and topp are non-null, return true with the
668 : * keyword's TokenKind in *ttp and its JSOp in *topp.
669 : *
670 : * ttp and topp must be either both null or both non-null.
671 : */
672 : bool checkForKeyword(const jschar *s, size_t length, TokenKind *ttp, JSOp *topp);
673 :
674 : private:
675 : /*
676 : * This is the low-level interface to the JS source code buffer. It just
677 : * gets raw chars, basically. TokenStreams functions are layered on top
678 : * and do some extra stuff like converting all EOL sequences to '\n',
679 : * tracking the line number, and setting the TSF_EOF flag. (The "raw" in
680 : * "raw chars" refers to the lack of EOL sequence normalization.)
681 : */
682 : class TokenBuf {
683 : public:
684 138998 : TokenBuf() : base(NULL), limit(NULL), ptr(NULL) { }
685 :
686 138998 : void init(const jschar *buf, size_t length) {
687 138998 : base = ptr = buf;
688 138998 : limit = base + length;
689 138998 : }
690 :
691 1270097546 : bool hasRawChars() const {
692 1270097546 : return ptr < limit;
693 : }
694 :
695 74511610 : bool atStart() const {
696 74511610 : return ptr == base;
697 : }
698 :
699 1269855264 : jschar getRawChar() {
700 1269855264 : return *ptr++; /* this will NULL-crash if poisoned */
701 : }
702 :
703 16362240 : jschar peekRawChar() const {
704 16362240 : return *ptr; /* this will NULL-crash if poisoned */
705 : }
706 :
707 56760 : bool matchRawChar(jschar c) {
708 56760 : if (*ptr == c) { /* this will NULL-crash if poisoned */
709 56760 : ptr++;
710 56760 : return true;
711 : }
712 0 : return false;
713 : }
714 :
715 1550754 : bool matchRawCharBackwards(jschar c) {
716 1550754 : JS_ASSERT(ptr); /* make sure haven't been poisoned */
717 1550754 : if (*(ptr - 1) == c) {
718 4633 : ptr--;
719 4633 : return true;
720 : }
721 1546121 : return false;
722 : }
723 :
724 72960856 : void ungetRawChar() {
725 72960856 : JS_ASSERT(ptr); /* make sure haven't been poisoned */
726 72960856 : ptr--;
727 72960856 : }
728 :
729 345262473 : const jschar *addressOfNextRawChar() {
730 345262473 : JS_ASSERT(ptr); /* make sure haven't been poisoned */
731 345262473 : return ptr;
732 : }
733 :
734 : /* Use this with caution! */
735 0 : void setAddressOfNextRawChar(const jschar *a) {
736 0 : JS_ASSERT(a);
737 0 : ptr = a;
738 0 : }
739 :
740 : #ifdef DEBUG
741 : /*
742 : * Poison the TokenBuf so it cannot be accessed again. There's one
743 : * exception to this rule -- see findEOL() -- which is why
744 : * ptrWhenPoisoned exists.
745 : */
746 18 : void poison() {
747 18 : ptrWhenPoisoned = ptr;
748 18 : ptr = NULL;
749 18 : }
750 : #endif
751 :
752 5427304 : static bool isRawEOLChar(int32_t c) {
753 5427304 : return (c == '\n' || c == '\r' || c == LINE_SEPARATOR || c == PARA_SEPARATOR);
754 : }
755 :
756 : const jschar *findEOL();
757 :
758 : private:
759 : const jschar *base; /* base of buffer */
760 : const jschar *limit; /* limit for quick bounds check */
761 : const jschar *ptr; /* next char to get */
762 : const jschar *ptrWhenPoisoned; /* |ptr| when poison() was called */
763 : };
764 :
765 : TokenKind getTokenInternal(); /* doesn't check for pushback or error flag. */
766 :
767 : int32_t getChar();
768 : int32_t getCharIgnoreEOL();
769 : void ungetChar(int32_t c);
770 : void ungetCharIgnoreEOL(int32_t c);
771 : Token *newToken(ptrdiff_t adjust);
772 : bool peekUnicodeEscape(int32_t *c);
773 : bool matchUnicodeEscapeIdStart(int32_t *c);
774 : bool matchUnicodeEscapeIdent(int32_t *c);
775 : bool peekChars(int n, jschar *cp);
776 : bool getAtLine();
777 : bool getAtSourceMappingURL();
778 :
779 : bool getXMLEntity();
780 : bool getXMLTextOrTag(TokenKind *ttp, Token **tpp);
781 : bool getXMLMarkup(TokenKind *ttp, Token **tpp);
782 :
783 19445232 : bool matchChar(int32_t expect) {
784 19445232 : int32_t c = getChar();
785 19445232 : if (c == expect)
786 3827116 : return true;
787 15618116 : ungetChar(c);
788 15618116 : return false;
789 : }
790 :
791 54 : void consumeKnownChar(int32_t expect) {
792 108 : mozilla::DebugOnly<int32_t> c = getChar();
793 54 : JS_ASSERT(c == expect);
794 54 : }
795 :
796 100759 : int32_t peekChar() {
797 100759 : int32_t c = getChar();
798 100759 : ungetChar(c);
799 100759 : return c;
800 : }
801 :
802 100916 : void skipChars(int n) {
803 580782 : while (--n >= 0)
804 378950 : getChar();
805 100916 : }
806 :
807 : void updateLineInfoForEOL();
808 : void updateFlagsForEOL();
809 :
810 : Token tokens[ntokens];/* circular token buffer */
811 : unsigned cursor; /* index of last parsed token */
812 : unsigned lookahead; /* count of lookahead tokens */
813 : unsigned lineno; /* current line number */
814 : unsigned flags; /* flags -- see above */
815 : const jschar *linebase; /* start of current line; points into userbuf */
816 : const jschar *prevLinebase; /* start of previous line; NULL if on the first line */
817 : TokenBuf userbuf; /* user input buffer */
818 : const char *filename; /* input filename or null */
819 : jschar *sourceMap; /* source map's filename or null */
820 : void *listenerTSData;/* listener data for this TokenStream */
821 : CharBuffer tokenbuf; /* current token string buffer */
822 : int8_t oneCharTokens[128]; /* table of one-char tokens */
823 : bool maybeEOL[256]; /* probabilistic EOL lookup table */
824 : bool maybeStrSpecial[256];/* speeds up string scanning */
825 : JSVersion version; /* (i.e. to identify keywords) */
826 : bool xml; /* see JSOPTION_XML */
827 : JSContext *const cx;
828 : JSPrincipals *const originPrincipals;
829 : };
830 :
/*
 * One keyword record: the keyword's spelling together with the token kind,
 * opcode and language version stored alongside it.
 */
struct KeywordInfo {
    const char  *chars;         /* C string with keyword text */
    TokenKind   tokentype;      /* TokenKind recorded for this keyword */
    JSOp        op;             /* JSOp */
    JSVersion   version;        /* JSVersion */
};
837 :
838 : /*
839 : * Returns a KeywordInfo for the specified characters, or NULL if the string is
840 : * not a keyword.
841 : */
842 : const KeywordInfo *
843 : FindKeyword(const jschar *s, size_t length);
844 :
845 : /*
846 : * Check that str forms a valid JS identifier name. The function does not
847 : * check if str is a JS keyword.
848 : */
849 : JSBool
850 : IsIdentifier(JSLinearString *str);
851 :
852 : /*
853 : * Steal one JSREPORT_* bit (see jsapi.h) to tell that arguments to the error
854 : * message have const jschar* type, not const char*.
855 : */
856 : #define JSREPORT_UC 0x100
857 :
858 : /*
859 : * Report a compile-time error by its number. Return true for a warning, false
860 : * for an error. When pn is not null, use it to report error's location.
861 : * Otherwise use ts, which must not be null.
862 : */
863 : bool
864 : ReportCompileErrorNumber(JSContext *cx, TokenStream *ts, ParseNode *pn, unsigned flags,
865 : unsigned errorNumber, ...);
866 :
867 : /*
868 : * Report a condition that should elicit a warning with JSOPTION_STRICT,
869 : * or an error if ts or tc is handling strict mode code. This function
870 : * defers to ReportCompileErrorNumber to do the real work. Either tc
871 : * or ts may be NULL, if there is no tree context or token stream state
872 : * whose strictness should affect the report.
873 : *
874 : * One could have ReportCompileErrorNumber recognize the
875 : * JSREPORT_STRICT_MODE_ERROR flag instead of having a separate function
876 : * like this one. However, the strict mode code flag we need to test is
877 : * in the TreeContext structure for that code; we would have to change
878 : * the ~120 ReportCompileErrorNumber calls to pass the additional
879 : * argument, even though many of those sites would never use it. Using
880 : * ts's TSF_STRICT_MODE_CODE flag instead of tc's would be brittle: at some
881 : * points ts's flags don't correspond to those of the tc relevant to the
882 : * error.
883 : */
884 : bool
885 : ReportStrictModeError(JSContext *cx, TokenStream *ts, TreeContext *tc, ParseNode *pn,
886 : unsigned errorNumber, ...);
887 :
888 : } /* namespace js */
889 :
890 : extern JS_FRIEND_API(int)
891 : js_fgets(char *buf, int size, FILE *file);
892 :
893 : #ifdef DEBUG
894 : extern const char *
895 : TokenKindToString(js::TokenKind tt);
896 : #endif
897 :
898 : #endif /* TokenStream_h__ */
|