1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set ts=8 sw=4 et tw=99 ft=cpp:
3 : *
4 : * ***** BEGIN LICENSE BLOCK *****
5 : * Copyright (C) 2009 Apple Inc. All rights reserved.
6 : * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
7 : *
8 : * Redistribution and use in source and binary forms, with or without
9 : * modification, are permitted provided that the following conditions
10 : * are met:
11 : * 1. Redistributions of source code must retain the above copyright
12 : * notice, this list of conditions and the following disclaimer.
13 : * 2. Redistributions in binary form must reproduce the above copyright
14 : * notice, this list of conditions and the following disclaimer in the
15 : * documentation and/or other materials provided with the distribution.
16 : *
17 : * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
18 : * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 : * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 : * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
21 : * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 : * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 : * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 : * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
25 : * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 : * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 : * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 : *
29 : * ***** END LICENSE BLOCK ***** */
30 :
31 : #include "YarrInterpreter.h"
32 :
33 : #include "Yarr.h"
34 : #include "BumpPointerAllocator.h"
35 :
36 : #ifndef NDEBUG
37 : #include <stdio.h>
38 : #endif
39 :
40 : using namespace WTF;
41 :
42 : namespace JSC { namespace Yarr {
43 :
44 : class Interpreter {
45 : public:
46 : struct ParenthesesDisjunctionContext;
47 :
// Backtracking frame slot for a quantified pattern-character term.
struct BackTrackInfoPatternCharacter {
    // Repetition counter updated by the greedy / non-greedy backtracking paths.
    uintptr_t matchAmount;
};
// Backtracking frame slot for a quantified character-class term.
struct BackTrackInfoCharacterClass {
    // Number of repetitions currently accepted for the term.
    uintptr_t matchAmount;
};
// Backtracking frame slot for a back-reference term.
struct BackTrackInfoBackReference {
    // Input position saved before consuming; not really needed for greedy quantifiers.
    uintptr_t begin;
    // Repetitions consumed so far; not really needed for fixed quantifiers.
    uintptr_t matchAmount;
};
// Backtracking frame slot for an alternative.
struct BackTrackInfoAlternative {
    uintptr_t offset;
};
// Backtracking frame slot for a parenthetical assertion.
struct BackTrackInfoParentheticalAssertion {
    // Input position recorded when the assertion is entered, restored when it is left.
    uintptr_t begin;
};
// Backtracking frame slot for a parenthesised subpattern with a quantity count of one.
struct BackTrackInfoParenthesesOnce {
    // Input position at which the subpattern was entered, or `notFound`.
    uintptr_t begin;
};
// Backtracking frame slot for a 'terminal' parenthesised subpattern.
struct BackTrackInfoParenthesesTerminal {
    // Input position at the start of the current iteration.
    uintptr_t begin;
};
70 : struct BackTrackInfoParentheses {
71 : uintptr_t matchAmount;
72 : ParenthesesDisjunctionContext* lastContext;
73 : };
74 :
75 4618613 : static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
76 : {
77 4618613 : context->next = backTrack->lastContext;
78 4618613 : backTrack->lastContext = context;
79 4618613 : ++backTrack->matchAmount;
80 4618613 : }
81 :
82 4029 : static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
83 : {
84 4029 : ASSERT(backTrack->matchAmount);
85 4029 : ASSERT(backTrack->lastContext);
86 4029 : backTrack->lastContext = backTrack->lastContext->next;
87 4029 : --backTrack->matchAmount;
88 4029 : }
89 :
// Per-disjunction matching state. Instances are constructed with placement new
// into storage that the allocating code sizes so that `frame` can hold more than
// the single word declared here.
struct DisjunctionContext
{
    // Only `term` is initialised; the remaining members are left unset.
    DisjunctionContext()
    {
        term = 0;
    }

    // Placement form: construct in caller-provided storage.
    void* operator new(size_t, void* where)
    {
        return where;
    }

    int term;
    unsigned matchBegin;
    unsigned matchEnd;
    uintptr_t frame[1]; // First word of the variable-length backtracking frame.
};
107 :
108 67716 : DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
109 : {
110 67716 : size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
111 67716 : allocatorPool = allocatorPool->ensureCapacity(size);
112 67716 : if (!allocatorPool)
113 0 : CRASH();
114 67716 : return new(allocatorPool->alloc(size)) DisjunctionContext();
115 : }
116 :
117 67716 : void freeDisjunctionContext(DisjunctionContext* context)
118 : {
119 67716 : allocatorPool = allocatorPool->dealloc(context);
120 67716 : }
121 :
122 : struct ParenthesesDisjunctionContext
123 : {
124 4681317 : ParenthesesDisjunctionContext(int* output, ByteTerm& term)
125 4681317 : : next(0)
126 : {
127 4681317 : unsigned firstSubpatternId = term.atom.subpatternId;
128 4681317 : unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
129 :
130 13967109 : for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
131 9285792 : subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
132 9285792 : output[(firstSubpatternId << 1) + i] = -1;
133 : }
134 :
135 4681317 : new(getDisjunctionContext(term)) DisjunctionContext();
136 4681317 : }
137 :
138 4681317 : void* operator new(size_t, void* where)
139 : {
140 4681317 : return where;
141 : }
142 :
143 66733 : void restoreOutput(int* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
144 : {
145 163987 : for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
146 97254 : output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
147 66733 : }
148 :
149 18404807 : DisjunctionContext* getDisjunctionContext(ByteTerm& term)
150 : {
151 18404807 : return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
152 : }
153 :
154 : ParenthesesDisjunctionContext* next;
155 : int subpatternBackup[1];
156 : };
157 :
158 4681317 : ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, int* output, ByteTerm& term)
159 : {
160 4681317 : size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(int) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(int) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
161 4681317 : allocatorPool = allocatorPool->ensureCapacity(size);
162 4681317 : if (!allocatorPool)
163 0 : CRASH();
164 4681317 : return new(allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
165 : }
166 :
167 66733 : void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
168 : {
169 66733 : allocatorPool = allocatorPool->dealloc(context);
170 66733 : }
171 :
172 : class InputStream {
173 : public:
174 67716 : InputStream(const UChar* input, unsigned start, unsigned length)
175 : : input(input)
176 : , pos(start)
177 67716 : , length(length)
178 : {
179 67716 : }
180 :
181 76835 : void next()
182 : {
183 76835 : ++pos;
184 76835 : }
185 :
186 0 : void rewind(unsigned amount)
187 : {
188 0 : ASSERT(pos >= amount);
189 0 : pos -= amount;
190 0 : }
191 :
192 0 : int read()
193 : {
194 0 : ASSERT(pos < length);
195 0 : if (pos < length)
196 0 : return input[pos];
197 0 : return -1;
198 : }
199 :
200 : int readPair()
201 : {
202 : ASSERT(pos + 1 < length);
203 : return input[pos] | input[pos + 1] << 16;
204 : }
205 :
206 12252979 : int readChecked(int position)
207 : {
208 12252979 : ASSERT(position < 0);
209 12252979 : ASSERT(static_cast<unsigned>(-position) <= pos);
210 12252979 : unsigned p = pos + position;
211 12252979 : ASSERT(p < length);
212 12252979 : return input[p];
213 : }
214 :
215 36 : int reread(unsigned from)
216 : {
217 36 : ASSERT(from < length);
218 36 : return input[from];
219 : }
220 :
221 : int prev()
222 : {
223 : ASSERT(!(pos > length));
224 : if (pos && length)
225 : return input[pos - 1];
226 : return -1;
227 : }
228 :
229 9756265 : unsigned getPos()
230 : {
231 9756265 : return pos;
232 : }
233 :
234 171 : void setPos(unsigned p)
235 : {
236 171 : pos = p;
237 171 : }
238 :
239 : bool atStart()
240 : {
241 : return pos == 0;
242 : }
243 :
244 135013 : bool atEnd()
245 : {
246 135013 : return pos == length;
247 : }
248 :
249 7783297 : bool checkInput(int count)
250 : {
251 7783297 : if ((pos + count) <= length) {
252 7618932 : pos += count;
253 7618932 : return true;
254 : }
255 164365 : return false;
256 : }
257 :
258 2550961 : void uncheckInput(int count)
259 : {
260 2550961 : pos -= count;
261 2550961 : }
262 :
263 117064 : bool atStart(int position)
264 : {
265 117064 : return (pos + position) == 0;
266 : }
267 :
268 0 : bool atEnd(int position)
269 : {
270 0 : return (pos + position) == length;
271 : }
272 :
273 : bool isNotAvailableInput(int position)
274 : {
275 : return (pos + position) > length;
276 : }
277 :
278 : private:
279 : const UChar* input;
280 : unsigned pos;
281 : unsigned length;
282 : };
283 :
284 7635078 : bool testCharacterClass(CharacterClass* characterClass, int ch)
285 : {
286 7635078 : if (ch & 0xFF80) {
287 0 : for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
288 0 : if (ch == characterClass->m_matchesUnicode[i])
289 0 : return true;
290 0 : for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
291 0 : if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
292 0 : return true;
293 : } else {
294 17329349 : for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
295 9768560 : if (ch == characterClass->m_matches[i])
296 74289 : return true;
297 10829881 : for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
298 5826885 : if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
299 2557793 : return true;
300 : }
301 :
302 5002996 : return false;
303 : }
304 :
305 116902 : bool checkCharacter(int testChar, int inputPosition)
306 : {
307 116902 : return testChar == input.readChecked(inputPosition);
308 : }
309 :
310 4500999 : bool checkCasedCharacter(int loChar, int hiChar, int inputPosition)
311 : {
312 4500999 : int ch = input.readChecked(inputPosition);
313 4500999 : return (loChar == ch) || (hiChar == ch);
314 : }
315 :
316 7635078 : bool checkCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition)
317 : {
318 7635078 : bool match = testCharacterClass(characterClass, input.readChecked(inputPosition));
319 7635078 : return invert ? !match : match;
320 : }
321 :
322 36 : bool tryConsumeBackReference(int matchBegin, int matchEnd, int inputOffset)
323 : {
324 36 : int matchSize = matchEnd - matchBegin;
325 :
326 36 : if (!input.checkInput(matchSize))
327 0 : return false;
328 :
329 36 : if (pattern->m_ignoreCase) {
330 45 : for (int i = 0; i < matchSize; ++i) {
331 36 : int ch = input.reread(matchBegin + i);
332 :
333 36 : int lo = Unicode::toLower(ch);
334 36 : int hi = Unicode::toUpper(ch);
335 :
336 36 : if ((lo != hi) ? (!checkCasedCharacter(lo, hi, inputOffset - matchSize + i)) : (!checkCharacter(ch, inputOffset - matchSize + i))) {
337 27 : input.uncheckInput(matchSize);
338 27 : return false;
339 : }
340 : }
341 : } else {
342 0 : for (int i = 0; i < matchSize; ++i) {
343 0 : if (!checkCharacter(input.reread(matchBegin + i), inputOffset - matchSize + i)) {
344 0 : input.uncheckInput(matchSize);
345 0 : return false;
346 : }
347 : }
348 : }
349 :
350 9 : return true;
351 : }
352 :
353 117064 : bool matchAssertionBOL(ByteTerm& term)
354 : {
355 117064 : return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition - 1)));
356 : }
357 :
358 53476 : bool matchAssertionEOL(ByteTerm& term)
359 : {
360 53476 : if (term.inputPosition)
361 0 : return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
362 :
363 53476 : return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
364 : }
365 :
366 0 : bool matchAssertionWordBoundary(ByteTerm& term)
367 : {
368 0 : bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition - 1));
369 : bool readIsWordchar;
370 0 : if (term.inputPosition)
371 0 : readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
372 : else
373 0 : readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
374 :
375 0 : bool wordBoundary = prevIsWordchar != readIsWordchar;
376 0 : return term.invert() ? !wordBoundary : wordBoundary;
377 : }
378 :
379 4197 : bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
380 : {
381 4197 : BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
382 :
383 4197 : switch (term.atom.quantityType) {
384 : case QuantifierFixedCount:
385 3027 : break;
386 :
387 : case QuantifierGreedy:
388 1170 : if (backTrack->matchAmount) {
389 0 : --backTrack->matchAmount;
390 0 : input.uncheckInput(1);
391 0 : return true;
392 : }
393 1170 : break;
394 :
395 : case QuantifierNonGreedy:
396 0 : if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
397 0 : ++backTrack->matchAmount;
398 0 : if (checkCharacter(term.atom.patternCharacter, term.inputPosition - 1))
399 0 : return true;
400 : }
401 0 : input.uncheckInput(backTrack->matchAmount);
402 0 : break;
403 : }
404 :
405 4197 : return false;
406 : }
407 :
408 117 : bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
409 : {
410 117 : BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
411 :
412 117 : switch (term.atom.quantityType) {
413 : case QuantifierFixedCount:
414 117 : break;
415 :
416 : case QuantifierGreedy:
417 0 : if (backTrack->matchAmount) {
418 0 : --backTrack->matchAmount;
419 0 : input.uncheckInput(1);
420 0 : return true;
421 : }
422 0 : break;
423 :
424 : case QuantifierNonGreedy:
425 0 : if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
426 0 : ++backTrack->matchAmount;
427 0 : if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition - 1))
428 0 : return true;
429 : }
430 0 : input.uncheckInput(backTrack->matchAmount);
431 0 : break;
432 : }
433 :
434 117 : return false;
435 : }
436 :
437 7247118 : bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
438 : {
439 7247118 : ASSERT(term.type == ByteTerm::TypeCharacterClass);
440 7247118 : BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
441 :
442 7247118 : switch (term.atom.quantityType) {
443 : case QuantifierFixedCount: {
444 11787076 : for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
445 7038001 : if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + matchAmount))
446 2288889 : return false;
447 : }
448 4749075 : return true;
449 : }
450 :
451 : case QuantifierGreedy: {
452 208740 : unsigned matchAmount = 0;
453 912268 : while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
454 595673 : if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1)) {
455 100885 : input.uncheckInput(1);
456 100885 : break;
457 : }
458 494788 : ++matchAmount;
459 : }
460 208740 : backTrack->matchAmount = matchAmount;
461 :
462 208740 : return true;
463 : }
464 :
465 : case QuantifierNonGreedy:
466 414 : backTrack->matchAmount = 0;
467 414 : return true;
468 : }
469 :
470 0 : ASSERT_NOT_REACHED();
471 : return false;
472 : }
473 :
474 163310 : bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
475 : {
476 163310 : ASSERT(term.type == ByteTerm::TypeCharacterClass);
477 163310 : BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
478 :
479 163310 : switch (term.atom.quantityType) {
480 : case QuantifierFixedCount:
481 74790 : break;
482 :
483 : case QuantifierGreedy:
484 87107 : if (backTrack->matchAmount) {
485 73117 : --backTrack->matchAmount;
486 73117 : input.uncheckInput(1);
487 73117 : return true;
488 : }
489 13990 : break;
490 :
491 : case QuantifierNonGreedy:
492 1413 : if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
493 1404 : ++backTrack->matchAmount;
494 1404 : if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1))
495 1404 : return true;
496 : }
497 9 : input.uncheckInput(backTrack->matchAmount);
498 9 : break;
499 : }
500 :
501 88789 : return false;
502 : }
503 :
504 1386 : bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
505 : {
506 1386 : ASSERT(term.type == ByteTerm::TypeBackReference);
507 1386 : BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
508 :
509 1386 : int matchBegin = output[(term.atom.subpatternId << 1)];
510 1386 : int matchEnd = output[(term.atom.subpatternId << 1) + 1];
511 :
512 : // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
513 : // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
514 : // Eg.: /(a\1)/
515 1386 : if (matchEnd == -1)
516 0 : return true;
517 :
518 1386 : ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
519 :
520 1386 : if (matchBegin == matchEnd)
521 1350 : return true;
522 :
523 36 : switch (term.atom.quantityType) {
524 : case QuantifierFixedCount: {
525 36 : backTrack->begin = input.getPos();
526 45 : for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
527 36 : if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
528 27 : input.setPos(backTrack->begin);
529 27 : return false;
530 : }
531 : }
532 9 : return true;
533 : }
534 :
535 : case QuantifierGreedy: {
536 0 : unsigned matchAmount = 0;
537 0 : while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
538 0 : ++matchAmount;
539 0 : backTrack->matchAmount = matchAmount;
540 0 : return true;
541 : }
542 :
543 : case QuantifierNonGreedy:
544 0 : backTrack->begin = input.getPos();
545 0 : backTrack->matchAmount = 0;
546 0 : return true;
547 : }
548 :
549 0 : ASSERT_NOT_REACHED();
550 : return false;
551 : }
552 :
553 1170 : bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
554 : {
555 1170 : ASSERT(term.type == ByteTerm::TypeBackReference);
556 1170 : BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
557 :
558 1170 : int matchBegin = output[(term.atom.subpatternId << 1)];
559 1170 : int matchEnd = output[(term.atom.subpatternId << 1) + 1];
560 1170 : ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
561 :
562 1170 : if (matchBegin == matchEnd)
563 1170 : return false;
564 :
565 0 : switch (term.atom.quantityType) {
566 : case QuantifierFixedCount:
567 : // for quantityCount == 1, could rewind.
568 0 : input.setPos(backTrack->begin);
569 0 : break;
570 :
571 : case QuantifierGreedy:
572 0 : if (backTrack->matchAmount) {
573 0 : --backTrack->matchAmount;
574 0 : input.rewind(matchEnd - matchBegin);
575 0 : return true;
576 : }
577 0 : break;
578 :
579 : case QuantifierNonGreedy:
580 0 : if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
581 0 : ++backTrack->matchAmount;
582 0 : return true;
583 : }
584 0 : input.setPos(backTrack->begin);
585 0 : break;
586 : }
587 :
588 0 : return false;
589 : }
590 :
591 4525969 : void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
592 : {
593 4525969 : if (term.capture()) {
594 4519072 : unsigned subpatternId = term.atom.subpatternId;
595 4519072 : output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
596 4519072 : output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
597 : }
598 4525969 : }
599 66733 : void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
600 : {
601 66733 : unsigned firstSubpatternId = term.atom.subpatternId;
602 66733 : unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
603 66733 : context->restoreOutput(output, firstSubpatternId, count);
604 66733 : }
605 0 : JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
606 : {
607 0 : while (backTrack->matchAmount) {
608 0 : ParenthesesDisjunctionContext* context = backTrack->lastContext;
609 :
610 0 : JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
611 0 : if (result == JSRegExpMatch)
612 0 : return JSRegExpMatch;
613 :
614 0 : resetMatches(term, context);
615 0 : popParenthesesDisjunctionContext(backTrack);
616 0 : freeParenthesesDisjunctionContext(context);
617 :
618 0 : if (result != JSRegExpNoMatch)
619 0 : return result;
620 : }
621 :
622 0 : return JSRegExpNoMatch;
623 : }
624 :
625 129837 : bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
626 : {
627 129837 : ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
628 129837 : ASSERT(term.atom.quantityCount == 1);
629 :
630 129837 : BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
631 :
632 129837 : switch (term.atom.quantityType) {
633 : case QuantifierGreedy: {
634 : // set this speculatively; if we get to the parens end this will be true.
635 22673 : backTrack->begin = input.getPos();
636 22673 : break;
637 : }
638 : case QuantifierNonGreedy: {
639 0 : backTrack->begin = notFound;
640 0 : context->term += term.atom.parenthesesWidth;
641 0 : return true;
642 : }
643 : case QuantifierFixedCount:
644 107164 : break;
645 : }
646 :
647 129837 : if (term.capture()) {
648 94712 : unsigned subpatternId = term.atom.subpatternId;
649 94712 : output[(subpatternId << 1)] = input.getPos() + term.inputPosition;
650 : }
651 :
652 129837 : return true;
653 : }
654 :
655 142299 : bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
656 : {
657 142299 : ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
658 142299 : ASSERT(term.atom.quantityCount == 1);
659 :
660 142299 : if (term.capture()) {
661 60782 : unsigned subpatternId = term.atom.subpatternId;
662 60782 : output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
663 : }
664 :
665 142299 : if (term.atom.quantityType == QuantifierFixedCount)
666 72407 : return true;
667 :
668 69892 : BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
669 69892 : return backTrack->begin != input.getPos();
670 : }
671 :
672 52914 : bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
673 : {
674 52914 : ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
675 52914 : ASSERT(term.atom.quantityCount == 1);
676 :
677 52914 : BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
678 :
679 52914 : if (term.capture()) {
680 38559 : unsigned subpatternId = term.atom.subpatternId;
681 38559 : output[(subpatternId << 1)] = -1;
682 38559 : output[(subpatternId << 1) + 1] = -1;
683 : }
684 :
685 52914 : switch (term.atom.quantityType) {
686 : case QuantifierGreedy:
687 : // if we backtrack to this point, there is another chance - try matching nothing.
688 12905 : ASSERT(backTrack->begin != notFound);
689 12905 : backTrack->begin = notFound;
690 12905 : context->term += term.atom.parenthesesWidth;
691 12905 : return true;
692 : case QuantifierNonGreedy:
693 0 : ASSERT(backTrack->begin != notFound);
694 : case QuantifierFixedCount:
695 40009 : break;
696 : }
697 :
698 40009 : return false;
699 : }
700 :
701 78065 : bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
702 : {
703 78065 : ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
704 78065 : ASSERT(term.atom.quantityCount == 1);
705 :
706 78065 : BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
707 :
708 78065 : switch (term.atom.quantityType) {
709 : case QuantifierGreedy:
710 72813 : if (backTrack->begin == notFound) {
711 12698 : context->term -= term.atom.parenthesesWidth;
712 12698 : return false;
713 : }
714 : case QuantifierNonGreedy:
715 60115 : if (backTrack->begin == notFound) {
716 0 : backTrack->begin = input.getPos();
717 0 : if (term.capture()) {
718 : // Technically this access to inputPosition should be accessing the begin term's
719 : // inputPosition, but for repeats other than fixed these values should be
720 : // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
721 0 : ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
722 0 : ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
723 0 : unsigned subpatternId = term.atom.subpatternId;
724 0 : output[subpatternId << 1] = input.getPos() + term.inputPosition;
725 : }
726 0 : context->term -= term.atom.parenthesesWidth;
727 0 : return true;
728 : }
729 : case QuantifierFixedCount:
730 65367 : break;
731 : }
732 :
733 65367 : return false;
734 : }
735 :
736 45 : bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
737 : {
738 45 : ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
739 45 : ASSERT(term.atom.quantityType == QuantifierGreedy);
740 45 : ASSERT(term.atom.quantityCount == quantifyInfinite);
741 45 : ASSERT(!term.capture());
742 :
743 45 : BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
744 45 : backTrack->begin = input.getPos();
745 45 : return true;
746 : }
747 :
748 81 : bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
749 : {
750 81 : ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
751 :
752 81 : BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
753 : // Empty match is a failed match.
754 81 : if (backTrack->begin == input.getPos())
755 45 : return false;
756 :
757 : // Successful match! Okay, what's next? - loop around and try to match moar!
758 36 : context->term -= (term.atom.parenthesesWidth + 1);
759 36 : return true;
760 : }
761 :
762 9 : bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
763 : {
764 9 : ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
765 9 : ASSERT(term.atom.quantityType == QuantifierGreedy);
766 9 : ASSERT(term.atom.quantityCount == quantifyInfinite);
767 9 : ASSERT(!term.capture());
768 :
769 : // If we backtrack to this point, we have failed to match this iteration of the parens.
770 : // Since this is greedy / zero minimum a failed is also accepted as a match!
771 9 : context->term += term.atom.parenthesesWidth;
772 9 : return true;
773 : }
774 :
775 0 : bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
776 : {
777 : // 'Terminal' parentheses are at the end of the regex, and as such a match past end
778 : // should always be returned as a successful match - we should never backtrack to here.
779 0 : ASSERT_NOT_REACHED();
780 : return false;
781 : }
782 :
783 288 : bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
784 : {
785 288 : ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
786 288 : ASSERT(term.atom.quantityCount == 1);
787 :
788 288 : BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
789 :
790 288 : backTrack->begin = input.getPos();
791 288 : return true;
792 : }
793 :
794 18 : bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
795 : {
796 18 : ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
797 18 : ASSERT(term.atom.quantityCount == 1);
798 :
799 18 : BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
800 :
801 18 : input.setPos(backTrack->begin);
802 :
803 : // We've reached the end of the parens; if they are inverted, this is failure.
804 18 : if (term.invert()) {
805 9 : context->term -= term.atom.parenthesesWidth;
806 9 : return false;
807 : }
808 :
809 9 : return true;
810 : }
811 :
812 270 : bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
813 : {
814 270 : ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
815 270 : ASSERT(term.atom.quantityCount == 1);
816 :
817 : // We've failed to match parens; if they are inverted, this is win!
818 270 : if (term.invert()) {
819 270 : context->term += term.atom.parenthesesWidth;
820 270 : return true;
821 : }
822 :
823 0 : return false;
824 : }
825 :
826 126 : bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
827 : {
828 126 : ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
829 126 : ASSERT(term.atom.quantityCount == 1);
830 :
831 126 : BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
832 :
833 126 : input.setPos(backTrack->begin);
834 :
835 126 : context->term -= term.atom.parenthesesWidth;
836 126 : return false;
837 : }
838 :
839 62731 : JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
840 : {
841 62731 : ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
842 :
843 62731 : BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
844 62731 : ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
845 :
846 62731 : backTrack->matchAmount = 0;
847 62731 : backTrack->lastContext = 0;
848 :
849 62731 : switch (term.atom.quantityType) {
850 : case QuantifierFixedCount: {
851 : // While we haven't yet reached our fixed limit,
852 0 : while (backTrack->matchAmount < term.atom.quantityCount) {
853 : // Try to do a match, and it it succeeds, add it to the list.
854 0 : ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
855 0 : JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
856 0 : if (result == JSRegExpMatch)
857 0 : appendParenthesesDisjunctionContext(backTrack, context);
858 : else {
859 : // The match failed; try to find an alternate point to carry on from.
860 0 : resetMatches(term, context);
861 0 : freeParenthesesDisjunctionContext(context);
862 :
863 0 : if (result != JSRegExpNoMatch)
864 0 : return result;
865 0 : JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
866 0 : if (backtrackResult != JSRegExpMatch)
867 0 : return backtrackResult;
868 : }
869 : }
870 :
871 0 : ASSERT(backTrack->matchAmount == term.atom.quantityCount);
872 0 : ParenthesesDisjunctionContext* context = backTrack->lastContext;
873 0 : recordParenthesesMatch(term, context);
874 0 : return JSRegExpMatch;
875 : }
876 :
877 : case QuantifierGreedy: {
878 243742 : while (backTrack->matchAmount < term.atom.quantityCount) {
879 181047 : ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
880 181047 : JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
881 181047 : if (result == JSRegExpMatch)
882 118352 : appendParenthesesDisjunctionContext(backTrack, context);
883 : else {
884 62695 : resetMatches(term, context);
885 62695 : freeParenthesesDisjunctionContext(context);
886 :
887 62695 : if (result != JSRegExpNoMatch)
888 0 : return result;
889 :
890 62695 : break;
891 : }
892 : }
893 :
894 62695 : if (backTrack->matchAmount) {
895 23159 : ParenthesesDisjunctionContext* context = backTrack->lastContext;
896 23159 : recordParenthesesMatch(term, context);
897 : }
898 62695 : return JSRegExpMatch;
899 : }
900 :
901 : case QuantifierNonGreedy:
902 36 : return JSRegExpMatch;
903 : }
904 :
905 0 : ASSERT_NOT_REACHED();
906 : return JSRegExpErrorNoMatch;
907 : }
908 :
909 : // Rules for backtracking differ depending on whether this is greedy or non-greedy.
910 : //
911 : // Greedy matches never should try just adding more - you should already have done
912 : // the 'more' cases. Always backtrack, at least a leetle bit. However cases where
913 : // you backtrack an item off the list needs checking, since we'll never have matched
914 : // the one less case. Tracking forwards, still add as much as possible.
915 : //
916 : // Non-greedy, we've already done the one less case, so don't match on popping.
917 : // We haven't done the one more case, so always try to add that.
918 : //
919 4505726 : JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
920 : {
921 4505726 : ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
922 :
923 4505726 : BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
924 4505726 : ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
925 :
926 4505726 : switch (term.atom.quantityType) {
927 : case QuantifierFixedCount: {
928 0 : ASSERT(backTrack->matchAmount == term.atom.quantityCount);
929 :
930 0 : ParenthesesDisjunctionContext* context = 0;
931 0 : JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
932 :
933 0 : if (result != JSRegExpMatch)
934 0 : return result;
935 :
936 : // While we haven't yet reached our fixed limit,
937 0 : while (backTrack->matchAmount < term.atom.quantityCount) {
938 : // Try to do a match, and it it succeeds, add it to the list.
939 0 : context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
940 0 : result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
941 :
942 0 : if (result == JSRegExpMatch)
943 0 : appendParenthesesDisjunctionContext(backTrack, context);
944 : else {
945 : // The match failed; try to find an alternate point to carry on from.
946 0 : resetMatches(term, context);
947 0 : freeParenthesesDisjunctionContext(context);
948 :
949 0 : if (result != JSRegExpNoMatch)
950 0 : return result;
951 0 : JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
952 0 : if (backtrackResult != JSRegExpMatch)
953 0 : return backtrackResult;
954 : }
955 : }
956 :
957 0 : ASSERT(backTrack->matchAmount == term.atom.quantityCount);
958 0 : context = backTrack->lastContext;
959 0 : recordParenthesesMatch(term, context);
960 0 : return JSRegExpMatch;
961 : }
962 :
963 : case QuantifierGreedy: {
964 5456 : if (!backTrack->matchAmount)
965 1544 : return JSRegExpNoMatch;
966 :
967 3912 : ParenthesesDisjunctionContext* context = backTrack->lastContext;
968 3912 : JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
969 3912 : if (result == JSRegExpMatch) {
970 0 : while (backTrack->matchAmount < term.atom.quantityCount) {
971 0 : ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
972 0 : JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
973 0 : if (parenthesesResult == JSRegExpMatch)
974 0 : appendParenthesesDisjunctionContext(backTrack, context);
975 : else {
976 0 : resetMatches(term, context);
977 0 : freeParenthesesDisjunctionContext(context);
978 :
979 0 : if (parenthesesResult != JSRegExpNoMatch)
980 0 : return parenthesesResult;
981 :
982 0 : break;
983 : }
984 : }
985 : } else {
986 3912 : resetMatches(term, context);
987 3912 : popParenthesesDisjunctionContext(backTrack);
988 3912 : freeParenthesesDisjunctionContext(context);
989 :
990 3912 : if (result != JSRegExpNoMatch)
991 0 : return result;
992 : }
993 :
994 3912 : if (backTrack->matchAmount) {
995 2549 : ParenthesesDisjunctionContext* context = backTrack->lastContext;
996 2549 : recordParenthesesMatch(term, context);
997 : }
998 3912 : return JSRegExpMatch;
999 : }
1000 :
1001 : case QuantifierNonGreedy: {
1002 : // If we've not reached the limit, try to add one more match.
1003 4500270 : if (backTrack->matchAmount < term.atom.quantityCount) {
1004 4500270 : ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1005 4500270 : JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1006 4500270 : if (result == JSRegExpMatch) {
1007 4500261 : appendParenthesesDisjunctionContext(backTrack, context);
1008 4500261 : recordParenthesesMatch(term, context);
1009 4500261 : return JSRegExpMatch;
1010 : }
1011 :
1012 9 : resetMatches(term, context);
1013 9 : freeParenthesesDisjunctionContext(context);
1014 :
1015 9 : if (result != JSRegExpNoMatch)
1016 0 : return result;
1017 : }
1018 :
1019 : // Nope - okay backtrack looking for an alternative.
1020 135 : while (backTrack->matchAmount) {
1021 117 : ParenthesesDisjunctionContext* context = backTrack->lastContext;
1022 117 : JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1023 117 : if (result == JSRegExpMatch) {
1024 : // successful backtrack! we're back in the game!
1025 0 : if (backTrack->matchAmount) {
1026 0 : context = backTrack->lastContext;
1027 0 : recordParenthesesMatch(term, context);
1028 : }
1029 0 : return JSRegExpMatch;
1030 : }
1031 :
1032 : // pop a match off the stack
1033 117 : resetMatches(term, context);
1034 117 : popParenthesesDisjunctionContext(backTrack);
1035 117 : freeParenthesesDisjunctionContext(context);
1036 :
1037 117 : if (result != JSRegExpNoMatch)
1038 0 : return result;
1039 : }
1040 :
1041 9 : return JSRegExpNoMatch;
1042 : }
1043 : }
1044 :
1045 0 : ASSERT_NOT_REACHED();
1046 : return JSRegExpErrorNoMatch;
1047 : }
1048 :
// Control-flow helpers for matchDisjunction(). They rely on the local names
// `context` and `disjunction` and on the labels `matchAgain` / `backtrack`
// defined inside that function:
//   MATCH_NEXT()  - step to the following term and dispatch it forwards.
//   BACKTRACK()   - step to the preceding term and dispatch it backwards.
//   currentTerm() - the ByteTerm that context->term currently indexes.
#define MATCH_NEXT() { ++context->term; goto matchAgain; }
#define BACKTRACK() { --context->term; goto backtrack; }
#define currentTerm() (disjunction->terms[context->term])
// Core matching loop: walks the terms of `disjunction`, dispatching each term
// either in the forward direction (label matchAgain) or in the backtracking
// direction (label backtrack).
//
// disjunction - the bytecode terms to execute.
// context     - holds the current term index, the match bounds
//               (matchBegin / matchEnd) and the backtracking frame.
// btrack      - when true, resume an earlier run by backtracking from the term
//               before context->term instead of starting afresh at term 0.
//
// Returns JSRegExpMatch once an end-of-(sub)pattern term is reached (matchEnd
// is set at that point), JSRegExpNoMatch when backtracking runs out of
// options, JSRegExpErrorHitLimit when the remainingMatchCount budget is
// exhausted, or whatever non-match result a nested parentheses match reports.
JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
{
    // Every entry consumes one unit of the match budget.
    if (!--remainingMatchCount)
        return JSRegExpErrorHitLimit;

    if (btrack)
        BACKTRACK();

    context->matchBegin = input.getPos();
    context->term = 0;

matchAgain:
    ASSERT(context->term < static_cast<int>(disjunction->terms.size()));

    switch (currentTerm().type) {
    case ByteTerm::TypeSubpatternBegin:
        MATCH_NEXT();
    case ByteTerm::TypeSubpatternEnd:
        context->matchEnd = input.getPos();
        return JSRegExpMatch;

    case ByteTerm::TypeBodyAlternativeBegin:
        MATCH_NEXT();
    case ByteTerm::TypeBodyAlternativeDisjunction:
    case ByteTerm::TypeBodyAlternativeEnd:
        // Reaching the end of a body alternative going forwards means the
        // whole alternative matched.
        context->matchEnd = input.getPos();
        return JSRegExpMatch;

    case ByteTerm::TypeAlternativeBegin:
        MATCH_NEXT();
    case ByteTerm::TypeAlternativeDisjunction:
    case ByteTerm::TypeAlternativeEnd: {
        // Remember how far we jump so that backtracking through the
        // AlternativeEnd term can return to this point.
        int offset = currentTerm().alternative.end;
        BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
        backTrack->offset = offset;
        context->term += offset;
        MATCH_NEXT();
    }

    case ByteTerm::TypeAssertionBOL:
        if (matchAssertionBOL(currentTerm()))
            MATCH_NEXT();
        BACKTRACK();
    case ByteTerm::TypeAssertionEOL:
        if (matchAssertionEOL(currentTerm()))
            MATCH_NEXT();
        BACKTRACK();
    case ByteTerm::TypeAssertionWordBoundary:
        if (matchAssertionWordBoundary(currentTerm()))
            MATCH_NEXT();
        BACKTRACK();

    case ByteTerm::TypePatternCharacterOnce:
    case ByteTerm::TypePatternCharacterFixed: {
        // All quantityCount characters must match, one per successive position.
        for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
            if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + matchAmount))
                BACKTRACK();
        }
        MATCH_NEXT();
    }
    case ByteTerm::TypePatternCharacterGreedy: {
        // Claim one input position at a time while the character keeps
        // matching; the count is saved in the frame for later backtracking.
        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
        unsigned matchAmount = 0;
        while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
            if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 1)) {
                input.uncheckInput(1);
                break;
            }
            ++matchAmount;
        }
        backTrack->matchAmount = matchAmount;

        MATCH_NEXT();
    }
    case ByteTerm::TypePatternCharacterNonGreedy: {
        // Start with zero repetitions; more are only tried when backtracking.
        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
        backTrack->matchAmount = 0;
        MATCH_NEXT();
    }

    case ByteTerm::TypePatternCasedCharacterOnce:
    case ByteTerm::TypePatternCasedCharacterFixed: {
        // Same as the plain-character case, but checked against the lo/hi pair.
        for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
            if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + matchAmount))
                BACKTRACK();
        }
        MATCH_NEXT();
    }
    case ByteTerm::TypePatternCasedCharacterGreedy: {
        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
        unsigned matchAmount = 0;
        while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
            if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - 1)) {
                input.uncheckInput(1);
                break;
            }
            ++matchAmount;
        }
        backTrack->matchAmount = matchAmount;

        MATCH_NEXT();
    }
    case ByteTerm::TypePatternCasedCharacterNonGreedy: {
        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
        backTrack->matchAmount = 0;
        MATCH_NEXT();
    }

    case ByteTerm::TypeCharacterClass:
        if (matchCharacterClass(currentTerm(), context))
            MATCH_NEXT();
        BACKTRACK();
    case ByteTerm::TypeBackReference:
        if (matchBackReference(currentTerm(), context))
            MATCH_NEXT();
        BACKTRACK();
    case ByteTerm::TypeParenthesesSubpattern: {
        JSRegExpResult result = matchParentheses(currentTerm(), context);

        // Anything other than match / no-match is propagated to the caller.
        if (result == JSRegExpMatch) {
            MATCH_NEXT();
        } else if (result != JSRegExpNoMatch)
            return result;

        BACKTRACK();
    }
    case ByteTerm::TypeParenthesesSubpatternOnceBegin:
        if (matchParenthesesOnceBegin(currentTerm(), context))
            MATCH_NEXT();
        BACKTRACK();
    case ByteTerm::TypeParenthesesSubpatternOnceEnd:
        if (matchParenthesesOnceEnd(currentTerm(), context))
            MATCH_NEXT();
        BACKTRACK();
    case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
        if (matchParenthesesTerminalBegin(currentTerm(), context))
            MATCH_NEXT();
        BACKTRACK();
    case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
        if (matchParenthesesTerminalEnd(currentTerm(), context))
            MATCH_NEXT();
        BACKTRACK();
    case ByteTerm::TypeParentheticalAssertionBegin:
        if (matchParentheticalAssertionBegin(currentTerm(), context))
            MATCH_NEXT();
        BACKTRACK();
    case ByteTerm::TypeParentheticalAssertionEnd:
        if (matchParentheticalAssertionEnd(currentTerm(), context))
            MATCH_NEXT();
        BACKTRACK();

    case ByteTerm::TypeCheckInput:
        if (input.checkInput(currentTerm().checkInputCount))
            MATCH_NEXT();
        BACKTRACK();

    case ByteTerm::TypeUncheckInput:
        input.uncheckInput(currentTerm().checkInputCount);
        MATCH_NEXT();
    }

    // We should never fall-through to here.
    ASSERT_NOT_REACHED();

backtrack:
    ASSERT(context->term < static_cast<int>(disjunction->terms.size()));

    switch (currentTerm().type) {
    case ByteTerm::TypeSubpatternBegin:
        // Backtracked all the way to the start of the subpattern: no match.
        return JSRegExpNoMatch;
    case ByteTerm::TypeSubpatternEnd:
        ASSERT_NOT_REACHED();

    case ByteTerm::TypeBodyAlternativeBegin:
    case ByteTerm::TypeBodyAlternativeDisjunction: {
        int offset = currentTerm().alternative.next;
        context->term += offset;
        // A positive offset leads to a later alternative: try it at the
        // same input position.
        if (offset > 0)
            MATCH_NEXT();

        // Otherwise the offset took us back to an earlier term; retry from
        // the next input position, unless the input is exhausted.
        if (input.atEnd())
            return JSRegExpNoMatch;

        input.next();

        context->matchBegin = input.getPos();

        // Terms flagged onceThrough are stepped over via their `next` offset
        // on this retry.
        if (currentTerm().alternative.onceThrough)
            context->term += currentTerm().alternative.next;

        MATCH_NEXT();
    }
    case ByteTerm::TypeBodyAlternativeEnd:
        ASSERT_NOT_REACHED();

    case ByteTerm::TypeAlternativeBegin:
    case ByteTerm::TypeAlternativeDisjunction: {
        // Move on to the next alternative if the offset points forwards;
        // otherwise keep backtracking from where the offset lands.
        int offset = currentTerm().alternative.next;
        context->term += offset;
        if (offset > 0)
            MATCH_NEXT();
        BACKTRACK();
    }
    case ByteTerm::TypeAlternativeEnd: {
        // We should never backtrack back into an alternative of the main body of the regex.
        // Undo the forward jump whose size was saved in the frame when an
        // alternative completed.
        BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
        unsigned offset = backTrack->offset;
        context->term -= offset;
        BACKTRACK();
    }

    case ByteTerm::TypeAssertionBOL:
    case ByteTerm::TypeAssertionEOL:
    case ByteTerm::TypeAssertionWordBoundary:
        // Assertions have nothing to retry here; keep backtracking.
        BACKTRACK();

    case ByteTerm::TypePatternCharacterOnce:
    case ByteTerm::TypePatternCharacterFixed:
    case ByteTerm::TypePatternCharacterGreedy:
    case ByteTerm::TypePatternCharacterNonGreedy:
        if (backtrackPatternCharacter(currentTerm(), context))
            MATCH_NEXT();
        BACKTRACK();
    case ByteTerm::TypePatternCasedCharacterOnce:
    case ByteTerm::TypePatternCasedCharacterFixed:
    case ByteTerm::TypePatternCasedCharacterGreedy:
    case ByteTerm::TypePatternCasedCharacterNonGreedy:
        if (backtrackPatternCasedCharacter(currentTerm(), context))
            MATCH_NEXT();
        BACKTRACK();
    case ByteTerm::TypeCharacterClass:
        if (backtrackCharacterClass(currentTerm(), context))
            MATCH_NEXT();
        BACKTRACK();
    case ByteTerm::TypeBackReference:
        if (backtrackBackReference(currentTerm(), context))
            MATCH_NEXT();
        BACKTRACK();
    case ByteTerm::TypeParenthesesSubpattern: {
        JSRegExpResult result = backtrackParentheses(currentTerm(), context);

        // Anything other than match / no-match is propagated to the caller.
        if (result == JSRegExpMatch) {
            MATCH_NEXT();
        } else if (result != JSRegExpNoMatch)
            return result;

        BACKTRACK();
    }
    case ByteTerm::TypeParenthesesSubpatternOnceBegin:
        if (backtrackParenthesesOnceBegin(currentTerm(), context))
            MATCH_NEXT();
        BACKTRACK();
    case ByteTerm::TypeParenthesesSubpatternOnceEnd:
        if (backtrackParenthesesOnceEnd(currentTerm(), context))
            MATCH_NEXT();
        BACKTRACK();
    case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
        if (backtrackParenthesesTerminalBegin(currentTerm(), context))
            MATCH_NEXT();
        BACKTRACK();
    case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
        if (backtrackParenthesesTerminalEnd(currentTerm(), context))
            MATCH_NEXT();
        BACKTRACK();
    case ByteTerm::TypeParentheticalAssertionBegin:
        if (backtrackParentheticalAssertionBegin(currentTerm(), context))
            MATCH_NEXT();
        BACKTRACK();
    case ByteTerm::TypeParentheticalAssertionEnd:
        if (backtrackParentheticalAssertionEnd(currentTerm(), context))
            MATCH_NEXT();
        BACKTRACK();

    case ByteTerm::TypeCheckInput:
        // Give back the input claimed when this term was passed forwards.
        input.uncheckInput(currentTerm().checkInputCount);
        BACKTRACK();

    case ByteTerm::TypeUncheckInput:
        // Re-claim the input released when this term was passed forwards.
        input.checkInput(currentTerm().checkInputCount);
        BACKTRACK();
    }

    ASSERT_NOT_REACHED();
    return JSRegExpErrorNoMatch;
}
1337 :
1338 4685346 : JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1339 : {
1340 4685346 : JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
1341 :
1342 4685346 : if (result == JSRegExpMatch) {
1343 9237541 : while (context->matchBegin == context->matchEnd) {
1344 261 : result = matchDisjunction(disjunction, context, true);
1345 261 : if (result != JSRegExpMatch)
1346 54 : return result;
1347 : }
1348 4618613 : return JSRegExpMatch;
1349 : }
1350 :
1351 66679 : return result;
1352 : }
1353 :
1354 67716 : int interpret()
1355 : {
1356 67716 : allocatorPool = pattern->m_allocator->startAllocator();
1357 67716 : if (!allocatorPool)
1358 0 : CRASH();
1359 :
1360 425246 : for (unsigned i = 0; i < ((pattern->m_body->m_numSubpatterns + 1) << 1); ++i)
1361 357530 : output[i] = -1;
1362 :
1363 67716 : DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
1364 :
1365 67716 : JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
1366 67716 : if (result == JSRegExpMatch) {
1367 63014 : output[0] = context->matchBegin;
1368 63014 : output[1] = context->matchEnd;
1369 : }
1370 :
1371 67716 : freeDisjunctionContext(context);
1372 :
1373 67716 : pattern->m_allocator->stopAllocator();
1374 :
1375 : // RegExp.cpp currently expects all error to be converted to -1.
1376 67716 : ASSERT((result == JSRegExpMatch) == (output[0] != -1));
1377 67716 : return output[0];
1378 : }
1379 :
// Sets up an interpreter for a single run of `pattern`.
//
// pattern   - compiled bytecode to execute.
// output    - caller-provided array that interpret() fills with match offsets.
// inputChar - characters to match against; together with `start` and
//             `length` it is handed to the InputStream member.
//
// The allocator pool starts out null (it is obtained in interpret()), and the
// match budget starts at matchLimit.
Interpreter(BytecodePattern* pattern, int* output, const UChar* inputChar, unsigned start, unsigned length)
    : pattern(pattern)
    , output(output)
    , input(inputChar, start, length)
    , allocatorPool(0)
    , remainingMatchCount(matchLimit)
{
}
1388 :
1389 : private:
// Compiled bytecode being executed.
BytecodePattern* pattern;
// Caller-provided result array; reset to -1 and filled in by interpret().
int* output;
// Cursor over the characters being matched.
InputStream input;
// Pool obtained from the pattern's allocator for the duration of interpret().
BumpPointerPool* allocatorPool;
// Budget decremented on every matchDisjunction() call; reaching zero aborts
// the match with JSRegExpErrorHitLimit.
unsigned remainingMatchCount;
1395 : };
1396 :
1397 :
1398 :
1399 2975 : class ByteCompiler {
// Bookkeeping saved while the compiler is inside an open parenthesis: the
// index of the term that opened it, and the value m_currentAlternativeIndex
// had before entering (restored by popParenthesesStack()).
struct ParenthesesStackEntry {
    unsigned beginTerm;
    unsigned savedAlternativeIndex;

    // For js::Vector. Does not create a valid object: both members are left
    // uninitialized.
    ParenthesesStackEntry() {}

    ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex)
        : beginTerm(beginTerm), savedAlternativeIndex(savedAlternativeIndex)
    {
    }
};
1411 :
1412 : public:
// Prepares a compiler for `pattern`. Only a reference to the pattern is kept,
// and the current-alternative index starts at term 0.
ByteCompiler(YarrPattern& pattern)
    : m_pattern(pattern)
{
    m_currentAlternativeIndex = 0;
}
1418 :
1419 2975 : PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
1420 : {
1421 2975 : regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
1422 2975 : emitDisjunction(m_pattern.m_body);
1423 2975 : regexEnd();
1424 :
1425 2975 : return adoptPtr(js::OffTheBooks::new_<BytecodePattern>(m_bodyDisjunction.release(), m_allParenthesesInfo, Ref<YarrPattern>(m_pattern), allocator));
1426 : }
1427 :
1428 14426 : void checkInput(unsigned count)
1429 : {
1430 14426 : m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
1431 14426 : }
1432 :
1433 0 : void uncheckInput(unsigned count)
1434 : {
1435 0 : m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
1436 0 : }
1437 :
1438 2885 : void assertionBOL(int inputPosition)
1439 : {
1440 2885 : m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
1441 2885 : }
1442 :
1443 2822 : void assertionEOL(int inputPosition)
1444 : {
1445 2822 : m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
1446 2822 : }
1447 :
1448 9 : void assertionWordBoundary(bool invert, int inputPosition)
1449 : {
1450 9 : m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
1451 9 : }
1452 :
1453 11989 : void atomPatternCharacter(UChar ch, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1454 : {
1455 11989 : if (m_pattern.m_ignoreCase) {
1456 11554 : UChar lo = Unicode::toLower(ch);
1457 11554 : UChar hi = Unicode::toUpper(ch);
1458 :
1459 11554 : if (lo != hi) {
1460 297 : m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
1461 297 : return;
1462 : }
1463 : }
1464 :
1465 11692 : m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
1466 : }
1467 :
1468 39666 : void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1469 : {
1470 39666 : m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
1471 :
1472 39666 : m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
1473 39666 : m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1474 39666 : m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1475 39666 : }
1476 :
1477 18 : void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1478 : {
1479 18 : ASSERT(subpatternId);
1480 :
1481 18 : m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
1482 :
1483 18 : m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
1484 18 : m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1485 18 : m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1486 18 : }
1487 :
1488 8774 : void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1489 : {
1490 8774 : int beginTerm = m_bodyDisjunction->terms.size();
1491 :
1492 8774 : m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1493 8774 : m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1494 8774 : m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1495 8774 : m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1496 :
1497 8774 : m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1498 8774 : m_currentAlternativeIndex = beginTerm + 1;
1499 8774 : }
1500 :
1501 9 : void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1502 : {
1503 9 : int beginTerm = m_bodyDisjunction->terms.size();
1504 :
1505 9 : m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
1506 9 : m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1507 9 : m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1508 9 : m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1509 :
1510 9 : m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1511 9 : m_currentAlternativeIndex = beginTerm + 1;
1512 9 : }
1513 :
1514 2997 : void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1515 : {
1516 : // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1517 : // then fix this up at the end! - simplifying this should make it much clearer.
1518 : // https://bugs.webkit.org/show_bug.cgi?id=50136
1519 :
1520 2997 : int beginTerm = m_bodyDisjunction->terms.size();
1521 :
1522 2997 : m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1523 2997 : m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1524 2997 : m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1525 2997 : m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1526 :
1527 2997 : m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1528 2997 : m_currentAlternativeIndex = beginTerm + 1;
1529 2997 : }
1530 :
1531 36 : void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1532 : {
1533 36 : int beginTerm = m_bodyDisjunction->terms.size();
1534 :
1535 36 : m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
1536 36 : m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1537 36 : m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1538 36 : m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1539 :
1540 36 : m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1541 36 : m_currentAlternativeIndex = beginTerm + 1;
1542 36 : }
1543 :
1544 36 : void atomParentheticalAssertionEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1545 : {
1546 36 : unsigned beginTerm = popParenthesesStack();
1547 36 : closeAlternative(beginTerm + 1);
1548 36 : unsigned endTerm = m_bodyDisjunction->terms.size();
1549 :
1550 36 : ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
1551 :
1552 36 : bool invert = m_bodyDisjunction->terms[beginTerm].invert();
1553 36 : unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1554 :
1555 36 : m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
1556 36 : m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1557 36 : m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1558 36 : m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1559 :
1560 36 : m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
1561 36 : m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1562 36 : m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
1563 36 : m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1564 36 : }
1565 :
1566 11816 : unsigned popParenthesesStack()
1567 : {
1568 11816 : ASSERT(m_parenthesesStack.size());
1569 11816 : int stackEnd = m_parenthesesStack.size() - 1;
1570 11816 : unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
1571 11816 : m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
1572 11816 : m_parenthesesStack.shrink(stackEnd);
1573 :
1574 11816 : ASSERT(beginTerm < m_bodyDisjunction->terms.size());
1575 11816 : ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
1576 :
1577 11816 : return beginTerm;
1578 : }
1579 :
1580 : #ifndef NDEBUG
1581 : void dumpDisjunction(ByteDisjunction* disjunction)
1582 : {
1583 : printf("ByteDisjunction(%p):\n\t", (void *) disjunction);
1584 : for (unsigned i = 0; i < disjunction->terms.size(); ++i)
1585 : printf("{ %d } ", disjunction->terms[i].type);
1586 : printf("\n");
1587 : }
1588 : #endif
1589 :
1590 11816 : void closeAlternative(int beginTerm)
1591 : {
1592 11816 : int origBeginTerm = beginTerm;
1593 11816 : ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1594 11816 : int endIndex = m_bodyDisjunction->terms.size();
1595 :
1596 11816 : unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1597 :
1598 11816 : if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
1599 8842 : m_bodyDisjunction->terms.remove(beginTerm);
1600 : else {
1601 8922 : while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1602 2974 : beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1603 2974 : ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1604 2974 : m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1605 2974 : m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1606 : }
1607 :
1608 2974 : m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1609 :
1610 2974 : m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
1611 2974 : m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1612 : }
1613 11816 : }
1614 :
1615 2975 : void closeBodyAlternative()
1616 : {
1617 2975 : int beginTerm = 0;
1618 2975 : int origBeginTerm = 0;
1619 2975 : ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1620 2975 : int endIndex = m_bodyDisjunction->terms.size();
1621 :
1622 2975 : unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1623 :
1624 5968 : while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1625 18 : beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1626 18 : ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1627 18 : m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1628 18 : m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1629 : }
1630 :
1631 2975 : m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1632 :
1633 2975 : m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
1634 2975 : m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1635 2975 : }
1636 :
1637 2997 : void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
1638 : {
1639 2997 : unsigned beginTerm = popParenthesesStack();
1640 2997 : closeAlternative(beginTerm + 1);
1641 2997 : unsigned endTerm = m_bodyDisjunction->terms.size();
1642 :
1643 2997 : ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1644 :
1645 2997 : ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
1646 :
1647 2997 : bool capture = parenthesesBegin.capture();
1648 2997 : unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1649 :
1650 2997 : unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
1651 2997 : ByteDisjunction* parenthesesDisjunction = js::OffTheBooks::new_<ByteDisjunction>(numSubpatterns, callFrameSize);
1652 :
1653 2997 : parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
1654 26624 : for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
1655 23627 : parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
1656 2997 : parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
1657 :
1658 2997 : m_bodyDisjunction->terms.shrink(beginTerm);
1659 :
1660 2997 : m_allParenthesesInfo.append(parenthesesDisjunction);
1661 2997 : m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition));
1662 :
1663 2997 : m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
1664 2997 : m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1665 2997 : m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1666 2997 : }
1667 :
1668 8774 : void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1669 : {
1670 8774 : unsigned beginTerm = popParenthesesStack();
1671 8774 : closeAlternative(beginTerm + 1);
1672 8774 : unsigned endTerm = m_bodyDisjunction->terms.size();
1673 :
1674 8774 : ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1675 :
1676 8774 : bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1677 8774 : unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1678 :
1679 8774 : m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
1680 8774 : m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1681 8774 : m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1682 8774 : m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1683 :
1684 8774 : m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
1685 8774 : m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1686 8774 : m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
1687 8774 : m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1688 8774 : }
1689 :
1690 9 : void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1691 : {
1692 9 : unsigned beginTerm = popParenthesesStack();
1693 9 : closeAlternative(beginTerm + 1);
1694 9 : unsigned endTerm = m_bodyDisjunction->terms.size();
1695 :
1696 9 : ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
1697 :
1698 9 : bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1699 9 : unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1700 :
1701 9 : m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
1702 9 : m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1703 9 : m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1704 9 : m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1705 :
1706 9 : m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
1707 9 : m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1708 9 : m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
1709 9 : m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1710 9 : }
1711 :
1712 2975 : void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
1713 : {
1714 2975 : m_bodyDisjunction = adoptPtr(js::OffTheBooks::new_<ByteDisjunction>(numSubpatterns, callFrameSize));
1715 2975 : m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
1716 2975 : m_bodyDisjunction->terms[0].frameLocation = 0;
1717 2975 : m_currentAlternativeIndex = 0;
1718 2975 : }
1719 :
1720 2975 : void regexEnd()
1721 : {
1722 2975 : closeBodyAlternative();
1723 2975 : }
1724 :
1725 18 : void alternativeBodyDisjunction(bool onceThrough)
1726 : {
1727 18 : int newAlternativeIndex = m_bodyDisjunction->terms.size();
1728 18 : m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1729 18 : m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
1730 :
1731 18 : m_currentAlternativeIndex = newAlternativeIndex;
1732 18 : }
1733 :
1734 2974 : void alternativeDisjunction()
1735 : {
1736 2974 : int newAlternativeIndex = m_bodyDisjunction->terms.size();
1737 2974 : m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1738 2974 : m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
1739 :
1740 2974 : m_currentAlternativeIndex = newAlternativeIndex;
1741 2974 : }
1742 :
1743 14791 : void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
1744 : {
1745 32574 : for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
1746 17783 : unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
1747 :
1748 17783 : PatternAlternative* alternative = disjunction->m_alternatives[alt];
1749 :
1750 17783 : if (alt) {
1751 2992 : if (disjunction == m_pattern.m_body)
1752 18 : alternativeBodyDisjunction(alternative->onceThrough());
1753 : else
1754 2974 : alternativeDisjunction();
1755 : }
1756 :
1757 17783 : unsigned minimumSize = alternative->m_minimumSize;
1758 17783 : ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
1759 17783 : unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
1760 :
1761 17783 : if (countToCheck) {
1762 14426 : checkInput(countToCheck);
1763 14426 : currentCountAlreadyChecked += countToCheck;
1764 : }
1765 :
1766 86988 : for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
1767 69205 : PatternTerm& term = alternative->m_terms[i];
1768 :
1769 69205 : switch (term.type) {
1770 : case PatternTerm::TypeAssertionBOL:
1771 2885 : assertionBOL(term.inputPosition - currentCountAlreadyChecked);
1772 2885 : break;
1773 :
1774 : case PatternTerm::TypeAssertionEOL:
1775 2822 : assertionEOL(term.inputPosition - currentCountAlreadyChecked);
1776 2822 : break;
1777 :
1778 : case PatternTerm::TypeAssertionWordBoundary:
1779 9 : assertionWordBoundary(term.invert(), term.inputPosition - currentCountAlreadyChecked);
1780 9 : break;
1781 :
1782 : case PatternTerm::TypePatternCharacter:
1783 11989 : atomPatternCharacter(term.patternCharacter, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
1784 11989 : break;
1785 :
1786 : case PatternTerm::TypeCharacterClass:
1787 39666 : atomCharacterClass(term.characterClass, term.invert(), term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
1788 39666 : break;
1789 :
1790 : case PatternTerm::TypeBackReference:
1791 18 : atomBackReference(term.backReferenceSubpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
1792 18 : break;
1793 :
1794 : case PatternTerm::TypeForwardReference:
1795 0 : break;
1796 :
1797 : case PatternTerm::TypeParenthesesSubpattern: {
1798 11780 : unsigned disjunctionAlreadyCheckedCount = 0;
1799 11780 : if (term.quantityCount == 1 && !term.parentheses.isCopy) {
1800 8774 : unsigned alternativeFrameLocation = term.frameLocation;
1801 : // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
1802 8774 : if (term.quantityType == QuantifierFixedCount)
1803 3150 : disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
1804 : else
1805 5624 : alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1806 8774 : unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1807 8774 : atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, alternativeFrameLocation);
1808 8774 : emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1809 8774 : atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1810 3006 : } else if (term.parentheses.isTerminal) {
1811 9 : unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1812 9 : atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
1813 9 : emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1814 9 : atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1815 : } else {
1816 2997 : unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1817 2997 : atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0);
1818 2997 : emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
1819 2997 : atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
1820 : }
1821 11780 : break;
1822 : }
1823 :
1824 : case PatternTerm::TypeParentheticalAssertion: {
1825 36 : unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
1826 :
1827 36 : ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition));
1828 36 : unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition);
1829 36 : unsigned uncheckAmount = 0;
1830 36 : if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
1831 0 : uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
1832 0 : uncheckInput(uncheckAmount);
1833 0 : currentCountAlreadyChecked -= uncheckAmount;
1834 : }
1835 :
1836 36 : atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
1837 36 : emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
1838 36 : atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
1839 36 : if (uncheckAmount) {
1840 0 : checkInput(uncheckAmount);
1841 0 : currentCountAlreadyChecked += uncheckAmount;
1842 : }
1843 36 : break;
1844 : }
1845 : }
1846 : }
1847 : }
1848 14791 : }
1849 :
1850 : private:
1851 : YarrPattern& m_pattern;
1852 : OwnPtr<ByteDisjunction> m_bodyDisjunction;
1853 : unsigned m_currentAlternativeIndex;
1854 : Vector<ParenthesesStackEntry> m_parenthesesStack;
1855 : Vector<ByteDisjunction*> m_allParenthesesInfo;
1856 : };
1857 :
1858 2975 : PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
1859 : {
1860 2975 : return ByteCompiler(pattern).compile(allocator);
1861 : }
1862 :
1863 67716 : int interpret(BytecodePattern* bytecode, const UChar* input, unsigned start, unsigned length, int* output)
1864 : {
1865 67716 : return Interpreter(bytecode, output, input, start, length).interpret();
1866 : }
1867 :
// Compile-time checks that each Interpreter::BackTrackInfo* struct occupies
// exactly the number of uintptr_t-sized slots named by its matching
// YarrStackSpaceForBackTrackInfo* constant.
COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
1875 :
1876 :
1877 : } }
|