1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set ts=8 sw=4 et tw=99 ft=cpp:
3 : *
4 : * ***** BEGIN LICENSE BLOCK *****
5 : * Copyright (C) 2009 Apple Inc. All rights reserved.
6 : *
7 : * Redistribution and use in source and binary forms, with or without
8 : * modification, are permitted provided that the following conditions
9 : * are met:
10 : * 1. Redistributions of source code must retain the above copyright
11 : * notice, this list of conditions and the following disclaimer.
12 : * 2. Redistributions in binary form must reproduce the above copyright
13 : * notice, this list of conditions and the following disclaimer in the
14 : * documentation and/or other materials provided with the distribution.
15 : *
16 : * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
17 : * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 : * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 : * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
20 : * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 : * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 : * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 : * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24 : * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 : * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 : * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 : *
28 : * ***** END LICENSE BLOCK ***** */
29 :
30 : #include "YarrJIT.h"
31 :
32 : #include "assembler/assembler/LinkBuffer.h"
33 : #include "Yarr.h"
34 :
35 : #if ENABLE_YARR_JIT
36 :
37 : using namespace WTF;
38 :
39 : namespace JSC { namespace Yarr {
40 :
41 58226 : class YarrGenerator : private MacroAssembler {
42 : friend void jitCompile(JSGlobalData*, YarrCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
43 :
44 : #if WTF_CPU_ARM
45 : static const RegisterID input = ARMRegisters::r0;
46 : static const RegisterID index = ARMRegisters::r1;
47 : static const RegisterID length = ARMRegisters::r2;
48 : static const RegisterID output = ARMRegisters::r4;
49 :
50 : static const RegisterID regT0 = ARMRegisters::r5;
51 : static const RegisterID regT1 = ARMRegisters::r6;
52 :
53 : static const RegisterID returnRegister = ARMRegisters::r0;
54 : #elif WTF_CPU_MIPS
55 : static const RegisterID input = MIPSRegisters::a0;
56 : static const RegisterID index = MIPSRegisters::a1;
57 : static const RegisterID length = MIPSRegisters::a2;
58 : static const RegisterID output = MIPSRegisters::a3;
59 :
60 : static const RegisterID regT0 = MIPSRegisters::t4;
61 : static const RegisterID regT1 = MIPSRegisters::t5;
62 :
63 : static const RegisterID returnRegister = MIPSRegisters::v0;
64 : #elif WTF_CPU_SH4
65 : static const RegisterID input = SH4Registers::r4;
66 : static const RegisterID index = SH4Registers::r5;
67 : static const RegisterID length = SH4Registers::r6;
68 : static const RegisterID output = SH4Registers::r7;
69 :
70 : static const RegisterID regT0 = SH4Registers::r0;
71 : static const RegisterID regT1 = SH4Registers::r1;
72 :
73 : static const RegisterID returnRegister = SH4Registers::r0;
74 : #elif WTF_CPU_SPARC
75 : static const RegisterID input = SparcRegisters::i0;
76 : static const RegisterID index = SparcRegisters::i1;
77 : static const RegisterID length = SparcRegisters::i2;
78 : static const RegisterID output = SparcRegisters::i3;
79 :
80 : static const RegisterID regT0 = SparcRegisters::i4;
81 : static const RegisterID regT1 = SparcRegisters::i5;
82 :
83 : static const RegisterID returnRegister = SparcRegisters::i0;
84 : #elif WTF_CPU_X86
85 : static const RegisterID input = X86Registers::eax;
86 : static const RegisterID index = X86Registers::edx;
87 : static const RegisterID length = X86Registers::ecx;
88 : static const RegisterID output = X86Registers::edi;
89 :
90 : static const RegisterID regT0 = X86Registers::ebx;
91 : static const RegisterID regT1 = X86Registers::esi;
92 :
93 : static const RegisterID returnRegister = X86Registers::eax;
94 : #elif WTF_CPU_X86_64
95 : #if WTF_PLATFORM_WIN
96 : static const RegisterID input = X86Registers::ecx;
97 : static const RegisterID index = X86Registers::edx;
98 : static const RegisterID length = X86Registers::r8;
99 : static const RegisterID output = X86Registers::r9;
100 : #else
101 : static const RegisterID input = X86Registers::edi;
102 : static const RegisterID index = X86Registers::esi;
103 : static const RegisterID length = X86Registers::edx;
104 : static const RegisterID output = X86Registers::ecx;
105 : #endif
106 :
107 : static const RegisterID regT0 = X86Registers::eax;
108 : static const RegisterID regT1 = X86Registers::ebx;
109 :
110 : static const RegisterID returnRegister = X86Registers::eax;
111 : #endif
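// Register conventions, as used throughout the generator below: 'input' holds the base
// pointer to the subject string's 16-bit characters, 'index' the current match position,
// 'length' the subject length, and 'output' the array of capture offsets. regT0/regT1 are
// scratch registers, and 'returnRegister' carries the match result out of the JIT code.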
112 :
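// Reorders terms within an alternative: where a fixed-count character class is immediately
// followed by a fixed-count pattern character, the two are swapped so the cheaper
// single-character comparison is generated (and can fail) before the class test.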
113 116012 : void optimizeAlternative(PatternAlternative* alternative)
114 : {
115 116012 : if (!alternative->m_terms.size())
116 12735 : return;
117 :
118 397231 : for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
119 293954 : PatternTerm& term = alternative->m_terms[i];
120 293954 : PatternTerm& nextTerm = alternative->m_terms[i + 1];
121 :
122 293954 : if ((term.type == PatternTerm::TypeCharacterClass)
123 : && (term.quantityType == QuantifierFixedCount)
124 : && (nextTerm.type == PatternTerm::TypePatternCharacter)
125 : && (nextTerm.quantityType == QuantifierFixedCount)) {
126 3487 : PatternTerm termCopy = term;
127 3487 : alternative->m_terms[i] = nextTerm;
128 3487 : alternative->m_terms[i + 1] = termCopy;
129 : }
130 : }
131 : }
132 :
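// Emits a binary search over the sorted character ranges: each step tests the middle range,
// recursing into the lower half and interleaving the sorted single-character matches that
// fall below it. Matching characters jump to 'matchDest'; characters below the remaining
// ranges are appended to 'failures', and characters above the last range fall through.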
133 48462 : void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
134 : {
135 48462 : do {
136 : // pick which range we're going to generate
137 48462 : int which = count >> 1;
138 48462 : char lo = ranges[which].begin;
139 48462 : char hi = ranges[which].end;
140 :
141 : // check if there are any ranges or matches below lo. If not, just jump (jl) to failure -
142 : // if there is anything else to check, check that first; if it falls through, jump (jmp) to failure.
143 48462 : if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
144 12963 : Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
145 :
146 : // generate code for all ranges before this one
147 12963 : if (which)
148 4338 : matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
149 :
150 36727 : while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
151 10801 : matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
152 10801 : ++*matchIndex;
153 : }
154 12963 : failures.append(jump());
155 :
156 12963 : loOrAbove.link(this);
157 35499 : } else if (which) {
158 2780 : Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
159 :
160 2780 : matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
161 2780 : failures.append(jump());
162 :
163 2780 : loOrAbove.link(this);
164 : } else
165 32719 : failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
166 :
167 96924 : while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
168 0 : ++*matchIndex;
169 :
170 48462 : matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
171 : // fall through to here, the value is above hi.
172 :
173 : // shuffle along & loop around if there are any more matches to handle.
174 48462 : unsigned next = which + 1;
175 48462 : ranges += next;
176 48462 : count -= next;
177 : } while (count);
178 41833 : }
179 :
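// Emits code that jumps to 'matchDest' if 'character' is a member of 'charClass', and
// otherwise falls through. Uses the precomputed byte lookup table when one is available;
// failing that, ASCII matches/ranges are tested directly, with a separate path for
// characters above 0x7f testing the Unicode matches/ranges.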
180 62522 : void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
181 : {
182 62522 : if (charClass->m_table) {
183 3435 : ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
184 3435 : matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));
185 3435 : return;
186 : }
187 59087 : Jump unicodeFail;
188 59087 : if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
189 5929 : Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f));
190 :
191 5929 : if (charClass->m_matchesUnicode.size()) {
192 18552 : for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
193 12635 : UChar ch = charClass->m_matchesUnicode[i];
194 12635 : matchDest.append(branch32(Equal, character, Imm32(ch)));
195 : }
196 : }
197 :
198 5929 : if (charClass->m_rangesUnicode.size()) {
199 294 : for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
200 147 : UChar lo = charClass->m_rangesUnicode[i].begin;
201 147 : UChar hi = charClass->m_rangesUnicode[i].end;
202 :
203 147 : Jump below = branch32(LessThan, character, Imm32(lo));
204 147 : matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
205 147 : below.link(this);
206 : }
207 : }
208 :
209 5929 : unicodeFail = jump();
210 5929 : isAscii.link(this);
211 : }
212 :
213 59087 : if (charClass->m_ranges.size()) {
214 34715 : unsigned matchIndex = 0;
215 69430 : JumpList failures;
216 34715 : matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
217 69722 : while (matchIndex < charClass->m_matches.size())
218 292 : matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
219 :
220 34715 : failures.link(this);
221 24372 : } else if (charClass->m_matches.size()) {
222 : // optimization: gather 'a','A' etc back together, can mask & test once.
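// (ORing in 0x20 folds an ASCII letter to lowercase, e.g. 'A' 0x41 -> 'a' 0x61, so a
// single compare against the lowercase form covers both cases.)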
223 48744 : Vector<char> matchesAZaz;
224 :
225 85922 : for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
226 61550 : char ch = charClass->m_matches[i];
227 61550 : if (m_pattern.m_ignoreCase) {
228 10920 : if (isASCIILower(ch)) {
229 3578 : matchesAZaz.append(ch);
230 3578 : continue;
231 : }
232 7342 : if (isASCIIUpper(ch))
233 3578 : continue;
234 : }
235 54394 : matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
236 : }
237 :
238 24372 : if (unsigned countAZaz = matchesAZaz.size()) {
239 265 : or32(TrustedImm32(32), character);
240 3843 : for (unsigned i = 0; i < countAZaz; ++i)
241 3578 : matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
242 : }
243 : }
244 :
245 59087 : if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
246 5929 : unicodeFail.link(this);
247 : }
248 :
249 : // Jumps if input not available; will have (incorrectly) incremented already!
250 106207 : Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
251 : {
252 106207 : if (countToCheck)
253 63571 : add32(Imm32(countToCheck), index);
254 106207 : return branch32(Above, index, length);
255 : }
256 :
257 : Jump jumpIfAvailableInput(unsigned countToCheck)
258 : {
259 : add32(Imm32(countToCheck), index);
260 : return branch32(BelowOrEqual, index, length);
261 : }
262 :
263 39159 : Jump checkInput()
264 : {
265 39159 : return branch32(BelowOrEqual, index, length);
266 : }
267 :
268 35875 : Jump atEndOfInput()
269 : {
270 35875 : return branch32(Equal, index, length);
271 : }
272 :
273 13287 : Jump notAtEndOfInput()
274 : {
275 13287 : return branch32(NotEqual, index, length);
276 : }
277 :
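// Subject characters are 16-bit UChars, so character accesses below scale 'index' by
// TimesTwo and use 16-bit loads and compares.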
278 : Jump jumpIfCharEquals(UChar ch, int inputPosition)
279 : {
280 : return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
281 : }
282 :
283 57430 : Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
284 : {
285 57430 : return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
286 : }
287 :
288 60031 : void readCharacter(int inputPosition, RegisterID reg)
289 : {
290 60031 : load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
291 60031 : }
292 :
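// The helpers below access the pattern's backtracking frame: a block of machine-stack
// slots addressed relative to the stack pointer, written with poke()/storePtrWithPatch()
// and read back with peek() or an indirect jump when backtracking.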
293 44954 : void storeToFrame(RegisterID reg, unsigned frameLocation)
294 : {
295 44954 : poke(reg, frameLocation);
296 44954 : }
297 :
298 6924 : void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
299 : {
300 6924 : poke(imm, frameLocation);
301 6924 : }
302 :
303 6569 : DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
304 : {
305 6569 : return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
306 : }
307 :
308 35313 : void loadFromFrame(unsigned frameLocation, RegisterID reg)
309 : {
310 35313 : peek(reg, frameLocation);
311 35313 : }
312 :
313 2921 : void loadFromFrameAndJump(unsigned frameLocation)
314 : {
315 2921 : jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
316 2921 : }
317 :
318 : enum YarrOpCode {
319 : // These nodes wrap body alternatives - those in the main disjunction,
320 : // rather than subpatterns or assertions. These are chained together in
321 : // a doubly linked list, with a 'begin' node for the first alternative,
322 : // a 'next' node for each subsequent alternative, and an 'end' node at
323 : // the end. In the case of repeating alternatives, the 'end' node also
324 : // has a reference back to 'begin'.
325 : OpBodyAlternativeBegin,
326 : OpBodyAlternativeNext,
327 : OpBodyAlternativeEnd,
328 : // Similar to the body alternatives, but used for subpatterns with two
329 : // or more alternatives.
330 : OpNestedAlternativeBegin,
331 : OpNestedAlternativeNext,
332 : OpNestedAlternativeEnd,
333 : // Used for alternatives in subpatterns where there is only a single
334 : // alternative (backtracking is easier in these cases), or for alternatives
335 : // which never need to be backtracked (those in parenthetical assertions,
336 : // terminal subpatterns).
337 : OpSimpleNestedAlternativeBegin,
338 : OpSimpleNestedAlternativeNext,
339 : OpSimpleNestedAlternativeEnd,
340 : // Used to wrap 'Once' subpattern matches (quantityCount == 1).
341 : OpParenthesesSubpatternOnceBegin,
342 : OpParenthesesSubpatternOnceEnd,
343 : // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
344 : OpParenthesesSubpatternTerminalBegin,
345 : OpParenthesesSubpatternTerminalEnd,
346 : // Used to wrap parenthetical assertions.
347 : OpParentheticalAssertionBegin,
348 : OpParentheticalAssertionEnd,
349 : // Wraps all simple terms (pattern characters, character classes).
350 : OpTerm,
351 : // Where an expression contains only 'once through' body alternatives
352 : // and no repeating ones, this op is used to return match failure.
353 : OpMatchFailed
354 : };
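// As an illustrative sketch (assumed for exposition, not taken from the op-compilation
// code), a body disjunction such as /foo|bar/ would be described by roughly:
//   OpBodyAlternativeBegin,
//     OpTerm('f'), OpTerm('o'), OpTerm('o'),
//   OpBodyAlternativeNext,
//     OpTerm('b'), OpTerm('a'), OpTerm('r'),
//   OpBodyAlternativeEnd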
355 :
356 : // This structure is used to hold the compiled opcode information,
357 : // including reference back to the original PatternTerm/PatternAlternatives,
358 : // and JIT compilation data structures.
359 4935478 : struct YarrOp {
360 344655 : explicit YarrOp(PatternTerm* term)
361 : : m_op(OpTerm)
362 : , m_term(term)
363 344655 : , m_isDeadCode(false)
364 : {
365 344655 : }
366 :
367 342639 : explicit YarrOp(YarrOpCode op)
368 : : m_op(op)
369 342639 : , m_isDeadCode(false)
370 : {
371 342639 : }
372 :
373 : // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
374 : YarrOpCode m_op;
375 : PatternTerm* m_term;
376 :
377 : // For alternatives, this holds the PatternAlternative and doubly linked
378 : // references to this alternative's siblings. In the case of the
379 : // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
380 : // m_nextOp will reference the OpBodyAlternativeBegin node of the first
381 : // repeating alternative.
382 : PatternAlternative* m_alternative;
383 : size_t m_previousOp;
384 : size_t m_nextOp;
385 :
386 : // Used to record a set of Jumps out of the generated code, typically
387 : // used for jumps out to backtracking code, and a single reentry back
388 : // into the code for a node (likely where a backtrack will trigger
389 : // rematching).
390 : Label m_reentry;
391 : JumpList m_jumps;
392 :
393 : // This flag is used to null out the second pattern character, when
394 : // two are fused to match a pair together.
395 : bool m_isDeadCode;
396 :
397 : // Currently used in the case of some of the more complex management of
398 : // 'm_checked', to cache the offset used in this alternative, to avoid
399 : // recalculating it.
400 : int m_checkAdjust;
401 :
402 : // Used by OpNestedAlternativeNext/End to hold the pointer to the
403 : // value that will be pushed into the pattern's frame to return to,
404 : // upon backtracking back into the disjunction.
405 : DataLabelPtr m_returnAddress;
406 : };
407 :
408 : // BacktrackingState
409 : // This class encapsulates information about the state of code generation
410 : // whilst generating the code for backtracking, when a term fails to match.
411 : // Upon entry to code generation of the backtracking code for a given node,
412 : // the Backtracking state will hold references to all control flow sources
413 : // that are outputs in need of further backtracking from the prior node
414 : // generated (which is the subsequent operation in the regular expression,
415 : // and in the m_ops Vector, since backtracking code is generated in reverse).
416 : // These references to control flow take the form of:
417 : // - A jump list of jumps, to be linked to code that will backtrack them
418 : // further.
419 : // - A set of DataLabelPtr values, to be populated with values to be
420 : // treated effectively as return addresses backtracking into complex
421 : // subpatterns.
422 : // - A flag indicating that the current sequence of generated code up to
423 : // this point requires backtracking.
424 58226 : class BacktrackingState {
425 : public:
426 58226 : BacktrackingState()
427 58226 : : m_pendingFallthrough(false)
428 : {
429 58226 : }
430 :
431 : // Add a jump or jumps, a return address, or set the flag indicating
432 : // that the current 'fallthrough' control flow requires backtracking.
433 35190 : void append(const Jump& jump)
434 : {
435 35190 : m_laterFailures.append(jump);
436 35190 : }
437 399072 : void append(JumpList& jumpList)
438 : {
439 399072 : m_laterFailures.append(jumpList);
440 399072 : }
441 6569 : void append(const DataLabelPtr& returnAddress)
442 : {
443 6569 : m_pendingReturns.append(returnAddress);
444 6569 : }
445 54417 : void fallthrough()
446 : {
447 54417 : ASSERT(!m_pendingFallthrough);
448 54417 : m_pendingFallthrough = true;
449 54417 : }
450 :
451 : // These methods clear the backtracking state, either linking to the
452 : // current location, a provided label, or copying the backtracking out
453 : // to a JumpList. All actions may require code generation to take place,
454 : // and as such are passed a pointer to the assembler.
455 132468 : void link(MacroAssembler* assembler)
456 : {
457 132468 : if (m_pendingReturns.size()) {
458 1805 : Label here(assembler);
459 3610 : for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
460 1805 : m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
461 1805 : m_pendingReturns.clear();
462 : }
463 132468 : m_laterFailures.link(assembler);
464 132468 : m_laterFailures.clear();
465 132468 : m_pendingFallthrough = false;
466 132468 : }
467 23604 : void linkTo(Label label, MacroAssembler* assembler)
468 : {
469 23604 : if (m_pendingReturns.size()) {
470 5088 : for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
471 2544 : m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
472 2544 : m_pendingReturns.clear();
473 : }
474 23604 : if (m_pendingFallthrough)
475 10021 : assembler->jump(label);
476 23604 : m_laterFailures.linkTo(label, assembler);
477 23604 : m_laterFailures.clear();
478 23604 : m_pendingFallthrough = false;
479 23604 : }
480 2721 : void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
481 : {
482 2721 : if (m_pendingReturns.size()) {
483 2220 : Label here(assembler);
484 4440 : for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
485 2220 : m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
486 2220 : m_pendingReturns.clear();
487 2220 : m_pendingFallthrough = true;
488 : }
489 2721 : if (m_pendingFallthrough)
490 2220 : jumpList.append(assembler->jump());
491 2721 : jumpList.append(m_laterFailures);
492 2721 : m_laterFailures.clear();
493 2721 : m_pendingFallthrough = false;
494 2721 : }
495 :
496 113408 : bool isEmpty()
497 : {
498 113408 : return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
499 : }
500 :
501 : // Called at the end of code generation to link all return addresses.
502 55269 : void linkDataLabels(LinkBuffer& linkBuffer)
503 : {
504 55269 : ASSERT(isEmpty());
505 61838 : for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
506 6569 : linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation));
507 55269 : }
508 :
509 : private:
510 8417 : struct ReturnAddressRecord {
511 6569 : ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
512 : : m_dataLabel(dataLabel)
513 6569 : , m_backtrackLocation(backtrackLocation)
514 : {
515 6569 : }
516 :
517 : DataLabelPtr m_dataLabel;
518 : Label m_backtrackLocation;
519 : };
520 :
521 : JumpList m_laterFailures;
522 : bool m_pendingFallthrough;
523 : Vector<DataLabelPtr, 4> m_pendingReturns;
524 : Vector<ReturnAddressRecord, 4> m_backtrackRecords;
525 : };
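// Sketch of the intended flow: each term's backtracking handler appends the failure jumps
// recorded during forward matching (see backtrackTermDefault below) and any pending
// 'return address' DataLabelPtr; the handler for the preceding construct then calls
// link()/linkTo() to bind them to the backtracking code it emits (or
// takeBacktracksToJumpList() to hand them to an enclosing op), and linkDataLabels()
// finally patches the recorded return addresses once the LinkBuffer is available.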
526 :
527 : // Generation methods:
528 : // ===================
529 :
530 : // This method provides a default implementation of backtracking common
531 : // to many terms; terms commonly jump out of the forwards matching path
532 : // on any failed conditions, and add these jumps to the m_jumps list. If
533 : // no special handling is required we can often just backtrack to m_jumps.
534 264162 : void backtrackTermDefault(size_t opIndex)
535 : {
536 264162 : YarrOp& op = m_ops[opIndex];
537 264162 : m_backtrackingState.append(op.m_jumps);
538 264162 : }
539 :
540 16877 : void generateAssertionBOL(size_t opIndex)
541 : {
542 16877 : YarrOp& op = m_ops[opIndex];
543 16877 : PatternTerm* term = op.m_term;
544 :
545 16877 : if (m_pattern.m_multiline) {
546 110 : const RegisterID character = regT0;
547 :
548 220 : JumpList matchDest;
549 110 : if (!term->inputPosition)
550 110 : matchDest.append(branch32(Equal, index, Imm32(m_checked)));
551 :
552 110 : readCharacter((term->inputPosition - m_checked) - 1, character);
553 110 : matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
554 110 : op.m_jumps.append(jump());
555 :
556 110 : matchDest.link(this);
557 : } else {
558 : // Erk, really should poison out these alternatives early. :-/
559 16767 : if (term->inputPosition)
560 0 : op.m_jumps.append(jump());
561 : else
562 16767 : op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked)));
563 : }
564 16877 : }
565 16877 : void backtrackAssertionBOL(size_t opIndex)
566 : {
567 16877 : backtrackTermDefault(opIndex);
568 16877 : }
569 :
570 18781 : void generateAssertionEOL(size_t opIndex)
571 : {
572 18781 : YarrOp& op = m_ops[opIndex];
573 18781 : PatternTerm* term = op.m_term;
574 :
575 18781 : if (m_pattern.m_multiline) {
576 94 : const RegisterID character = regT0;
577 :
578 188 : JumpList matchDest;
579 94 : if (term->inputPosition == m_checked)
580 94 : matchDest.append(atEndOfInput());
581 :
582 94 : readCharacter((term->inputPosition - m_checked), character);
583 94 : matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
584 94 : op.m_jumps.append(jump());
585 :
586 94 : matchDest.link(this);
587 : } else {
588 18687 : if (term->inputPosition == m_checked)
589 13287 : op.m_jumps.append(notAtEndOfInput());
590 : // Erk, really should poison out these alternatives early. :-/
591 : else
592 5400 : op.m_jumps.append(jump());
593 : }
594 18781 : }
595 18781 : void backtrackAssertionEOL(size_t opIndex)
596 : {
597 18781 : backtrackTermDefault(opIndex);
598 18781 : }
599 :
600 : // Also falls through on nextIsNotWordChar.
601 828 : void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
602 : {
603 828 : YarrOp& op = m_ops[opIndex];
604 828 : PatternTerm* term = op.m_term;
605 :
606 828 : const RegisterID character = regT0;
607 :
608 828 : if (term->inputPosition == m_checked)
609 504 : nextIsNotWordChar.append(atEndOfInput());
610 :
611 828 : readCharacter((term->inputPosition - m_checked), character);
612 828 : matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
613 828 : }
614 :
615 414 : void generateAssertionWordBoundary(size_t opIndex)
616 : {
617 414 : YarrOp& op = m_ops[opIndex];
618 414 : PatternTerm* term = op.m_term;
619 :
620 414 : const RegisterID character = regT0;
621 :
622 414 : Jump atBegin;
623 828 : JumpList matchDest;
624 414 : if (!term->inputPosition)
625 162 : atBegin = branch32(Equal, index, Imm32(m_checked));
626 414 : readCharacter((term->inputPosition - m_checked) - 1, character);
627 414 : matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
628 414 : if (!term->inputPosition)
629 162 : atBegin.link(this);
630 :
631 : // We fall through to here if the last character was not a wordchar.
632 828 : JumpList nonWordCharThenWordChar;
633 828 : JumpList nonWordCharThenNonWordChar;
634 414 : if (term->invert()) {
635 0 : matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
636 0 : nonWordCharThenWordChar.append(jump());
637 : } else {
638 414 : matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
639 414 : nonWordCharThenNonWordChar.append(jump());
640 : }
641 414 : op.m_jumps.append(nonWordCharThenNonWordChar);
642 :
643 : // We jump here if the last character was a wordchar.
644 414 : matchDest.link(this);
645 828 : JumpList wordCharThenWordChar;
646 828 : JumpList wordCharThenNonWordChar;
647 414 : if (term->invert()) {
648 0 : matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
649 0 : wordCharThenWordChar.append(jump());
650 : } else {
651 414 : matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
652 : // This can fall through!
653 : }
654 :
655 414 : op.m_jumps.append(wordCharThenWordChar);
656 :
657 414 : nonWordCharThenWordChar.link(this);
658 414 : wordCharThenNonWordChar.link(this);
659 414 : }
660 414 : void backtrackAssertionWordBoundary(size_t opIndex)
661 : {
662 414 : backtrackTermDefault(opIndex);
663 414 : }
664 :
665 196331 : void generatePatternCharacterOnce(size_t opIndex)
666 : {
667 196331 : YarrOp& op = m_ops[opIndex];
668 :
669 : // m_ops always ends with an OpBodyAlternativeEnd or OpMatchFailed
670 : // node, so there must always be at least one more node.
671 196331 : ASSERT(opIndex + 1 < m_ops.size());
672 196331 : YarrOp& nextOp = m_ops[opIndex + 1];
673 :
674 196331 : if (op.m_isDeadCode)
675 72224 : return;
676 :
677 124107 : PatternTerm* term = op.m_term;
678 124107 : UChar ch = term->patternCharacter;
679 :
680 124107 : const RegisterID character = regT0;
681 :
682 124107 : if (nextOp.m_op == OpTerm) {
683 85452 : PatternTerm* nextTerm = nextOp.m_term;
684 85452 : if (nextTerm->type == PatternTerm::TypePatternCharacter
685 : && nextTerm->quantityType == QuantifierFixedCount
686 : && nextTerm->quantityCount == 1
687 : && nextTerm->inputPosition == (term->inputPosition + 1)) {
688 :
689 72224 : UChar ch2 = nextTerm->patternCharacter;
690 :
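// Fuse the two adjacent 16-bit characters into a single 32-bit value (ordered per the
// target's endianness) so the pair can be matched with one unaligned 32-bit compare;
// under ignoreCase, 'mask' ORs in the ASCII case bit (0x20) for each alphabetic
// character so that either case of that character matches.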
691 72224 : int mask = 0;
692 : #if WTF_CPU_BIG_ENDIAN
693 : int chPair = ch2 | (ch << 16);
694 : #else
695 72224 : int chPair = ch | (ch2 << 16);
696 : #endif
697 :
698 72224 : if (m_pattern.m_ignoreCase) {
699 : #if WTF_CPU_BIG_ENDIAN
700 : if (isASCIIAlpha(ch))
701 : mask |= 32 << 16;
702 : if (isASCIIAlpha(ch2))
703 : mask |= 32;
704 : #else
705 3426 : if (isASCIIAlpha(ch))
706 2561 : mask |= 32;
707 3426 : if (isASCIIAlpha(ch2))
708 2595 : mask |= 32 << 16;
709 : #endif
710 : }
711 :
712 72224 : BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
713 72224 : if (mask) {
714 2702 : load32WithUnalignedHalfWords(address, character);
715 2702 : or32(Imm32(mask), character);
716 2702 : op.m_jumps.append(branch32(NotEqual, character, Imm32(chPair | mask)));
717 : } else
718 69522 : op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, address, Imm32(chPair)));
719 :
720 72224 : nextOp.m_isDeadCode = true;
721 72224 : return;
722 : }
723 : }
724 :
725 51883 : if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
726 412 : readCharacter(term->inputPosition - m_checked, character);
727 412 : or32(TrustedImm32(32), character);
728 412 : op.m_jumps.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
729 : } else {
730 51471 : ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
731 51471 : op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
732 : }
733 : }
734 196331 : void backtrackPatternCharacterOnce(size_t opIndex)
735 : {
736 196331 : backtrackTermDefault(opIndex);
737 196331 : }
738 :
739 0 : void generatePatternCharacterFixed(size_t opIndex)
740 : {
741 0 : YarrOp& op = m_ops[opIndex];
742 0 : PatternTerm* term = op.m_term;
743 0 : UChar ch = term->patternCharacter;
744 :
745 0 : const RegisterID character = regT0;
746 0 : const RegisterID countRegister = regT1;
747 :
748 0 : move(index, countRegister);
749 0 : sub32(Imm32(term->quantityCount), countRegister);
750 :
751 0 : Label loop(this);
752 0 : BaseIndex address(input, countRegister, TimesTwo, (term->inputPosition - m_checked + term->quantityCount) * sizeof(UChar));
753 :
754 0 : if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
755 0 : load16(address, character);
756 0 : or32(TrustedImm32(32), character);
757 0 : op.m_jumps.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
758 : } else {
759 0 : ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
760 0 : op.m_jumps.append(branch16(NotEqual, address, Imm32(ch)));
761 : }
762 0 : add32(TrustedImm32(1), countRegister);
763 0 : branch32(NotEqual, countRegister, index).linkTo(loop, this);
764 0 : }
765 0 : void backtrackPatternCharacterFixed(size_t opIndex)
766 : {
767 0 : backtrackTermDefault(opIndex);
768 0 : }
769 :
770 5960 : void generatePatternCharacterGreedy(size_t opIndex)
771 : {
772 5960 : YarrOp& op = m_ops[opIndex];
773 5960 : PatternTerm* term = op.m_term;
774 5960 : UChar ch = term->patternCharacter;
775 :
776 5960 : const RegisterID character = regT0;
777 5960 : const RegisterID countRegister = regT1;
778 :
779 5960 : move(TrustedImm32(0), countRegister);
780 :
781 11920 : JumpList failures;
782 5960 : Label loop(this);
783 5960 : failures.append(atEndOfInput());
784 5960 : if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
785 1 : readCharacter(term->inputPosition - m_checked, character);
786 1 : or32(TrustedImm32(32), character);
787 1 : failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
788 : } else {
789 5959 : ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
790 5959 : failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
791 : }
792 :
793 5960 : add32(TrustedImm32(1), countRegister);
794 5960 : add32(TrustedImm32(1), index);
795 5960 : if (term->quantityCount == quantifyInfinite)
796 5423 : jump(loop);
797 : else
798 537 : branch32(NotEqual, countRegister, Imm32(term->quantityCount)).linkTo(loop, this);
799 :
800 5960 : failures.link(this);
801 5960 : op.m_reentry = label();
802 :
803 5960 : storeToFrame(countRegister, term->frameLocation);
804 :
805 5960 : }
806 5960 : void backtrackPatternCharacterGreedy(size_t opIndex)
807 : {
808 5960 : YarrOp& op = m_ops[opIndex];
809 5960 : PatternTerm* term = op.m_term;
810 :
811 5960 : const RegisterID countRegister = regT1;
812 :
813 5960 : m_backtrackingState.link(this);
814 :
815 5960 : loadFromFrame(term->frameLocation, countRegister);
816 5960 : m_backtrackingState.append(branchTest32(Zero, countRegister));
817 5960 : sub32(TrustedImm32(1), countRegister);
818 5960 : sub32(TrustedImm32(1), index);
819 5960 : jump(op.m_reentry);
820 5960 : }
821 :
822 0 : void generatePatternCharacterNonGreedy(size_t opIndex)
823 : {
824 0 : YarrOp& op = m_ops[opIndex];
825 0 : PatternTerm* term = op.m_term;
826 :
827 0 : const RegisterID countRegister = regT1;
828 :
829 0 : move(TrustedImm32(0), countRegister);
830 0 : op.m_reentry = label();
831 0 : storeToFrame(countRegister, term->frameLocation);
832 0 : }
833 0 : void backtrackPatternCharacterNonGreedy(size_t opIndex)
834 : {
835 0 : YarrOp& op = m_ops[opIndex];
836 0 : PatternTerm* term = op.m_term;
837 0 : UChar ch = term->patternCharacter;
838 :
839 0 : const RegisterID character = regT0;
840 0 : const RegisterID countRegister = regT1;
841 :
842 0 : JumpList nonGreedyFailures;
843 :
844 0 : m_backtrackingState.link(this);
845 :
846 0 : loadFromFrame(term->frameLocation, countRegister);
847 :
848 0 : nonGreedyFailures.append(atEndOfInput());
849 0 : if (term->quantityCount != quantifyInfinite)
850 0 : nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount)));
851 0 : if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
852 0 : readCharacter(term->inputPosition - m_checked, character);
853 0 : or32(TrustedImm32(32), character);
854 0 : nonGreedyFailures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
855 : } else {
856 0 : ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
857 0 : nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
858 : }
859 :
860 0 : add32(TrustedImm32(1), countRegister);
861 0 : add32(TrustedImm32(1), index);
862 :
863 0 : jump(op.m_reentry);
864 :
865 0 : nonGreedyFailures.link(this);
866 0 : sub32(countRegister, index);
867 0 : m_backtrackingState.fallthrough();
868 0 : }
869 :
870 28855 : void generateCharacterClassOnce(size_t opIndex)
871 : {
872 28855 : YarrOp& op = m_ops[opIndex];
873 28855 : PatternTerm* term = op.m_term;
874 :
875 28855 : const RegisterID character = regT0;
876 :
877 57710 : JumpList matchDest;
878 28855 : readCharacter((term->inputPosition - m_checked), character);
879 28855 : matchCharacterClass(character, matchDest, term->characterClass);
880 :
881 28855 : if (term->invert())
882 5386 : op.m_jumps.append(matchDest);
883 : else {
884 23469 : op.m_jumps.append(jump());
885 23469 : matchDest.link(this);
886 : }
887 28855 : }
888 28855 : void backtrackCharacterClassOnce(size_t opIndex)
889 : {
890 28855 : backtrackTermDefault(opIndex);
891 28855 : }
892 :
893 2904 : void generateCharacterClassFixed(size_t opIndex)
894 : {
895 2904 : YarrOp& op = m_ops[opIndex];
896 2904 : PatternTerm* term = op.m_term;
897 :
898 2904 : const RegisterID character = regT0;
899 2904 : const RegisterID countRegister = regT1;
900 :
901 2904 : move(index, countRegister);
902 2904 : sub32(Imm32(term->quantityCount), countRegister);
903 :
904 2904 : Label loop(this);
905 5808 : JumpList matchDest;
906 2904 : load16(BaseIndex(input, countRegister, TimesTwo, (term->inputPosition - m_checked + term->quantityCount) * sizeof(UChar)), character);
907 2904 : matchCharacterClass(character, matchDest, term->characterClass);
908 :
909 2904 : if (term->invert())
910 19 : op.m_jumps.append(matchDest);
911 : else {
912 2885 : op.m_jumps.append(jump());
913 2885 : matchDest.link(this);
914 : }
915 :
916 2904 : add32(TrustedImm32(1), countRegister);
917 2904 : branch32(NotEqual, countRegister, index).linkTo(loop, this);
918 2904 : }
919 2904 : void backtrackCharacterClassFixed(size_t opIndex)
920 : {
921 2904 : backtrackTermDefault(opIndex);
922 2904 : }
923 :
924 29230 : void generateCharacterClassGreedy(size_t opIndex)
925 : {
926 29230 : YarrOp& op = m_ops[opIndex];
927 29230 : PatternTerm* term = op.m_term;
928 :
929 29230 : const RegisterID character = regT0;
930 29230 : const RegisterID countRegister = regT1;
931 :
932 29230 : move(TrustedImm32(0), countRegister);
933 :
934 58460 : JumpList failures;
935 29230 : Label loop(this);
936 29230 : failures.append(atEndOfInput());
937 :
938 29230 : if (term->invert()) {
939 2188 : readCharacter(term->inputPosition - m_checked, character);
940 2188 : matchCharacterClass(character, failures, term->characterClass);
941 : } else {
942 54084 : JumpList matchDest;
943 27042 : readCharacter(term->inputPosition - m_checked, character);
944 27042 : matchCharacterClass(character, matchDest, term->characterClass);
945 27042 : failures.append(jump());
946 27042 : matchDest.link(this);
947 : }
948 :
949 29230 : add32(TrustedImm32(1), countRegister);
950 29230 : add32(TrustedImm32(1), index);
951 29230 : if (term->quantityCount != quantifyInfinite) {
952 130 : branch32(NotEqual, countRegister, Imm32(term->quantityCount)).linkTo(loop, this);
953 130 : failures.append(jump());
954 : } else
955 29100 : jump(loop);
956 :
957 29230 : failures.link(this);
958 29230 : op.m_reentry = label();
959 :
960 29230 : storeToFrame(countRegister, term->frameLocation);
961 29230 : }
962 29230 : void backtrackCharacterClassGreedy(size_t opIndex)
963 : {
964 29230 : YarrOp& op = m_ops[opIndex];
965 29230 : PatternTerm* term = op.m_term;
966 :
967 29230 : const RegisterID countRegister = regT1;
968 :
969 29230 : m_backtrackingState.link(this);
970 :
971 29230 : loadFromFrame(term->frameLocation, countRegister);
972 29230 : m_backtrackingState.append(branchTest32(Zero, countRegister));
973 29230 : sub32(TrustedImm32(1), countRegister);
974 29230 : sub32(TrustedImm32(1), index);
975 29230 : jump(op.m_reentry);
976 29230 : }
977 :
978 87 : void generateCharacterClassNonGreedy(size_t opIndex)
979 : {
980 87 : YarrOp& op = m_ops[opIndex];
981 87 : PatternTerm* term = op.m_term;
982 :
983 87 : const RegisterID countRegister = regT1;
984 :
985 87 : move(TrustedImm32(0), countRegister);
986 87 : op.m_reentry = label();
987 87 : storeToFrame(countRegister, term->frameLocation);
988 87 : }
989 87 : void backtrackCharacterClassNonGreedy(size_t opIndex)
990 : {
991 87 : YarrOp& op = m_ops[opIndex];
992 87 : PatternTerm* term = op.m_term;
993 :
994 87 : const RegisterID character = regT0;
995 87 : const RegisterID countRegister = regT1;
996 :
997 174 : JumpList nonGreedyFailures;
998 :
999 87 : m_backtrackingState.link(this);
1000 :
1001 87 : Label backtrackBegin(this);
1002 87 : loadFromFrame(term->frameLocation, countRegister);
1003 :
1004 87 : nonGreedyFailures.append(atEndOfInput());
1005 87 : nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount)));
1006 :
1007 174 : JumpList matchDest;
1008 87 : readCharacter(term->inputPosition - m_checked, character);
1009 87 : matchCharacterClass(character, matchDest, term->characterClass);
1010 :
1011 87 : if (term->invert())
1012 87 : nonGreedyFailures.append(matchDest);
1013 : else {
1014 0 : nonGreedyFailures.append(jump());
1015 0 : matchDest.link(this);
1016 : }
1017 :
1018 87 : add32(TrustedImm32(1), countRegister);
1019 87 : add32(TrustedImm32(1), index);
1020 :
1021 87 : jump(op.m_reentry);
1022 :
1023 87 : nonGreedyFailures.link(this);
1024 87 : sub32(countRegister, index);
1025 87 : m_backtrackingState.fallthrough();
1026 87 : }
1027 :
1028 : // Code generation/backtracking for simple terms
1029 : // (pattern characters, character classes, and assertions).
1030 : // These methods farm out work to the set of functions above.
1031 299439 : void generateTerm(size_t opIndex)
1032 : {
1033 299439 : YarrOp& op = m_ops[opIndex];
1034 299439 : PatternTerm* term = op.m_term;
1035 :
1036 299439 : switch (term->type) {
1037 : case PatternTerm::TypePatternCharacter:
1038 202291 : switch (term->quantityType) {
1039 : case QuantifierFixedCount:
1040 196331 : if (term->quantityCount == 1)
1041 196331 : generatePatternCharacterOnce(opIndex);
1042 : else
1043 0 : generatePatternCharacterFixed(opIndex);
1044 196331 : break;
1045 : case QuantifierGreedy:
1046 5960 : generatePatternCharacterGreedy(opIndex);
1047 5960 : break;
1048 : case QuantifierNonGreedy:
1049 0 : generatePatternCharacterNonGreedy(opIndex);
1050 0 : break;
1051 : }
1052 202291 : break;
1053 :
1054 : case PatternTerm::TypeCharacterClass:
1055 61076 : switch (term->quantityType) {
1056 : case QuantifierFixedCount:
1057 31759 : if (term->quantityCount == 1)
1058 28855 : generateCharacterClassOnce(opIndex);
1059 : else
1060 2904 : generateCharacterClassFixed(opIndex);
1061 31759 : break;
1062 : case QuantifierGreedy:
1063 29230 : generateCharacterClassGreedy(opIndex);
1064 29230 : break;
1065 : case QuantifierNonGreedy:
1066 87 : generateCharacterClassNonGreedy(opIndex);
1067 87 : break;
1068 : }
1069 61076 : break;
1070 :
1071 : case PatternTerm::TypeAssertionBOL:
1072 16877 : generateAssertionBOL(opIndex);
1073 16877 : break;
1074 :
1075 : case PatternTerm::TypeAssertionEOL:
1076 18781 : generateAssertionEOL(opIndex);
1077 18781 : break;
1078 :
1079 : case PatternTerm::TypeAssertionWordBoundary:
1080 414 : generateAssertionWordBoundary(opIndex);
1081 414 : break;
1082 :
1083 : case PatternTerm::TypeForwardReference:
1084 0 : break;
1085 :
1086 : case PatternTerm::TypeParenthesesSubpattern:
1087 : case PatternTerm::TypeParentheticalAssertion:
1088 0 : ASSERT_NOT_REACHED();
1089 : case PatternTerm::TypeBackReference:
1090 0 : m_shouldFallBack = true;
1091 0 : break;
1092 : }
1093 299439 : }
1094 299439 : void backtrackTerm(size_t opIndex)
1095 : {
1096 299439 : YarrOp& op = m_ops[opIndex];
1097 299439 : PatternTerm* term = op.m_term;
1098 :
1099 299439 : switch (term->type) {
1100 : case PatternTerm::TypePatternCharacter:
1101 202291 : switch (term->quantityType) {
1102 : case QuantifierFixedCount:
1103 196331 : if (term->quantityCount == 1)
1104 196331 : backtrackPatternCharacterOnce(opIndex);
1105 : else
1106 0 : backtrackPatternCharacterFixed(opIndex);
1107 196331 : break;
1108 : case QuantifierGreedy:
1109 5960 : backtrackPatternCharacterGreedy(opIndex);
1110 5960 : break;
1111 : case QuantifierNonGreedy:
1112 0 : backtrackPatternCharacterNonGreedy(opIndex);
1113 0 : break;
1114 : }
1115 202291 : break;
1116 :
1117 : case PatternTerm::TypeCharacterClass:
1118 61076 : switch (term->quantityType) {
1119 : case QuantifierFixedCount:
1120 31759 : if (term->quantityCount == 1)
1121 28855 : backtrackCharacterClassOnce(opIndex);
1122 : else
1123 2904 : backtrackCharacterClassFixed(opIndex);
1124 31759 : break;
1125 : case QuantifierGreedy:
1126 29230 : backtrackCharacterClassGreedy(opIndex);
1127 29230 : break;
1128 : case QuantifierNonGreedy:
1129 87 : backtrackCharacterClassNonGreedy(opIndex);
1130 87 : break;
1131 : }
1132 61076 : break;
1133 :
1134 : case PatternTerm::TypeAssertionBOL:
1135 16877 : backtrackAssertionBOL(opIndex);
1136 16877 : break;
1137 :
1138 : case PatternTerm::TypeAssertionEOL:
1139 18781 : backtrackAssertionEOL(opIndex);
1140 18781 : break;
1141 :
1142 : case PatternTerm::TypeAssertionWordBoundary:
1143 414 : backtrackAssertionWordBoundary(opIndex);
1144 414 : break;
1145 :
1146 : case PatternTerm::TypeForwardReference:
1147 0 : break;
1148 :
1149 : case PatternTerm::TypeParenthesesSubpattern:
1150 : case PatternTerm::TypeParentheticalAssertion:
1151 0 : ASSERT_NOT_REACHED();
1152 : case PatternTerm::TypeBackReference:
1153 0 : m_shouldFallBack = true;
1154 0 : break;
1155 : }
1156 299439 : }
1157 :
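// generate() walks m_ops forwards, emitting the code for the matching (success) path; the
// corresponding backtracking code is generated separately, in reverse, consuming
// m_backtrackingState (see the BacktrackingState comment above).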
1158 55269 : void generate()
1159 : {
1160 : // Forwards generate the matching code.
1161 55269 : ASSERT(m_ops.size());
1162 55269 : size_t opIndex = 0;
1163 :
1164 607082 : do {
1165 607082 : YarrOp& op = m_ops[opIndex];
1166 607082 : switch (op.m_op) {
1167 :
1168 : case OpTerm:
1169 299439 : generateTerm(opIndex);
1170 299439 : break;
1171 :
1172 : // OpBodyAlternativeBegin/Next/End
1173 : //
1174 : // These nodes wrap the set of alternatives in the body of the regular expression.
1175 : // There may be either one or two chains of OpBodyAlternative nodes, one representing
1176 : // the 'once through' sequence of alternatives (if any exist), and one representing
1177 : // the repeating alternatives (again, if any exist).
1178 : //
1179 : // Upon normal entry to the Begin alternative, we will check that input is available.
1180 : // Reentry to the Begin alternative will take place after the check has taken place,
1181 : // and will assume that the input position has already been progressed as appropriate.
1182 : //
1183 : // Entry to subsequent Next/End alternatives occurs when the prior alternative has
1184 : // successfully completed a match - return a success state from JIT code.
1185 : //
1186 : // A Next alternative allows for reentry optimized to suit backtracking from its
1187 : // preceding alternative. It expects the input position to still be set to a position
1188 : // appropriate to its predecessor, and it will only perform an input check if the
1189 : // predecessor had a minimum size less than its own.
1190 : //
1191 : // In the case of 'once through' expressions, the End node will also have a reentry
1192 : // point to jump to when the last alternative fails. Again, this expects the input
1193 : // position to still reflect that expected by the prior alternative.
1194 : case OpBodyAlternativeBegin: {
1195 55422 : PatternAlternative* alternative = op.m_alternative;
1196 :
1197 : // Upon entry at the head of the set of alternatives, check if input is available
1198 : // to run the first alternative. (This progresses the input position).
1199 55422 : op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
1200 : // We will reenter after the check, and assume the input position to have been
1201 : // set as appropriate to this alternative.
1202 55422 : op.m_reentry = label();
1203 :
1204 55422 : m_checked += alternative->m_minimumSize;
1205 55422 : break;
1206 : }
1207 : case OpBodyAlternativeNext:
1208 : case OpBodyAlternativeEnd: {
1209 56943 : PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1210 56943 : PatternAlternative* alternative = op.m_alternative;
1211 :
1212 : // If we get here, the prior alternative matched - return success.
1213 :
1214 : // Adjust the stack pointer to remove the pattern's frame.
1215 : #if !WTF_CPU_SPARC
1216 56943 : if (m_pattern.m_body->m_callFrameSize)
1217 24520 : addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1218 : #endif
1219 :
1220 : // Load appropriate values into the return register and the first output
1221 : // slot, and return. In the case of a pattern with a fixed size, we will
1222 : // not yet have set the value in the first output slot, so compute it from index here.
1223 : ASSERT(index != returnRegister);
1224 56943 : if (m_pattern.m_body->m_hasFixedSize) {
1225 15695 : move(index, returnRegister);
1226 15695 : if (priorAlternative->m_minimumSize)
1227 15650 : sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
1228 15695 : store32(returnRegister, output);
1229 : } else
1230 41248 : load32(Address(output), returnRegister);
1231 56943 : store32(index, Address(output, 4));
1232 56943 : generateReturn();
1233 :
1234 : // This is the divide between the tail of the prior alternative, above, and
1235 : // the head of the subsequent alternative, below.
1236 :
1237 56943 : if (op.m_op == OpBodyAlternativeNext) {
1238 : // This is the reentry point for the Next alternative. We expect any code
1239 : // that jumps here to do so with the input position matching that of the
1240 : // PRIOR alternative, and we will only check input availability if we
1241 : // need to progress it forwards.
1242 1521 : op.m_reentry = label();
1243 1521 : if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
1244 378 : add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
1245 378 : op.m_jumps.append(jumpIfNoAvailableInput());
1246 1143 : } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
1247 810 : sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
1248 55422 : } else if (op.m_nextOp == notFound) {
1249 : // This is the reentry point for the End of 'once through' alternatives,
1250 : // jumped to when the last alternative fails to match.
1251 16767 : op.m_reentry = label();
1252 16767 : sub32(Imm32(priorAlternative->m_minimumSize), index);
1253 : }
1254 :
1255 56943 : if (op.m_op == OpBodyAlternativeNext)
1256 1521 : m_checked += alternative->m_minimumSize;
1257 56943 : m_checked -= priorAlternative->m_minimumSize;
1258 56943 : break;
1259 : }
1260 :
1261 : // OpSimpleNestedAlternativeBegin/Next/End
1262 : // OpNestedAlternativeBegin/Next/End
1263 : //
1264 : // These nodes are used to handle sets of alternatives that are nested within
1265 : // subpatterns and parenthetical assertions. The 'simple' forms are used where
1266 : // we do not need to be able to backtrack back into any alternative other than
1267 : // the last, the normal forms allow backtracking into any alternative.
1268 : //
1269 : // Each Begin/Next node is responsible for planting an input check to ensure
1270 : // sufficient input is available on entry. Next nodes additionally need to
1271 : // jump to the end - Next nodes use the End node's m_jumps list to hold this
1272 : // set of jumps.
1273 : //
1274 : // In the non-simple forms, successful alternative matches must store a
1275 : // 'return address' using a DataLabelPtr, used to store the address to jump
1276 : // to when backtracking, to get to the code for the appropriate alternative.
1277 : case OpSimpleNestedAlternativeBegin:
1278 : case OpNestedAlternativeBegin: {
1279 43754 : PatternTerm* term = op.m_term;
1280 43754 : PatternAlternative* alternative = op.m_alternative;
1281 43754 : PatternDisjunction* disjunction = term->parentheses.disjunction;
1282 :
1283 : // Calculate how much input we need to check for, and if non-zero check.
1284 43754 : op.m_checkAdjust = alternative->m_minimumSize;
1285 43754 : if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
1286 34077 : op.m_checkAdjust -= disjunction->m_minimumSize;
1287 43754 : if (op.m_checkAdjust)
1288 10388 : op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
1289 :
1290 43754 : m_checked += op.m_checkAdjust;
1291 43754 : break;
1292 : }
1293 : case OpSimpleNestedAlternativeNext:
1294 : case OpNestedAlternativeNext: {
1295 3648 : PatternTerm* term = op.m_term;
1296 3648 : PatternAlternative* alternative = op.m_alternative;
1297 3648 : PatternDisjunction* disjunction = term->parentheses.disjunction;
1298 :
1299 : // In the non-simple case, store a 'return address' so we can backtrack correctly.
1300 3648 : if (op.m_op == OpNestedAlternativeNext) {
1301 3648 : unsigned parenthesesFrameLocation = term->frameLocation;
1302 3648 : unsigned alternativeFrameLocation = parenthesesFrameLocation;
1303 3648 : if (term->quantityType != QuantifierFixedCount)
1304 2 : alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1305 3648 : op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
1306 : }
1307 :
1308 : // If we reach here then the last alternative has matched - jump to the
1309 : // End node, to skip over any further alternatives.
1310 : //
1311 : // FIXME: this is logically O(N^2) (though N can be expected to be very
1312 : // small). We could avoid this either by adding an extra jump to the JIT
1313 : // data structures, or by making the backtracking code that jumps to Next
1314 : // alternatives responsible for checking that input is available (if
1315 : // we didn't need to plant the input checks, then m_jumps would be free).
1316 3648 : YarrOp* endOp = &m_ops[op.m_nextOp];
1317 11531 : while (endOp->m_nextOp != notFound) {
1318 4235 : ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
1319 4235 : endOp = &m_ops[endOp->m_nextOp];
1320 : }
1321 3648 : ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
1322 3648 : endOp->m_jumps.append(jump());
1323 :
1324 : // This is the entry point for the next alternative.
1325 3648 : op.m_reentry = label();
1326 :
1327 : // Calculate how much input we need to check for, and if non-zero check.
1328 3648 : op.m_checkAdjust = alternative->m_minimumSize;
1329 3648 : if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
1330 3646 : op.m_checkAdjust -= disjunction->m_minimumSize;
1331 3648 : if (op.m_checkAdjust)
1332 554 : op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
1333 :
1334 3648 : YarrOp& lastOp = m_ops[op.m_previousOp];
1335 3648 : m_checked -= lastOp.m_checkAdjust;
1336 3648 : m_checked += op.m_checkAdjust;
1337 3648 : break;
1338 : }
1339 : case OpSimpleNestedAlternativeEnd:
1340 : case OpNestedAlternativeEnd: {
1341 43754 : PatternTerm* term = op.m_term;
1342 :
1343 : // In the non-simple case, store a 'return address' so we can backtrack correctly.
1344 43754 : if (op.m_op == OpNestedAlternativeEnd) {
1345 2921 : unsigned parenthesesFrameLocation = term->frameLocation;
1346 2921 : unsigned alternativeFrameLocation = parenthesesFrameLocation;
1347 2921 : if (term->quantityType != QuantifierFixedCount)
1348 2 : alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1349 2921 : op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
1350 : }
1351 :
1352 : // If this set of alternatives contains more than one alternative,
1353 : // then the Next nodes will have planted jumps to the End, and added
1354 : // them to this node's m_jumps list.
1355 43754 : op.m_jumps.link(this);
1356 43754 : op.m_jumps.clear();
1357 :
1358 43754 : YarrOp& lastOp = m_ops[op.m_previousOp];
1359 43754 : m_checked -= lastOp.m_checkAdjust;
1360 43754 : break;
1361 : }
1362 :
1363 : // OpParenthesesSubpatternOnceBegin/End
1364 : //
1365 : // These nodes support (optionally) capturing subpatterns, that have a
1366 : // quantity count of 1 (this covers fixed once, and ?/?? quantifiers).
1367 : case OpParenthesesSubpatternOnceBegin: {
1368 41001 : PatternTerm* term = op.m_term;
1369 41001 : unsigned parenthesesFrameLocation = term->frameLocation;
1370 41001 : const RegisterID indexTemporary = regT0;
1371 41001 : ASSERT(term->quantityCount == 1);
1372 :
1373 : // Upon entry to a Greedy quantified set of parentheses, store the index.
1374 : // We'll use this for two purposes:
1375 : // - To indicate which iteration we are on of matching the remainder of
1376 : // the expression after the parentheses - the first, including the
1377 : // match within the parentheses, or the second having skipped over them.
1378 : // - To check for empty matches, which must be rejected.
1379 : //
1380 : // At the head of a NonGreedy set of parentheses we'll immediately set the
1381 : // value on the stack to -1 (indicating a match skipping the subpattern),
1382 : // and plant a jump to the end. We'll also plant a label to backtrack to
1383 : // to reenter the subpattern later, with a store to set up index on the
1384 : // second iteration.
1385 : //
1386 : // FIXME: for capturing parens, could use the index in the capture array?
1387 41001 : if (term->quantityType == QuantifierGreedy)
1388 6924 : storeToFrame(index, parenthesesFrameLocation);
1389 34077 : else if (term->quantityType == QuantifierNonGreedy) {
1390 0 : storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
1391 0 : op.m_jumps.append(jump());
1392 0 : op.m_reentry = label();
1393 0 : storeToFrame(index, parenthesesFrameLocation);
1394 : }
1395 :
1396 : // If the parentheses are capturing, store the starting index value to the
1397 : // captures array, offsetting as necessary.
1398 : //
1399 : // FIXME: could avoid offsetting this value in JIT code, apply
1400 : // offsets only afterwards, at the point the results array is
1401 : // being accessed.
1402 41001 : if (term->capture()) {
1403 31038 : int offsetId = term->parentheses.subpatternId << 1;
1404 31038 : int inputOffset = term->inputPosition - m_checked;
1405 31038 : if (term->quantityType == QuantifierFixedCount)
1406 30816 : inputOffset -= term->parentheses.disjunction->m_minimumSize;
1407 31038 : if (inputOffset) {
1408 30553 : move(index, indexTemporary);
1409 30553 : add32(Imm32(inputOffset), indexTemporary);
1410 30553 : store32(indexTemporary, Address(output, offsetId * sizeof(int)));
1411 : } else
1412 485 : store32(index, Address(output, offsetId * sizeof(int)));
1413 : }
1414 41001 : break;
1415 : }
1416 : case OpParenthesesSubpatternOnceEnd: {
1417 41001 : PatternTerm* term = op.m_term;
1418 41001 : unsigned parenthesesFrameLocation = term->frameLocation;
1419 41001 : const RegisterID indexTemporary = regT0;
1420 41001 : ASSERT(term->quantityCount == 1);
1421 :
1422 : // For Greedy/NonGreedy quantified parentheses, we must reject zero-length
1423 : // matches. If the minimum size is known to be non-zero we need not check.
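 : // (For example, in /(a*)?b/ the group can match the empty string; such a
 : // zero-length iteration is rejected here and handled as a backtrack instead.)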
1424 41001 : if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize)
1425 9 : op.m_jumps.append(branch32(Equal, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*))));
1426 :
1427 : // If the parentheses are capturing, store the ending index value to the
1428 : // captures array, offsetting as necessary.
1429 : //
1430 : // FIXME: could avoid offsetting this value in JIT code, apply
1431 : // offsets only afterwards, at the point the results array is
1432 : // being accessed.
1433 41001 : if (term->capture()) {
1434 31038 : int offsetId = (term->parentheses.subpatternId << 1) + 1;
1435 31038 : int inputOffset = term->inputPosition - m_checked;
1436 31038 : if (inputOffset) {
1437 21828 : move(index, indexTemporary);
1438 21828 : add32(Imm32(inputOffset), indexTemporary);
1439 21828 : store32(indexTemporary, Address(output, offsetId * sizeof(int)));
1440 : } else
1441 9210 : store32(index, Address(output, offsetId * sizeof(int)));
1442 : }
1443 :
1444 : // If the parentheses are quantified Greedy then add a label to jump back
1445 : // to if we get a failed match after the parentheses. For NonGreedy
1446 : // parentheses, link the jump from before the subpattern to here.
1447 41001 : if (term->quantityType == QuantifierGreedy)
1448 6924 : op.m_reentry = label();
1449 34077 : else if (term->quantityType == QuantifierNonGreedy) {
1450 0 : YarrOp& beginOp = m_ops[op.m_previousOp];
1451 0 : beginOp.m_jumps.link(this);
1452 : }
1453 41001 : break;
1454 : }
1455 :
1456 : // OpParenthesesSubpatternTerminalBegin/End
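 : //
 : // A 'terminal' subpattern is a non-capturing group, quantified greedy and
 : // unbounded, with nothing after it in the pattern to force a backtrack -
 : // for example the (?:b)* in /a(?:b)*/ (see the ASSERTs below and the
 : // selection logic in opCompileParenthesesSubpattern).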
1457 : case OpParenthesesSubpatternTerminalBegin: {
1458 2717 : PatternTerm* term = op.m_term;
1459 2717 : ASSERT(term->quantityType == QuantifierGreedy);
1460 2717 : ASSERT(term->quantityCount == quantifyInfinite);
1461 2717 : ASSERT(!term->capture());
1462 :
1463 : // Upon entry set a label to loop back to.
1464 2717 : op.m_reentry = label();
1465 :
1466 : // Store the start index of the current match; we need to reject zero
1467 : // length matches.
1468 2717 : storeToFrame(index, term->frameLocation);
1469 2717 : break;
1470 : }
1471 : case OpParenthesesSubpatternTerminalEnd: {
1472 2717 : PatternTerm* term = op.m_term;
1473 :
1474 : // Check for zero length matches - if the match is non-zero, then we
1475 : // can accept it & loop back up to the head of the subpattern.
1476 2717 : YarrOp& beginOp = m_ops[op.m_previousOp];
1477 2717 : branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)), beginOp.m_reentry);
1478 :
1479 : // Reject the match - backtrack back into the subpattern.
1480 2717 : op.m_jumps.append(jump());
1481 :
1482 : // This is the entry point to jump to when we stop matching - we will
1483 : // do so once the subpattern cannot match any more.
1484 2717 : op.m_reentry = label();
1485 2717 : break;
1486 : }
1487 :
1488 : // OpParentheticalAssertionBegin/End
1489 : case OpParentheticalAssertionBegin: {
1490 36 : PatternTerm* term = op.m_term;
1491 :
1492 : // Store the current index - assertions should not update index, so
1493 : // we will need to restore it upon a successful match.
1494 36 : unsigned parenthesesFrameLocation = term->frameLocation;
1495 36 : storeToFrame(index, parenthesesFrameLocation);
1496 :
1497 : // Adjust index to account for input checked beyond the assertion's position.
1498 36 : op.m_checkAdjust = m_checked - term->inputPosition;
1499 36 : if (op.m_checkAdjust)
1500 18 : sub32(Imm32(op.m_checkAdjust), index);
1501 :
1502 36 : m_checked -= op.m_checkAdjust;
1503 36 : break;
1504 : }
1505 : case OpParentheticalAssertionEnd: {
1506 36 : PatternTerm* term = op.m_term;
1507 :
1508 : // Restore the input index value.
1509 36 : unsigned parenthesesFrameLocation = term->frameLocation;
1510 36 : loadFromFrame(parenthesesFrameLocation, index);
1511 :
1512 : // If inverted, a successful match of the assertion must be treated
1513 : // as a failure, so jump to backtracking.
1514 36 : if (term->invert()) {
1515 9 : op.m_jumps.append(jump());
1516 9 : op.m_reentry = label();
1517 : }
1518 :
1519 36 : YarrOp& lastOp = m_ops[op.m_previousOp];
1520 36 : m_checked += lastOp.m_checkAdjust;
1521 36 : break;
1522 : }
1523 :
1524 : case OpMatchFailed:
1525 : #if !WTF_CPU_SPARC
1526 16614 : if (m_pattern.m_body->m_callFrameSize)
1527 13051 : addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1528 : #endif
1529 16614 : move(TrustedImm32(-1), returnRegister);
1530 16614 : generateReturn();
1531 16614 : break;
1532 : }
1533 :
1534 607082 : ++opIndex;
1535 607082 : } while (opIndex < m_ops.size());
1536 55269 : }
1537 :
1538 55269 : void backtrack()
1539 : {
1540 : // Backwards generate the backtracking code.
1541 55269 : size_t opIndex = m_ops.size();
1542 55269 : ASSERT(opIndex);
1543 :
1544 607082 : do {
1545 607082 : --opIndex;
1546 607082 : YarrOp& op = m_ops[opIndex];
1547 607082 : switch (op.m_op) {
1548 :
1549 : case OpTerm:
1550 299439 : backtrackTerm(opIndex);
1551 299439 : break;
1552 :
1553 : // OpBodyAlternativeBegin/Next/End
1554 : //
1555 : // For each Begin/Next node representing an alternative, we need to decide what to do
1556 : // in two circumstances:
1557 : // - If we backtrack back into this node, from within the alternative.
1558 : // - If the input check at the head of the alternative fails (if this exists).
1559 : //
1560 : // We treat these two cases differently since in the former case we have slightly
1561 : // more information - since we are backtracking out of a prior alternative we know
1562 : // that at least enough input was available to run it. For example, given the regular
1563 : // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
1564 : // character match of 'a'), then we need not perform an additional input availability
1565 : // check before running the second alternative.
1566 : //
1567 : // The backtracking required differs for the last alternative, which in the case of a
1568 : // repeating set of alternatives must loop. The code generated for the last alternative
1569 : // will also be used to handle all input check failures from any prior alternatives -
1570 : // these require similar functionality, in seeking the next available alternative for
1571 : // which there is sufficient input.
1572 : //
1573 : // Since backtracking of all other alternatives simply requires us to link backtracks
1574 : // to the reentry point for the subsequent alternative, we will only be generating any
1575 : // code when backtracking the last alternative.
1576 : case OpBodyAlternativeBegin:
1577 : case OpBodyAlternativeNext: {
1578 56943 : PatternAlternative* alternative = op.m_alternative;
1579 :
1580 56943 : if (op.m_op == OpBodyAlternativeNext) {
1581 1521 : PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1582 1521 : m_checked += priorAlternative->m_minimumSize;
1583 : }
1584 56943 : m_checked -= alternative->m_minimumSize;
1585 :
1586 : // Is this the last alternative? If not, then if we backtrack to this point we just
1587 : // need to jump to try to match the next alternative.
1588 56943 : if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
1589 1521 : m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
1590 1521 : break;
1591 : }
1592 55422 : YarrOp& endOp = m_ops[op.m_nextOp];
1593 :
1594 55422 : YarrOp* beginOp = &op;
1595 112365 : while (beginOp->m_op != OpBodyAlternativeBegin) {
1596 1521 : ASSERT(beginOp->m_op == OpBodyAlternativeNext);
1597 1521 : beginOp = &m_ops[beginOp->m_previousOp];
1598 : }
1599 :
1600 55422 : bool onceThrough = endOp.m_nextOp == notFound;
1601 :
1602 : // First, generate code to handle cases where we backtrack out of an attempted match
1603 : // of the last alternative. If this is a 'once through' set of alternatives then we
1604 : // have nothing to do - link this straight through to the End.
1605 55422 : if (onceThrough)
1606 16767 : m_backtrackingState.linkTo(endOp.m_reentry, this);
1607 : else {
1608 : // If we don't need to move the input position, and the pattern has a fixed size
1609 : // (in which case we omit the store of the start index until the pattern has matched)
1610 : // then we can just link the backtrack out of the last alternative straight to the
1611 : // head of the first alternative.
1612 38655 : if (m_pattern.m_body->m_hasFixedSize
1613 : && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
1614 : && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
1615 9 : m_backtrackingState.linkTo(beginOp->m_reentry, this);
1616 : else {
1617 : // We need to generate a trampoline of code to execute before looping back
1618 : // around to the first alternative.
1619 38646 : m_backtrackingState.link(this);
1620 :
1621 : // If the pattern size is not fixed, then store the start index, for use if we match.
1622 38646 : if (!m_pattern.m_body->m_hasFixedSize) {
1623 27708 : if (alternative->m_minimumSize == 1)
1624 8609 : store32(index, Address(output));
1625 : else {
1626 19099 : move(index, regT0);
1627 19099 : if (alternative->m_minimumSize)
1628 19033 : sub32(Imm32(alternative->m_minimumSize - 1), regT0);
1629 : else
1630 66 : add32(Imm32(1), regT0);
1631 19099 : store32(regT0, Address(output));
1632 : }
1633 : }
1634 :
1635 : // Generate code to loop. Check whether the last alternative is longer than the
1636 : // first (e.g. /a|xy/ or /a|xyz/).
1637 38646 : if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
1638 : // We want to loop, and increment the input position. If the delta is 1, it is
1639 : // already correctly incremented; if more than one, then decrement as appropriate.
1640 45 : unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
1641 45 : ASSERT(delta);
1642 45 : if (delta != 1)
1643 45 : sub32(Imm32(delta - 1), index);
1644 45 : jump(beginOp->m_reentry);
1645 : } else {
1646 : // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
1647 : // be sufficient input available to handle this, so just fall through.
1648 38601 : unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
1649 38601 : if (delta != 0xFFFFFFFFu) {
1650 : // We need to check input because we are incrementing the input.
1651 38601 : add32(Imm32(delta + 1), index);
1652 38601 : checkInput().linkTo(beginOp->m_reentry, this);
1653 : }
1654 : }
1655 : }
1656 : }
1657 :
1658 : // We can reach this point in the code in two ways:
1659 : // - Fallthrough from the code above (a repeating alternative backtracked out of its
1660 : // last alternative, and did not have sufficient input to run the first).
1661 : // - We will loop back up to the following label when a repeating alternative loops,
1662 : // following a failed input check.
1663 : //
1664 : // Either way, we have just failed the input check for the first alternative.
1665 55422 : Label firstInputCheckFailed(this);
1666 :
1667 : // Generate code to handle input check failures from alternatives except the last.
1668 : // prevOp is the alternative we're handling a bail out from (initially Begin), and
1669 : // nextOp is the alternative we will be attempting to reenter into.
1670 : //
1671 : // We will link input check failures from the forwards matching path back to the code
1672 : // that can handle them.
1673 55422 : YarrOp* prevOp = beginOp;
1674 55422 : YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
1675 112365 : while (nextOp->m_op != OpBodyAlternativeEnd) {
1676 1521 : prevOp->m_jumps.link(this);
1677 :
1678 : // We only get here if an input check fails; it is only worth checking again
1679 : // if the next alternative has a minimum size less than the last.
1680 1521 : if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
1681 : // FIXME: if we added an extra label to YarrOp, we could avoid needing to
1682 : // subtract delta back out, and reduce this code. Should performance test
1683 : // the benefit of this.
1684 810 : unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
1685 810 : sub32(Imm32(delta), index);
1686 810 : Jump fail = jumpIfNoAvailableInput();
1687 810 : add32(Imm32(delta), index);
1688 810 : jump(nextOp->m_reentry);
1689 810 : fail.link(this);
1690 711 : } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
1691 378 : add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
1692 1521 : prevOp = nextOp;
1693 1521 : nextOp = &m_ops[nextOp->m_nextOp];
1694 : }
1695 :
1696 : // We fall through to here if there is insufficient input to run the last alternative.
1697 :
1698 : // If there is insufficient input to run the last alternative, then for 'once through'
1699 : // alternatives we are done - just jump back up into the forwards matching path at the End.
1700 55422 : if (onceThrough) {
1701 16767 : op.m_jumps.linkTo(endOp.m_reentry, this);
1702 16767 : jump(endOp.m_reentry);
1703 16767 : break;
1704 : }
1705 :
1706 : // For repeating alternatives, link any input check failure from the last alternative to
1707 : // this point.
1708 38655 : op.m_jumps.link(this);
1709 :
1710 38655 : bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
1711 :
1712 : // Check for cases where input position is already incremented by 1 for the last
1713 : // alternative (this is particularly useful where the minimum size of the body
1714 : // disjunction is 0, e.g. /a*|b/).
1715 38655 : if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
1716 : // index is already incremented by 1, so just store it now!
1717 8609 : store32(index, Address(output));
1718 8609 : needsToUpdateMatchStart = false;
1719 : }
1720 :
1721 : // Check whether there is sufficient input to loop. Increment the input position by
1722 : // one, and check. Also add in the minimum disjunction size before checking - there
1723 : // is no point in looping if we're just going to fail all the input checks around
1724 : // the next iteration.
1725 38655 : ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
1726 38655 : if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
1727 : // If the last alternative had the same minimum size as the disjunction,
1728 : // simply increment the input position by 1; no adjustment based on minimum size is needed.
1729 38475 : add32(Imm32(1), index);
1730 : } else {
1731 : // If the minimum for the last alternative was one greater than that
1732 : // for the disjunction, we've already progressed by 1 - nothing to do!
1733 180 : unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
1734 180 : if (delta)
1735 54 : sub32(Imm32(delta), index);
1736 : }
1737 38655 : Jump matchFailed = jumpIfNoAvailableInput();
1738 :
1739 38655 : if (needsToUpdateMatchStart) {
1740 19099 : if (!m_pattern.m_body->m_minimumSize)
1741 75 : store32(index, Address(output));
1742 : else {
1743 19024 : move(index, regT0);
1744 19024 : sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
1745 19024 : store32(regT0, Address(output));
1746 : }
1747 : }
1748 :
1749 : // Calculate how much more input the first alternative requires than the minimum
1750 : // for the body as a whole. If no more is needed then we don't need an additional
1751 : // input check here - jump straight back up to the start of the first alternative.
1752 38655 : if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
1753 38097 : jump(beginOp->m_reentry);
1754 : else {
1755 558 : if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
1756 558 : add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
1757 : else
1758 0 : sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
1759 558 : checkInput().linkTo(beginOp->m_reentry, this);
1760 558 : jump(firstInputCheckFailed);
1761 : }
1762 :
1763 : // We jump to here if we iterate to the point that there is insufficient input to
1764 : // run any matches, and need to return a failure state from JIT code.
1765 38655 : matchFailed.link(this);
1766 :
1767 : #if !WTF_CPU_SPARC
1768 38655 : if (m_pattern.m_body->m_callFrameSize)
1769 11181 : addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1770 : #endif
1771 38655 : move(TrustedImm32(-1), returnRegister);
1772 38655 : generateReturn();
1773 38655 : break;
1774 : }
1775 : case OpBodyAlternativeEnd: {
1776 : // We should never backtrack back into a body disjunction.
1777 55422 : ASSERT(m_backtrackingState.isEmpty());
1778 :
1779 55422 : PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1780 55422 : m_checked += priorAlternative->m_minimumSize;
1781 55422 : break;
1782 : }
1783 :
1784 : // OpSimpleNestedAlternativeBegin/Next/End
1785 : // OpNestedAlternativeBegin/Next/End
1786 : //
1787 : // Generate code for when we backtrack back out of an alternative into
1788 : // a Begin or Next node, or when the entry input count check fails. If
1789 : // there are more alternatives we need to jump to the next alternative,
1790 : // if not we backtrack back out of the current set of parentheses.
1791 : //
1792 : // In the case of non-simple nested alternatives we also need to link the
1793 : // 'return address' appropriately, so that we backtrack back out into the
1794 : // correct alternative.
1795 : case OpSimpleNestedAlternativeBegin:
1796 : case OpSimpleNestedAlternativeNext:
1797 : case OpNestedAlternativeBegin:
1798 : case OpNestedAlternativeNext: {
1799 47402 : YarrOp& nextOp = m_ops[op.m_nextOp];
1800 47402 : bool isBegin = op.m_previousOp == notFound;
1801 47402 : bool isLastAlternative = nextOp.m_nextOp == notFound;
1802 47402 : ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
1803 47402 : ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));
1804 :
1805 : // Treat an input check failure the same as a failed match.
1806 47402 : m_backtrackingState.append(op.m_jumps);
1807 :
1808 : // Set the backtracks to jump to the appropriate place. We may need
1809 : // to link the backtracks in one of three different ways depending on
1810 : // the type of alternative we are dealing with:
1811 : // - A single alternative, with no siblings.
1812 : // - The last alternative of a set of two or more.
1813 : // - An alternative other than the last of a set of two or more.
1814 : //
1815 : // In the case of a single alternative on its own, we don't need to
1816 : // jump anywhere - if the alternative fails to match we can just
1817 : // continue to backtrack out of the parentheses without jumping.
1818 : //
1819 : // In the case of the last alternative in a set of more than one, we
1820 : // need to jump to return back out to the beginning. We'll do so by
1821 : // adding a jump to the End node's m_jumps list, and linking this
1822 : // when we come to generate the Begin node. For alternatives other
1823 : // than the last, we need to jump to the next alternative.
1824 : //
1825 : // If the alternative had adjusted the input position we must link
1826 : // backtracking to here, correct the input position, and then jump on.
1827 : // If not, we can link the backtracks directly to their destination.
1828 47402 : if (op.m_checkAdjust) {
1829 : // Handle the cases where we need to link the backtracks here.
1830 10942 : m_backtrackingState.link(this);
1831 10942 : sub32(Imm32(op.m_checkAdjust), index);
1832 10942 : if (!isLastAlternative) {
1833 : // An alternative that is not the last should jump to its successor.
1834 1058 : jump(nextOp.m_reentry);
1835 9884 : } else if (!isBegin) {
1836 : // The last of two or more alternatives must jump back to the beginning.
1837 236 : nextOp.m_jumps.append(jump());
1838 : } else {
1839 : // A single alternative on its own can fall through.
1840 9648 : m_backtrackingState.fallthrough();
1841 : }
1842 : } else {
1843 : // Handle the cases where we can link the backtracks directly to their destinations.
1844 36460 : if (!isLastAlternative) {
1845 : // An alternative that is not the last should jump to its successor.
1846 2590 : m_backtrackingState.linkTo(nextOp.m_reentry, this);
1847 33870 : } else if (!isBegin) {
1848 : // The last of two or more alternatives must jump back to the beginning.
1849 2685 : m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
1850 : }
1851 : // In the case of a single alternative on its own do nothing - it can fall through.
1852 : }
1853 :
1854 : // At this point we've handled the backtracking back into this node.
1855 : // Now link any backtracks that need to jump to here.
1856 :
1857 : // For non-simple alternatives, link the alternative's 'return address'
1858 : // so that we backtrack back out into the previous alternative.
1859 47402 : if (op.m_op == OpNestedAlternativeNext)
1860 3648 : m_backtrackingState.append(op.m_returnAddress);
1861 :
1862 : // If there is more than one alternative, then the last alternative will
1863 : // have planted a jump to be linked to the end. This jump was added to the
1864 : // End node's m_jumps list. If we are back at the beginning, link it here.
1865 47402 : if (isBegin) {
1866 43754 : YarrOp* endOp = &m_ops[op.m_nextOp];
1867 91156 : while (endOp->m_nextOp != notFound) {
1868 3648 : ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
1869 3648 : endOp = &m_ops[endOp->m_nextOp];
1870 : }
1871 43754 : ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
1872 43754 : m_backtrackingState.append(endOp->m_jumps);
1873 : }
1874 :
1875 47402 : if (!isBegin) {
1876 3648 : YarrOp& lastOp = m_ops[op.m_previousOp];
1877 3648 : m_checked += lastOp.m_checkAdjust;
1878 : }
1879 47402 : m_checked -= op.m_checkAdjust;
1880 47402 : break;
1881 : }
1882 : case OpSimpleNestedAlternativeEnd:
1883 : case OpNestedAlternativeEnd: {
1884 43754 : PatternTerm* term = op.m_term;
1885 :
1886 : // If we backtrack into the end of a simple subpattern do nothing;
1887 : // just continue through into the last alternative. If we backtrack
1888 : // into the end of a non-simple set of alternatives we need to jump
1889 : // to the backtracking return address set up during generation.
1890 43754 : if (op.m_op == OpNestedAlternativeEnd) {
1891 2921 : m_backtrackingState.link(this);
1892 :
1893 : // Plant a jump to the return address.
1894 2921 : unsigned parenthesesFrameLocation = term->frameLocation;
1895 2921 : unsigned alternativeFrameLocation = parenthesesFrameLocation;
1896 2921 : if (term->quantityType != QuantifierFixedCount)
1897 2 : alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1898 2921 : loadFromFrameAndJump(alternativeFrameLocation);
1899 :
1900 : // Link the DataLabelPtr associated with the end of the last
1901 : // alternative to this point.
1902 2921 : m_backtrackingState.append(op.m_returnAddress);
1903 : }
1904 :
1905 43754 : YarrOp& lastOp = m_ops[op.m_previousOp];
1906 43754 : m_checked += lastOp.m_checkAdjust;
1907 43754 : break;
1908 : }
1909 :
1910 : // OpParenthesesSubpatternOnceBegin/End
1911 : //
1912 : // When we are backtracking back out of a capturing subpattern we need
1913 : // to clear the start index in the matches output array, to record that
1914 : // this subpattern has not been captured.
1915 : //
1916 : // When backtracking back out of a Greedy quantified subpattern we need
1917 : // to catch this, and try running the remainder of the alternative after
1918 : // the subpattern again, skipping the parentheses.
1919 : //
1920 : // Upon backtracking back into a quantified set of parentheses we need to
1921 : // check whether we were currently skipping the subpattern. If not, we
1922 : // can backtrack into them; if we were, we need either to backtrack back
1923 : // out of the start of the parentheses, or to jump back to the forwards
1924 : // matching start, depending on whether the match is Greedy or NonGreedy.
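 : //
 : // For illustration, consider /(a)?b/: if 'b' fails after the group matched
 : // 'a', we backtrack into the group, clear the capture's start index, set the
 : // frame slot to -1 to record that we are now skipping the group, and retry
 : // 'b' from after the parentheses; if 'b' fails again, the -1 flag tells the
 : // End node's backtracking code not to reenter the group, and we backtrack
 : // back out past its start.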
1925 : case OpParenthesesSubpatternOnceBegin: {
1926 41001 : PatternTerm* term = op.m_term;
1927 41001 : ASSERT(term->quantityCount == 1);
1928 :
1929 : // We only need to backtrack to this point if capturing or greedy.
1930 41001 : if (term->capture() || term->quantityType == QuantifierGreedy) {
1931 37740 : m_backtrackingState.link(this);
1932 :
1933 : // If capturing, clear the capture (we only need to reset start).
1934 37740 : if (term->capture())
1935 31038 : store32(TrustedImm32(-1), Address(output, (term->parentheses.subpatternId << 1) * sizeof(int)));
1936 :
1937 : // If Greedy, jump to the end.
1938 37740 : if (term->quantityType == QuantifierGreedy) {
1939 : // Clear the flag in the stackframe indicating we ran through the subpattern.
1940 6924 : unsigned parenthesesFrameLocation = term->frameLocation;
1941 6924 : storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
1942 : // Jump to after the parentheses, skipping the subpattern.
1943 6924 : jump(m_ops[op.m_nextOp].m_reentry);
1944 : // A backtrack from after the parentheses, when skipping the subpattern,
1945 : // will jump back to here.
1946 6924 : op.m_jumps.link(this);
1947 : }
1948 :
1949 37740 : m_backtrackingState.fallthrough();
1950 : }
1951 41001 : break;
1952 : }
1953 : case OpParenthesesSubpatternOnceEnd: {
1954 41001 : PatternTerm* term = op.m_term;
1955 :
1956 41001 : if (term->quantityType != QuantifierFixedCount) {
1957 6924 : m_backtrackingState.link(this);
1958 :
1959 : // Check whether we should backtrack back into the parentheses, or if we
1960 : // are currently in a state where we had skipped over the subpattern
1961 : // (in which case the flag value on the stack will be -1).
1962 6924 : unsigned parenthesesFrameLocation = term->frameLocation;
1963 6924 : Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1));
1964 :
1965 6924 : if (term->quantityType == QuantifierGreedy) {
1966 : // For Greedy parentheses, we skip after having already tried going
1967 : // through the subpattern, so if we get here we're done.
1968 6924 : YarrOp& beginOp = m_ops[op.m_previousOp];
1969 6924 : beginOp.m_jumps.append(hadSkipped);
1970 : } else {
1971 : // For NonGreedy parentheses, we try skipping the subpattern first,
1972 : // so if we get here we need to try running through the subpattern
1973 : // next. Jump back to the start of the parentheses in the forwards
1974 : // matching path.
1975 0 : ASSERT(term->quantityType == QuantifierNonGreedy);
1976 0 : YarrOp& beginOp = m_ops[op.m_previousOp];
1977 0 : hadSkipped.linkTo(beginOp.m_reentry, this);
1978 : }
1979 :
1980 6924 : m_backtrackingState.fallthrough();
1981 : }
1982 :
1983 41001 : m_backtrackingState.append(op.m_jumps);
1984 41001 : break;
1985 : }
1986 :
1987 : // OpParenthesesSubpatternTerminalBegin/End
1988 : //
1989 : // Terminal subpatterns will always match - there is nothing after them to
1990 : // force a backtrack, and they have a minimum count of 0, and as such will
1991 : // always produce an acceptable result.
1992 : case OpParenthesesSubpatternTerminalBegin: {
1993 : // We will backtrack to this point once the subpattern cannot match any
1994 : // more. Since matching nothing is accepted as a successful match (we are
1995 : // Greedy quantified with a minimum of zero), jump back to the forwards
1996 : // matching path at the end.
1997 2717 : YarrOp& endOp = m_ops[op.m_nextOp];
1998 2717 : m_backtrackingState.linkTo(endOp.m_reentry, this);
1999 2717 : break;
2000 : }
2001 : case OpParenthesesSubpatternTerminalEnd:
2002 : // We should never be backtracking to here (hence the 'terminal' in the name).
2003 2717 : ASSERT(m_backtrackingState.isEmpty());
2004 2717 : m_backtrackingState.append(op.m_jumps);
2005 2717 : break;
2006 :
2007 : // OpParentheticalAssertionBegin/End
2008 : case OpParentheticalAssertionBegin: {
2009 36 : PatternTerm* term = op.m_term;
2010 36 : YarrOp& endOp = m_ops[op.m_nextOp];
2011 :
2012 : // We need to handle the backtracks upon backtracking back out
2013 : // of a parenthetical assertion if either we need to correct
2014 : // the input index, or the assertion was inverted.
2015 36 : if (op.m_checkAdjust || term->invert()) {
2016 18 : m_backtrackingState.link(this);
2017 :
2018 18 : if (op.m_checkAdjust)
2019 18 : add32(Imm32(op.m_checkAdjust), index);
2020 :
2021 : // In an inverted assertion, failure to match the subpattern
2022 : // is treated as a successful match - jump to the end of the
2023 : // subpattern. We have already adjusted the input position
2024 : // back to its value before the assertion, which is correct.
2025 18 : if (term->invert())
2026 9 : jump(endOp.m_reentry);
2027 :
2028 18 : m_backtrackingState.fallthrough();
2029 : }
2030 :
2031 : // The End node's jump list will contain any backtracks into
2032 : // the end of the assertion. Also, if inverted, we will have
2033 : // added the failure caused by a successful match to this.
2034 36 : m_backtrackingState.append(endOp.m_jumps);
2035 :
2036 36 : m_checked += op.m_checkAdjust;
2037 36 : break;
2038 : }
2039 : case OpParentheticalAssertionEnd: {
2040 : // FIXME: We should really be clearing any nested subpattern
2041 : // matches on bailing out from after the pattern. Firefox has
2042 : // this bug too (presumably because they use YARR!)
2043 :
2044 : // Never backtrack into an assertion; later failures bail to before the begin.
2045 36 : m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this);
2046 :
2047 36 : YarrOp& lastOp = m_ops[op.m_previousOp];
2048 36 : m_checked -= lastOp.m_checkAdjust;
2049 36 : break;
2050 : }
2051 :
2052 : case OpMatchFailed:
2053 16614 : break;
2054 : }
2055 :
2056 : } while (opIndex);
2057 55269 : }
2058 :
2059 : // Compilation methods:
2060 : // ====================
2061 :
2062 : // opCompileParenthesesSubpattern
2063 : // Emits ops for a subpattern (set of parentheses). These consist
2064 : // of a set of alternatives wrapped in an outer set of nodes for
2065 : // the parentheses.
2066 : // Supported types of parentheses are 'Once' (quantityCount == 1)
2067 : // and 'Terminal' (non-capturing parentheses quantified as greedy
2068 : // and infinite).
2069 : // Alternatives will use the 'Simple' set of ops if either the
2070 : // subpattern is terminal (in which case we will never need to
2071 : // backtrack), or if the subpattern only contains one alternative.
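 : // For illustration: /(a|b)c/ compiles its group with the 'Once' parentheses
 : // nodes and the non-simple nested alternative nodes (two alternatives that
 : // backtracking from 'c' may reenter), while a trailing non-capturing group
 : // such as the (?:y)* in /x(?:y)*/ may be marked terminal by the pattern
 : // compiler and then uses the 'Terminal' nodes; other quantified groups
 : // (e.g. a capturing /(y){2,4}/) fall back to the interpreter.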
2072 52531 : void opCompileParenthesesSubpattern(PatternTerm* term)
2073 : {
2074 : YarrOpCode parenthesesBeginOpCode;
2075 : YarrOpCode parenthesesEndOpCode;
2076 52531 : YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
2077 52531 : YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
2078 52531 : YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;
2079 :
2080 : // We can currently only compile quantity 1 subpatterns that are
2081 : // not copies. We generate a copy in the case of a range quantifier,
2082 : // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
2083 : // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ respectively). The problem
2084 : // comes where the subpattern is capturing, in which case we would
2085 : // need to restore the capture from the first subpattern upon a
2086 : // failure in the second.
2087 52531 : if (term->quantityCount == 1 && !term->parentheses.isCopy) {
2088 : // Select the 'Once' nodes.
2089 46827 : parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
2090 46827 : parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
2091 :
2092 : // If there is more than one alternative we cannot use the 'simple' nodes.
2093 93654 : if (term->parentheses.disjunction->m_alternatives.size() != 1) {
2094 5778 : alternativeBeginOpCode = OpNestedAlternativeBegin;
2095 5778 : alternativeNextOpCode = OpNestedAlternativeNext;
2096 5778 : alternativeEndOpCode = OpNestedAlternativeEnd;
2097 : }
2098 5704 : } else if (term->parentheses.isTerminal) {
2099 : // Terminal groups are optimized on the assumption that matching will never
2100 : // backtrack into the terminal group. But this is false if there is more
2101 : // than one alternative and one of the alternatives can match empty. In that
2102 : // case, the empty match is counted as a failure, so we would need to backtrack.
2103 : // The backtracking code doesn't handle this case correctly, so we fall back
2104 : // to the interpreter.
2105 2726 : Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives;
2106 2726 : if (alternatives.size() != 1) {
2107 9 : for (unsigned i = 0; i < alternatives.size(); ++i) {
2108 9 : if (alternatives[i]->m_minimumSize == 0) {
2109 9 : m_shouldFallBack = true;
2110 9 : return;
2111 : }
2112 : }
2113 : }
2114 :
2115 : // Select the 'Terminal' nodes.
2116 2717 : parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
2117 2717 : parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
2118 : } else {
2119 : // This subpattern is not supported by the JIT.
2120 2978 : m_shouldFallBack = true;
2121 2978 : return;
2122 : }
2123 :
2124 49544 : size_t parenBegin = m_ops.size();
2125 49544 : m_ops.append(parenthesesBeginOpCode);
2126 :
2127 49544 : m_ops.append(alternativeBeginOpCode);
2128 49544 : m_ops.last().m_previousOp = notFound;
2129 49544 : m_ops.last().m_term = term;
2130 49544 : Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives;
2131 105593 : for (unsigned i = 0; i < alternatives.size(); ++i) {
2132 56049 : size_t lastOpIndex = m_ops.size() - 1;
2133 :
2134 56049 : PatternAlternative* nestedAlternative = alternatives[i];
2135 56049 : opCompileAlternative(nestedAlternative);
2136 :
2137 56049 : size_t thisOpIndex = m_ops.size();
2138 56049 : m_ops.append(YarrOp(alternativeNextOpCode));
2139 :
2140 56049 : YarrOp& lastOp = m_ops[lastOpIndex];
2141 56049 : YarrOp& thisOp = m_ops[thisOpIndex];
2142 :
2143 56049 : lastOp.m_alternative = nestedAlternative;
2144 56049 : lastOp.m_nextOp = thisOpIndex;
2145 56049 : thisOp.m_previousOp = lastOpIndex;
2146 56049 : thisOp.m_term = term;
2147 : }
2148 49544 : YarrOp& lastOp = m_ops.last();
2149 49544 : ASSERT(lastOp.m_op == alternativeNextOpCode);
2150 49544 : lastOp.m_op = alternativeEndOpCode;
2151 49544 : lastOp.m_alternative = 0;
2152 49544 : lastOp.m_nextOp = notFound;
2153 :
2154 49544 : size_t parenEnd = m_ops.size();
2155 49544 : m_ops.append(parenthesesEndOpCode);
2156 :
2157 49544 : m_ops[parenBegin].m_term = term;
2158 49544 : m_ops[parenBegin].m_previousOp = notFound;
2159 49544 : m_ops[parenBegin].m_nextOp = parenEnd;
2160 49544 : m_ops[parenEnd].m_term = term;
2161 49544 : m_ops[parenEnd].m_previousOp = parenBegin;
2162 49544 : m_ops[parenEnd].m_nextOp = notFound;
2163 : }
2164 :
2165 : // opCompileParentheticalAssertion
2166 : // Emits ops for a parenthetical assertion. These consist of an
2167 : // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
2168 : // the alternatives, with these wrapped by an outer pair of
2169 : // OpParentheticalAssertionBegin/End nodes.
2170 : // We can always use the OpSimpleNestedAlternative nodes in the
2171 : // case of parenthetical assertions since these only ever match
2172 : // once, and will never backtrack back into the assertion.
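 : // (Parenthetical assertions are lookahead groups such as (?=a) and (?!a);
 : // the inverted case is handled via term->invert() during code generation.)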
2173 45 : void opCompileParentheticalAssertion(PatternTerm* term)
2174 : {
2175 45 : size_t parenBegin = m_ops.size();
2176 45 : m_ops.append(OpParentheticalAssertionBegin);
2177 :
2178 45 : m_ops.append(OpSimpleNestedAlternativeBegin);
2179 45 : m_ops.last().m_previousOp = notFound;
2180 45 : m_ops.last().m_term = term;
2181 45 : Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives;
2182 90 : for (unsigned i = 0; i < alternatives.size(); ++i) {
2183 45 : size_t lastOpIndex = m_ops.size() - 1;
2184 :
2185 45 : PatternAlternative* nestedAlternative = alternatives[i];
2186 45 : opCompileAlternative(nestedAlternative);
2187 :
2188 45 : size_t thisOpIndex = m_ops.size();
2189 45 : m_ops.append(YarrOp(OpSimpleNestedAlternativeNext));
2190 :
2191 45 : YarrOp& lastOp = m_ops[lastOpIndex];
2192 45 : YarrOp& thisOp = m_ops[thisOpIndex];
2193 :
2194 45 : lastOp.m_alternative = nestedAlternative;
2195 45 : lastOp.m_nextOp = thisOpIndex;
2196 45 : thisOp.m_previousOp = lastOpIndex;
2197 45 : thisOp.m_term = term;
2198 : }
2199 45 : YarrOp& lastOp = m_ops.last();
2200 45 : ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext);
2201 45 : lastOp.m_op = OpSimpleNestedAlternativeEnd;
2202 45 : lastOp.m_alternative = 0;
2203 45 : lastOp.m_nextOp = notFound;
2204 :
2205 45 : size_t parenEnd = m_ops.size();
2206 45 : m_ops.append(OpParentheticalAssertionEnd);
2207 :
2208 45 : m_ops[parenBegin].m_term = term;
2209 45 : m_ops[parenBegin].m_previousOp = notFound;
2210 45 : m_ops[parenBegin].m_nextOp = parenEnd;
2211 45 : m_ops[parenEnd].m_term = term;
2212 45 : m_ops[parenEnd].m_previousOp = parenBegin;
2213 45 : m_ops[parenEnd].m_nextOp = notFound;
2214 45 : }
2215 :
2216 : // opCompileAlternative
2217 : // Called to emit nodes for all terms in an alternative.
2218 116012 : void opCompileAlternative(PatternAlternative* alternative)
2219 : {
2220 116012 : optimizeAlternative(alternative);
2221 :
2222 513243 : for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
2223 397231 : PatternTerm* term = &alternative->m_terms[i];
2224 :
2225 397231 : switch (term->type) {
2226 : case PatternTerm::TypeParenthesesSubpattern:
2227 52531 : opCompileParenthesesSubpattern(term);
2228 52531 : break;
2229 :
2230 : case PatternTerm::TypeParentheticalAssertion:
2231 45 : opCompileParentheticalAssertion(term);
2232 45 : break;
2233 :
2234 : default:
2235 344655 : m_ops.append(term);
2236 : }
2237 : }
2238 116012 : }
2239 :
2240 : // opCompileBody
2241 : // This method compiles the body disjunction of the regular expression.
2242 : // The body consists of two sets of alternatives - zero or more 'once
2243 : // through' (BOL anchored) alternatives, followed by zero or more
2244 : // repeated alternatives.
2245 : // Each of these two sets of alternatives, if not empty, is wrapped in a
2246 : // set of OpBodyAlternativeBegin/Next/End nodes (with the 'begin' node
2247 : // referencing the first alternative, and 'next' nodes referencing any
2248 : // further alternatives). The begin/next/end nodes are linked together
2249 : // in a doubly linked list. In the case of repeating alternatives, the
2250 : // end node is also linked back to the beginning. If no repeating
2251 : // alternatives exist, then an OpMatchFailed node is emitted to return
2252 : // the failing result.
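 : //
 : // For example, in /^foo|^bar|baz/ the '^foo' and '^bar' alternatives are
 : // 'once through' - being BOL anchored they need not be retried at later
 : // start positions (outside of multiline mode) - while 'baz' is a repeating
 : // alternative, retried at each subsequent input position.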
2253 58226 : void opCompileBody(PatternDisjunction* disjunction)
2254 : {
2255 58226 : Vector<PatternAlternative*>& alternatives = disjunction->m_alternatives;
2256 58226 : size_t currentAlternativeIndex = 0;
2257 :
2258 : // Emit the 'once through' alternatives.
2259 58226 : if (alternatives.size() && alternatives[0]->onceThrough()) {
2260 19634 : m_ops.append(YarrOp(OpBodyAlternativeBegin));
2261 19634 : m_ops.last().m_previousOp = notFound;
2262 :
2263 19859 : do {
2264 19670 : size_t lastOpIndex = m_ops.size() - 1;
2265 19670 : PatternAlternative* alternative = alternatives[currentAlternativeIndex];
2266 19670 : opCompileAlternative(alternative);
2267 :
2268 19670 : size_t thisOpIndex = m_ops.size();
2269 19670 : m_ops.append(YarrOp(OpBodyAlternativeNext));
2270 :
2271 19670 : YarrOp& lastOp = m_ops[lastOpIndex];
2272 19670 : YarrOp& thisOp = m_ops[thisOpIndex];
2273 :
2274 19670 : lastOp.m_alternative = alternative;
2275 19670 : lastOp.m_nextOp = thisOpIndex;
2276 19670 : thisOp.m_previousOp = lastOpIndex;
2277 :
2278 19670 : ++currentAlternativeIndex;
2279 19859 : } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough());
2280 :
2281 19634 : YarrOp& lastOp = m_ops.last();
2282 :
2283 19634 : ASSERT(lastOp.m_op == OpBodyAlternativeNext);
2284 19634 : lastOp.m_op = OpBodyAlternativeEnd;
2285 19634 : lastOp.m_alternative = 0;
2286 19634 : lastOp.m_nextOp = notFound;
2287 : }
2288 :
2289 58226 : if (currentAlternativeIndex == alternatives.size()) {
2290 19481 : m_ops.append(YarrOp(OpMatchFailed));
2291 19481 : return;
2292 : }
2293 :
2294 : // Emit the repeated alternatives.
2295 38745 : size_t repeatLoop = m_ops.size();
2296 38745 : m_ops.append(YarrOp(OpBodyAlternativeBegin));
2297 38745 : m_ops.last().m_previousOp = notFound;
2298 40248 : do {
2299 40248 : size_t lastOpIndex = m_ops.size() - 1;
2300 40248 : PatternAlternative* alternative = alternatives[currentAlternativeIndex];
2301 40248 : ASSERT(!alternative->onceThrough());
2302 40248 : opCompileAlternative(alternative);
2303 :
2304 40248 : size_t thisOpIndex = m_ops.size();
2305 40248 : m_ops.append(YarrOp(OpBodyAlternativeNext));
2306 :
2307 40248 : YarrOp& lastOp = m_ops[lastOpIndex];
2308 40248 : YarrOp& thisOp = m_ops[thisOpIndex];
2309 :
2310 40248 : lastOp.m_alternative = alternative;
2311 40248 : lastOp.m_nextOp = thisOpIndex;
2312 40248 : thisOp.m_previousOp = lastOpIndex;
2313 :
2314 40248 : ++currentAlternativeIndex;
2315 40248 : } while (currentAlternativeIndex < alternatives.size());
2316 38745 : YarrOp& lastOp = m_ops.last();
2317 38745 : ASSERT(lastOp.m_op == OpBodyAlternativeNext);
2318 38745 : lastOp.m_op = OpBodyAlternativeEnd;
2319 38745 : lastOp.m_alternative = 0;
2320 38745 : lastOp.m_nextOp = repeatLoop;
2321 : }
2322 :
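 : // generateEnter / generateReturn
 : // Emit the prologue and epilogue of the generated match function. The
 : // generated code is called with (input, start index, length, output array) -
 : // see execute() below - and the result is returned in returnRegister; the
 : // failure paths above return -1.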
2323 58226 : void generateEnter()
2324 : {
2325 : #if WTF_CPU_X86_64
2326 : push(X86Registers::ebp);
2327 : move(stackPointerRegister, X86Registers::ebp);
2328 : push(X86Registers::ebx);
2329 : #elif WTF_CPU_X86
2330 58226 : push(X86Registers::ebp);
2331 58226 : move(stackPointerRegister, X86Registers::ebp);
2332 : // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
2333 58226 : push(X86Registers::ebx);
2334 58226 : push(X86Registers::edi);
2335 58226 : push(X86Registers::esi);
2336 : // load output into edi (2 = saved ebp + return address).
2337 : #if WTF_COMPILER_MSVC || WTF_COMPILER_SUNCC
2338 : loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
2339 : loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
2340 : loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
2341 : loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
2342 : #else
2343 58226 : loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
2344 : #endif
2345 : #elif WTF_CPU_ARM
2346 : push(ARMRegisters::r4);
2347 : push(ARMRegisters::r5);
2348 : push(ARMRegisters::r6);
2349 : #if WTF_CPU_ARM_TRADITIONAL
2350 : push(ARMRegisters::r8); // scratch register
2351 : #endif
2352 : move(ARMRegisters::r3, output);
2353 : #elif WTF_CPU_SH4
2354 : push(SH4Registers::r11);
2355 : push(SH4Registers::r13);
2356 : #elif WTF_CPU_SPARC
2357 : save(Imm32(-m_pattern.m_body->m_callFrameSize * sizeof(void*)));
2358 : #elif WTF_CPU_MIPS
2359 : // Do nothing.
2360 : #endif
2361 58226 : }
2362 :
2363 112212 : void generateReturn()
2364 : {
2365 : #if WTF_CPU_X86_64
2366 : pop(X86Registers::ebx);
2367 : pop(X86Registers::ebp);
2368 : #elif WTF_CPU_X86
2369 112212 : pop(X86Registers::esi);
2370 112212 : pop(X86Registers::edi);
2371 112212 : pop(X86Registers::ebx);
2372 112212 : pop(X86Registers::ebp);
2373 : #elif WTF_CPU_ARM
2374 : #if WTF_CPU_ARM_TRADITIONAL
2375 : pop(ARMRegisters::r8); // scratch register
2376 : #endif
2377 : pop(ARMRegisters::r6);
2378 : pop(ARMRegisters::r5);
2379 : pop(ARMRegisters::r4);
2380 : #elif WTF_CPU_SH4
2381 : pop(SH4Registers::r13);
2382 : pop(SH4Registers::r11);
2383 : #elif WTF_CPU_SPARC
2384 : ret_and_restore();
2385 : return;
2386 : #elif WTF_CPU_MIPS
2387 : // Do nothing
2388 : #endif
2389 112212 : ret();
2390 112212 : }
2391 :
2392 : public:
2393 58226 : YarrGenerator(YarrPattern& pattern)
2394 : : m_pattern(pattern)
2395 : , m_shouldFallBack(false)
2396 58226 : , m_checked(0)
2397 : {
2398 58226 : }
2399 :
2400 58226 : void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject)
2401 : {
2402 58226 : generateEnter();
2403 :
2404 58226 : if (!m_pattern.m_body->m_hasFixedSize)
2405 43737 : store32(index, Address(output));
2406 :
2407 : #if !WTF_CPU_SPARC
2408 58226 : if (m_pattern.m_body->m_callFrameSize)
2409 27189 : subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
2410 : #endif
2411 :
2412 : // Compile the pattern to the internal 'YarrOp' representation.
2413 58226 : opCompileBody(m_pattern.m_body);
2414 :
2415 : // If we encountered anything we can't handle in the JIT code
2416 : // (e.g. backreferences) then return early.
2417 58226 : if (m_shouldFallBack) {
2418 2957 : jitObject.setFallBack(true);
2419 2957 : return;
2420 : }
2421 :
2422 55269 : generate();
2423 55269 : backtrack();
2424 :
2425 : // Link & finalize the code.
2426 : // XXX yarr-oom
2427 : ExecutablePool *pool;
2428 : bool ok;
2429 110538 : LinkBuffer linkBuffer(this, globalData->regexAllocator, &pool, &ok, REGEXP_CODE);
2430 55269 : m_backtrackingState.linkDataLabels(linkBuffer);
2431 55269 : jitObject.set(linkBuffer.finalizeCode());
2432 55269 : jitObject.setFallBack(m_shouldFallBack);
2433 : }
2434 :
2435 : private:
2436 : YarrPattern& m_pattern;
2437 :
2438 : // Used to detect regular expression constructs that are not currently
2439 : // supported in the JIT; fall back to the interpreter when this is detected.
2440 : bool m_shouldFallBack;
2441 :
2442 : // The regular expression expressed as a linear sequence of operations.
2443 : Vector<YarrOp, 128> m_ops;
2444 :
2445 : // This records the current input offset being applied due to the current
2446 : // set of alternatives we are nested within. E.g. when matching the
2447 : // character 'b' within the regular expression /abc/, we will know that
2448 : // the minimum size for the alternative is 3, checked upon entry to the
2449 : // alternative, and that 'b' is at offset 1 from the start, and as such
2450 : // when matching 'b' we need to apply an offset of -2 to the load.
2451 : //
2452 : // FIXME: This should go away. Rather than tracking this value throughout
2453 : // code generation, we should gather this information up front & store it
2454 : // on the YarrOp structure.
2455 : int m_checked;
2456 :
2457 : // This class records state whilst generating the backtracking path of code.
2458 : BacktrackingState m_backtrackingState;
2459 : };
2460 :
2461 58226 : void jitCompile(YarrPattern& pattern, JSGlobalData* globalData, YarrCodeBlock& jitObject)
2462 : {
2463 58226 : YarrGenerator(pattern).compile(globalData, jitObject);
2464 58226 : }
2465 :
2466 3461258 : int execute(YarrCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output)
2467 : {
2468 3461258 : return jitObject.execute(input, start, length, output);
2469 : }
2470 :
2471 : }}
2472 :
2473 : #endif
|