1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set ts=8 sw=4 et tw=99 ft=cpp:
3 : *
4 : * ***** BEGIN LICENSE BLOCK *****
5 : * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
6 : *
7 : * Redistribution and use in source and binary forms, with or without
8 : * modification, are permitted provided that the following conditions
9 : * are met:
10 : * 1. Redistributions of source code must retain the above copyright
11 : * notice, this list of conditions and the following disclaimer.
12 : * 2. Redistributions in binary form must reproduce the above copyright
13 : * notice, this list of conditions and the following disclaimer in the
14 : * documentation and/or other materials provided with the distribution.
15 : *
16 : * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
17 : * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 : * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 : * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
20 : * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 : * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 : * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 : * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24 : * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 : * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 : * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 : *
28 : * ***** END LICENSE BLOCK ***** */
29 :
30 : #ifndef YarrInterpreter_h
31 : #define YarrInterpreter_h
32 :
33 : #include "YarrPattern.h"
34 :
35 : namespace WTF {
36 : class BumpPointerAllocator;
37 : }
38 : using WTF::BumpPointerAllocator;
39 :
40 : namespace JSC { namespace Yarr {
41 :
42 : class ByteDisjunction;
43 :
44 284020 : struct ByteTerm {
45 : enum Type {
46 : TypeBodyAlternativeBegin,
47 : TypeBodyAlternativeDisjunction,
48 : TypeBodyAlternativeEnd,
49 : TypeAlternativeBegin,
50 : TypeAlternativeDisjunction,
51 : TypeAlternativeEnd,
52 : TypeSubpatternBegin,
53 : TypeSubpatternEnd,
54 : TypeAssertionBOL,
55 : TypeAssertionEOL,
56 : TypeAssertionWordBoundary,
57 : TypePatternCharacterOnce,
58 : TypePatternCharacterFixed,
59 : TypePatternCharacterGreedy,
60 : TypePatternCharacterNonGreedy,
61 : TypePatternCasedCharacterOnce,
62 : TypePatternCasedCharacterFixed,
63 : TypePatternCasedCharacterGreedy,
64 : TypePatternCasedCharacterNonGreedy,
65 : TypeCharacterClass,
66 : TypeBackReference,
67 : TypeParenthesesSubpattern,
68 : TypeParenthesesSubpatternOnceBegin,
69 : TypeParenthesesSubpatternOnceEnd,
70 : TypeParenthesesSubpatternTerminalBegin,
71 : TypeParenthesesSubpatternTerminalEnd,
72 : TypeParentheticalAssertionBegin,
73 : TypeParentheticalAssertionEnd,
74 : TypeCheckInput,
75 : TypeUncheckInput
76 : } type;
77 : union {
78 : struct {
79 : union {
80 : UChar patternCharacter;
81 : struct {
82 : UChar lo;
83 : UChar hi;
84 : } casedCharacter;
85 : CharacterClass* characterClass;
86 : unsigned subpatternId;
87 : };
88 : union {
89 : ByteDisjunction* parenthesesDisjunction;
90 : unsigned parenthesesWidth;
91 : };
92 : QuantifierType quantityType;
93 : unsigned quantityCount;
94 : } atom;
95 : struct {
96 : int next;
97 : int end;
98 : bool onceThrough;
99 : } alternative;
100 : unsigned checkInputCount;
101 : };
102 : unsigned frameLocation;
103 : bool m_capture : 1;
104 : bool m_invert : 1;
105 : int inputPosition;
106 :
107 : // For js::Vector. Does not create a valid object.
108 0 : ByteTerm()
109 : {
110 0 : }
111 :
112 11692 : ByteTerm(UChar ch, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
113 : : frameLocation(frameLocation)
114 : , m_capture(false)
115 11692 : , m_invert(false)
116 : {
117 11692 : switch (quantityType) {
118 : case QuantifierFixedCount:
119 11611 : type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
120 11611 : break;
121 : case QuantifierGreedy:
122 72 : type = ByteTerm::TypePatternCharacterGreedy;
123 72 : break;
124 : case QuantifierNonGreedy:
125 9 : type = ByteTerm::TypePatternCharacterNonGreedy;
126 9 : break;
127 : }
128 :
129 11692 : atom.patternCharacter = ch;
130 11692 : atom.quantityType = quantityType;
131 11692 : atom.quantityCount = quantityCount;
132 11692 : inputPosition = inputPos;
133 11692 : }
134 :
135 297 : ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
136 : : frameLocation(frameLocation)
137 : , m_capture(false)
138 297 : , m_invert(false)
139 : {
140 297 : switch (quantityType) {
141 : case QuantifierFixedCount:
142 297 : type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
143 297 : break;
144 : case QuantifierGreedy:
145 0 : type = ByteTerm::TypePatternCasedCharacterGreedy;
146 0 : break;
147 : case QuantifierNonGreedy:
148 0 : type = ByteTerm::TypePatternCasedCharacterNonGreedy;
149 0 : break;
150 : }
151 :
152 297 : atom.casedCharacter.lo = lo;
153 297 : atom.casedCharacter.hi = hi;
154 297 : atom.quantityType = quantityType;
155 297 : atom.quantityCount = quantityCount;
156 297 : inputPosition = inputPos;
157 297 : }
158 :
159 39666 : ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
160 : : type(ByteTerm::TypeCharacterClass)
161 : , m_capture(false)
162 39666 : , m_invert(invert)
163 : {
164 39666 : atom.characterClass = characterClass;
165 39666 : atom.quantityType = QuantifierFixedCount;
166 39666 : atom.quantityCount = 1;
167 39666 : inputPosition = inputPos;
168 39666 : }
169 :
170 2997 : ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos)
171 : : type(type)
172 : , m_capture(capture)
173 2997 : , m_invert(false)
174 : {
175 2997 : atom.subpatternId = subpatternId;
176 2997 : atom.parenthesesDisjunction = parenthesesInfo;
177 2997 : atom.quantityType = QuantifierFixedCount;
178 2997 : atom.quantityCount = 1;
179 2997 : inputPosition = inputPos;
180 2997 : }
181 :
182 49868 : ByteTerm(Type type, bool invert = false)
183 : : type(type)
184 : , m_capture(false)
185 49868 : , m_invert(invert)
186 : {
187 49868 : atom.quantityType = QuantifierFixedCount;
188 49868 : atom.quantityCount = 1;
189 49868 : }
190 :
191 20653 : ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos)
192 : : type(type)
193 : , m_capture(capture)
194 20653 : , m_invert(invert)
195 : {
196 20653 : atom.subpatternId = subpatternId;
197 20653 : atom.quantityType = QuantifierFixedCount;
198 20653 : atom.quantityCount = 1;
199 20653 : inputPosition = inputPos;
200 20653 : }
201 :
202 2885 : static ByteTerm BOL(int inputPos)
203 : {
204 2885 : ByteTerm term(TypeAssertionBOL);
205 2885 : term.inputPosition = inputPos;
206 : return term;
207 : }
208 :
209 14426 : static ByteTerm CheckInput(unsigned count)
210 : {
211 14426 : ByteTerm term(TypeCheckInput);
212 14426 : term.checkInputCount = count;
213 : return term;
214 : }
215 :
216 0 : static ByteTerm UncheckInput(unsigned count)
217 : {
218 0 : ByteTerm term(TypeUncheckInput);
219 0 : term.checkInputCount = count;
220 : return term;
221 : }
222 :
223 2822 : static ByteTerm EOL(int inputPos)
224 : {
225 2822 : ByteTerm term(TypeAssertionEOL);
226 2822 : term.inputPosition = inputPos;
227 : return term;
228 : }
229 :
230 9 : static ByteTerm WordBoundary(bool invert, int inputPos)
231 : {
232 9 : ByteTerm term(TypeAssertionWordBoundary, invert);
233 9 : term.inputPosition = inputPos;
234 : return term;
235 : }
236 :
237 18 : static ByteTerm BackReference(unsigned subpatternId, int inputPos)
238 : {
239 18 : return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos);
240 : }
241 :
242 2975 : static ByteTerm BodyAlternativeBegin(bool onceThrough)
243 : {
244 2975 : ByteTerm term(TypeBodyAlternativeBegin);
245 2975 : term.alternative.next = 0;
246 2975 : term.alternative.end = 0;
247 2975 : term.alternative.onceThrough = onceThrough;
248 : return term;
249 : }
250 :
251 18 : static ByteTerm BodyAlternativeDisjunction(bool onceThrough)
252 : {
253 18 : ByteTerm term(TypeBodyAlternativeDisjunction);
254 18 : term.alternative.next = 0;
255 18 : term.alternative.end = 0;
256 18 : term.alternative.onceThrough = onceThrough;
257 : return term;
258 : }
259 :
260 2975 : static ByteTerm BodyAlternativeEnd()
261 : {
262 2975 : ByteTerm term(TypeBodyAlternativeEnd);
263 2975 : term.alternative.next = 0;
264 2975 : term.alternative.end = 0;
265 2975 : term.alternative.onceThrough = false;
266 : return term;
267 : }
268 :
269 11816 : static ByteTerm AlternativeBegin()
270 : {
271 11816 : ByteTerm term(TypeAlternativeBegin);
272 11816 : term.alternative.next = 0;
273 11816 : term.alternative.end = 0;
274 11816 : term.alternative.onceThrough = false;
275 : return term;
276 : }
277 :
278 2974 : static ByteTerm AlternativeDisjunction()
279 : {
280 2974 : ByteTerm term(TypeAlternativeDisjunction);
281 2974 : term.alternative.next = 0;
282 2974 : term.alternative.end = 0;
283 2974 : term.alternative.onceThrough = false;
284 : return term;
285 : }
286 :
287 2974 : static ByteTerm AlternativeEnd()
288 : {
289 2974 : ByteTerm term(TypeAlternativeEnd);
290 2974 : term.alternative.next = 0;
291 2974 : term.alternative.end = 0;
292 2974 : term.alternative.onceThrough = false;
293 : return term;
294 : }
295 :
296 2997 : static ByteTerm SubpatternBegin()
297 : {
298 2997 : return ByteTerm(TypeSubpatternBegin);
299 : }
300 :
301 2997 : static ByteTerm SubpatternEnd()
302 : {
303 2997 : return ByteTerm(TypeSubpatternEnd);
304 : }
305 :
306 7635402 : bool invert()
307 : {
308 7635402 : return m_invert;
309 : }
310 :
311 4862853 : bool capture()
312 : {
313 4862853 : return m_capture;
314 : }
315 : };
316 :
317 5972 : class ByteDisjunction {
318 : WTF_MAKE_FAST_ALLOCATED
319 : public:
320 5972 : ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
321 : : m_numSubpatterns(numSubpatterns)
322 5972 : , m_frameSize(frameSize)
323 : {
324 5972 : }
325 :
326 : Vector<ByteTerm> terms;
327 : unsigned m_numSubpatterns;
328 : unsigned m_frameSize;
329 : };
330 :
331 : struct BytecodePattern {
332 : WTF_MAKE_FAST_ALLOCATED
333 : public:
334 2975 : BytecodePattern(PassOwnPtr<ByteDisjunction> body, const Vector<ByteDisjunction*> &allParenthesesInfo, YarrPattern& pattern, BumpPointerAllocator* allocator)
335 : : m_body(body)
336 : , m_ignoreCase(pattern.m_ignoreCase)
337 : , m_multiline(pattern.m_multiline)
338 2975 : , m_allocator(allocator)
339 : {
340 2975 : newlineCharacterClass = pattern.newlineCharacterClass();
341 2975 : wordcharCharacterClass = pattern.wordcharCharacterClass();
342 :
343 2975 : m_allParenthesesInfo.append(allParenthesesInfo);
344 2975 : m_userCharacterClasses.append(pattern.m_userCharacterClasses);
345 : // 'Steal' the YarrPattern's CharacterClasses! We clear its
346 : // array, so that it won't delete them on destruction. We'll
347 : // take responsibility for that.
348 2975 : pattern.m_userCharacterClasses.clear();
349 2975 : }
350 :
351 2975 : ~BytecodePattern()
352 2975 : {
353 2975 : deleteAllValues(m_allParenthesesInfo);
354 2975 : deleteAllValues(m_userCharacterClasses);
355 2975 : }
356 :
357 : OwnPtr<ByteDisjunction> m_body;
358 : bool m_ignoreCase;
359 : bool m_multiline;
360 : // Each BytecodePattern is associated with a RegExp, each RegExp is associated
361 : // with a JSGlobalData. Cache a pointer to out JSGlobalData's m_regExpAllocator.
362 : BumpPointerAllocator* m_allocator;
363 :
364 : CharacterClass* newlineCharacterClass;
365 : CharacterClass* wordcharCharacterClass;
366 :
367 : private:
368 : Vector<ByteDisjunction*> m_allParenthesesInfo;
369 : Vector<CharacterClass*> m_userCharacterClasses;
370 : };
371 :
372 : } } // namespace JSC::Yarr
373 :
374 : #endif // YarrInterpreter_h
|