1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set ts=8 sw=4 et tw=99 ft=cpp:
3 : *
4 : * ***** BEGIN LICENSE BLOCK *****
5 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 : *
7 : * The contents of this file are subject to the Mozilla Public License Version
8 : * 1.1 (the "License"); you may not use this file except in compliance with
9 : * the License. You may obtain a copy of the License at
10 : * http://www.mozilla.org/MPL/
11 : *
12 : * Software distributed under the License is distributed on an "AS IS" basis,
13 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 : * for the specific language governing rights and limitations under the
15 : * License.
16 : *
17 : * The Original Code is Mozilla SpiderMonkey JavaScript code.
18 : *
19 : * The Initial Developer of the Original Code is
20 : * the Mozilla Foundation.
21 : * Portions created by the Initial Developer are Copyright (C) 2011
22 : * the Initial Developer. All Rights Reserved.
23 : *
24 : * Contributor(s):
25 : * Chris Leary <cdleary@mozilla.com>
26 : *
27 : * Alternatively, the contents of this file may be used under the terms of
28 : * either the GNU General Public License Version 2 or later (the "GPL"), or
29 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 : * in which case the provisions of the GPL or the LGPL are applicable instead
31 : * of those above. If you wish to allow use of your version of this file only
32 : * under the terms of either the GPL or the LGPL, and not to allow others to
33 : * use your version of this file under the terms of the MPL, indicate your
34 : * decision by deleting the provisions above and replace them with the notice
35 : * and other provisions required by the GPL or the LGPL. If you do not delete
36 : * the provisions above, a recipient may use your version of this file under
37 : * the terms of any one of the MPL, the GPL or the LGPL.
38 : *
39 : * ***** END LICENSE BLOCK ***** */
40 :
41 : #ifndef RegExpObject_h__
42 : #define RegExpObject_h__
43 :
44 : #include "mozilla/Attributes.h"
45 :
46 : #include <stddef.h>
47 : #include "jsobj.h"
48 :
49 : #include "js/TemplateLib.h"
50 :
51 : #include "yarr/Yarr.h"
52 : #if ENABLE_YARR_JIT
53 : #include "yarr/YarrJIT.h"
54 : #include "yarr/YarrSyntaxChecker.h"
55 : #else
56 : #include "yarr/pcre/pcre.h"
57 : #endif
58 :
59 : /*
60 : * JavaScript Regular Expressions
61 : *
62 : * There are several engine concepts associated with a single logical regexp:
63 : *
64 : * RegExpObject - The JS-visible object whose .[[Class]] equals "RegExp"
65 : *
66 : * RegExpShared - The compiled representation of the regexp.
67 : *
68 : * RegExpCode - The low-level implementation jit details.
69 : *
70 : * RegExpCompartment - Owns all RegExpShared instances in a compartment.
71 : *
72 : * To save memory, a RegExpShared is not created for a RegExpObject until it is
73 : * needed for execution. When a RegExpShared needs to be created, it is looked
74 : * up in a per-compartment table to allow reuse between objects. Lastly, on
75 : * GC, every RegExpShared (that is not active on the callstack) is discarded.
76 : * Because of the last point, any code using a RegExpShared (viz., by executing
77 : * a regexp) must indicate the RegExpShared is active via RegExpGuard.
78 : */
79 : namespace js {
80 :
/*
 * Outcome of running a regular expression. This is the return type of the
 * execute() methods declared in this header.
 */
81 : enum RegExpRunStatus
82 : {
83 : RegExpRunStatus_Error, /* NOTE(review): presumably execution failed -- confirm in the .cpp. */
84 : RegExpRunStatus_Success, /* NOTE(review): presumably a match was found -- confirm. */
85 : RegExpRunStatus_Success_NotFound /* NOTE(review): presumably ran cleanly but nothing matched -- confirm. */
86 : };
87 :
/*
 * Helper that hands out RegExpObject instances through build() and clone().
 * An existing object may be passed to the constructor; it defaults to NULL.
 *
 * NOTE(review): presumably a NULL |reobj| means the object is allocated on
 * demand by getOrCreate()/getOrCreateClone() -- confirm in the definition.
 */
88 : class RegExpObjectBuilder
89 : {
90 : JSContext *cx; /* Context supplied at construction. */
91 : RegExpObject *reobj_; /* Object being built; exposed through reobj(). */
92 :
93 : bool getOrCreate();
94 : bool getOrCreateClone(RegExpObject *proto);
95 :
96 : public:
97 : RegExpObjectBuilder(JSContext *cx, RegExpObject *reobj = NULL);
98 :
/* Return the object currently held by the builder (may be NULL). */
99 : RegExpObject *reobj() { return reobj_; }
100 :
/*
 * Two ways to build: from a source atom plus flags, or from a source atom
 * plus an already existing RegExpShared.
 * NOTE(review): presumably both return NULL on failure -- confirm.
 */
101 : RegExpObject *build(JSAtom *source, RegExpFlag flags);
102 : RegExpObject *build(JSAtom *source, RegExpShared &shared);
103 :
104 : /* Perform a VM-internal clone. */
105 : RegExpObject *clone(RegExpObject *other, RegExpObject *proto);
106 : };
107 :
/*
 * NOTE(review): presumably clones the RegExp object |obj|, giving the copy
 * the prototype |proto|. The definition is not in this header -- confirm.
 */
108 : JSObject *
109 : CloneRegExpObject(JSContext *cx, JSObject *obj, JSObject *proto);
110 :
111 : namespace detail {
112 :
/*
 * Holds the engine-specific compiled form of a pattern. With ENABLE_YARR_JIT
 * this is a Yarr code block plus an optional bytecode pattern; otherwise it
 * is a single JSRegExp pointer from the bundled PCRE.
 */
113 : class RegExpCode
114 : {
115 : #if ENABLE_YARR_JIT
116 : typedef JSC::Yarr::BytecodePattern BytecodePattern;
117 : typedef JSC::Yarr::ErrorCode ErrorCode;
118 : typedef JSC::Yarr::JSGlobalData JSGlobalData;
119 : typedef JSC::Yarr::YarrCodeBlock YarrCodeBlock;
120 : typedef JSC::Yarr::YarrPattern YarrPattern;
121 :
122 : /* Note: Native code is valid only if |codeBlock.isFallBack() == false|. */
123 : YarrCodeBlock codeBlock;
124 : BytecodePattern *byteCode; /* NULL until set; deleted by the destructor when non-NULL. */
125 : #else
126 : JSRegExp *compiled; /* NULL until set; freed by the destructor when non-NULL. */
127 : #endif
128 :
129 : public:
/* Start with nothing compiled: default code block and NULL pointers. */
130 58244 : RegExpCode()
131 : :
132 : #if ENABLE_YARR_JIT
133 : codeBlock(),
134 58244 : byteCode(NULL)
135 : #else
136 : compiled(NULL)
137 : #endif
138 58244 : { }
139 :
/*
 * Release whatever was compiled. In the Yarr configuration the code block is
 * released and the bytecode deleted if present. In the PCRE configuration
 * |compiled| is passed to jsRegExpFree if non-NULL.
 */
140 116488 : ~RegExpCode() {
141 : #if ENABLE_YARR_JIT
142 58244 : codeBlock.release();
143 58244 : if (byteCode)
144 2975 : Foreground::delete_<BytecodePattern>(byteCode);
145 : #else
146 : if (compiled)
147 : jsRegExpFree(compiled);
148 : #endif
149 58244 : }
150 :
/*
 * Check |source| with the Yarr syntax checker. Returns true when no error is
 * found. Otherwise the error is passed to reportYarrError together with
 * |tokenStream|, and false is returned. Builds without ENABLE_YARR_JIT fail
 * at compile time via the #error below.
 */
151 39808 : static bool checkSyntax(JSContext *cx, TokenStream *tokenStream, JSLinearString *source) {
152 : #if ENABLE_YARR_JIT
153 39808 : ErrorCode error = JSC::Yarr::checkSyntax(*source);
154 39808 : if (error == JSC::Yarr::NoError)
155 39716 : return true;
156 :
157 92 : reportYarrError(cx, tokenStream, error);
158 92 : return false;
159 : #else
160 : # error "Syntax checking not implemented for !ENABLE_YARR_JIT"
161 : #endif
162 : }
163 :
/* Engine-specific helpers; they are only declared here. */
164 : #if ENABLE_YARR_JIT
165 : static inline bool isJITRuntimeEnabled(JSContext *cx);
166 : static void reportYarrError(JSContext *cx, TokenStream *ts, JSC::Yarr::ErrorCode error);
167 : #else
168 : static void reportPCREError(JSContext *cx, int error);
169 : #endif
170 :
/*
 * Size of the output buffer for |pairCount| match pairs: two entries per pair
 * with Yarr, three per pair with PCRE.
 * NOTE(review): presumably the unit is the |int| elements of execute()'s
 * |output| array -- confirm.
 */
171 3528974 : static size_t getOutputSize(size_t pairCount) {
172 : #if ENABLE_YARR_JIT
173 3528974 : return pairCount * 2;
174 : #else
175 : return pairCount * 3; /* Should be x2, but PCRE has... needs. */
176 : #endif
177 : }
178 :
/*
 * NOTE(review): presumably compiles |pattern| under |flags| and stores the
 * number of capturing parens in |*parenCount| -- confirm in the definition.
 */
179 : bool compile(JSContext *cx, JSLinearString &pattern, unsigned *parenCount, RegExpFlag flags);
180 :
181 :
/*
 * Run the compiled code over |chars| (|length| characters) from |start|.
 * NOTE(review): presumably match indices are written into |output|, which
 * holds |outputCount| ints (see getOutputSize) -- confirm.
 */
182 : RegExpRunStatus
183 : execute(JSContext *cx, const jschar *chars, size_t length, size_t start,
184 : int *output, size_t outputCount);
185 : };
186 :
187 : } /* namespace detail */
188 :
189 : /*
190 : * A RegExpShared is the compiled representation of a regexp. A RegExpShared is
191 : * pointed to by potentially multiple RegExpObjects. Additionally, C++ code may
192 : * have pointers to RegExpShareds on the stack. The RegExpShareds are tracked in
193 : * a RegExpCompartment hashtable, and most are destroyed on every GC.
194 : *
195 : * During a GC, the trace hook for RegExpObject clears any pointers to
196 : * RegExpShareds so that there will be no dangling pointers when they are
197 : * deleted. However, some RegExpShareds are not deleted:
198 : *
199 : * 1. Any RegExpShared with pointers from the C++ stack is not deleted.
200 : * 2. Any RegExpShared that was installed in a RegExpObject during an
201 : * incremental GC is not deleted. This is because the RegExpObject may have
202 : * been traced through before the new RegExpShared was installed, in which
203 : * case deleting the RegExpShared would turn the RegExpObject's reference
204 : * into a dangling pointer.
205 : *
206 : * The activeUseCount and gcNumberWhenUsed fields are used to track these two
207 : * conditions.
208 : */
209 : class RegExpShared
210 58244 : {
211 : friend class RegExpCompartment;
212 : friend class RegExpGuard;
213 :
214 : detail::RegExpCode code; /* Engine-level compiled representation. */
215 : unsigned parenCount; /* NOTE(review): presumably the number of capturing parens -- confirm. */
216 : RegExpFlag flags; /* Flag bits tested by the accessors below. */
217 : size_t activeUseCount; /* See comment above. */
218 : uint64_t gcNumberWhenUsed; /* See comment above. */
219 :
220 : bool compile(JSContext *cx, JSAtom *source);
221 :
/*
 * The constructor is private. The macro below befriends the allocation
 * helpers.
 * NOTE(review): presumably instances are created only via RegExpCompartment
 * (a friend) -- confirm.
 */
222 : RegExpShared(JSRuntime *rt, RegExpFlag flags);
223 : JS_DECLARE_ALLOCATION_FRIENDS_FOR_PRIVATE_CONSTRUCTOR;
224 :
225 : public:
226 :
227 : /* Called when a RegExpShared is installed into a RegExpObject. */
228 : inline void prepareForUse(JSContext *cx);
229 :
230 : /* Primary interface: run this regular expression on the given string. */
231 :
232 : RegExpRunStatus
233 : execute(JSContext *cx, const jschar *chars, size_t length, size_t *lastIndex,
234 : MatchPairs **output);
235 :
236 : /* Accessors */
237 :
238 : size_t getParenCount() const { return parenCount; }
239 :
240 : /* Accounts for the "0" (whole match) pair. */
241 7057948 : size_t pairCount() const { return parenCount + 1; }
242 :
/* The boolean accessors test the corresponding bit of |flags|. */
243 609471 : RegExpFlag getFlags() const { return flags; }
244 : bool ignoreCase() const { return flags & IgnoreCaseFlag; }
245 2874656 : bool global() const { return flags & GlobalFlag; }
246 : bool multiline() const { return flags & MultilineFlag; }
247 5291357 : bool sticky() const { return flags & StickyFlag; }
248 : };
249 :
250 : /*
251 : * Extend the lifetime of a given RegExpShared to at least the lifetime of
252 : * the guard object. See the "JavaScript Regular Expressions" comment at
253 : * the top of this file.
254 : class RegExpGuard
255 : {
256 : RegExpShared *re_; /* Guarded RegExpShared, or NULL while uninitialized. */
/* Copying and assignment are declared MOZ_DELETE. */
257 : RegExpGuard(const RegExpGuard &) MOZ_DELETE;
258 : void operator=(const RegExpGuard &) MOZ_DELETE;
259 : public:
/* Create an empty guard; attach a RegExpShared later with init(). */
260 2837615 : RegExpGuard() : re_(NULL) {}
/* Guard |re| immediately, incrementing its activeUseCount. */
261 : RegExpGuard(RegExpShared &re) : re_(&re) {
262 : re_->activeUseCount++;
263 : }
/*
 * Attach |re| to a guard that is still empty (asserted) and increment its
 * activeUseCount.
 */
264 2288758 : void init(RegExpShared &re) {
265 2288758 : JS_ASSERT(!re_);
266 2288758 : re_ = &re;
267 2288758 : re_->activeUseCount++;
268 2288758 : }
/* Decrement the activeUseCount if a RegExpShared was attached. */
269 2837615 : ~RegExpGuard() {
270 2837615 : if (re_) {
271 2288758 : JS_ASSERT(re_->activeUseCount > 0);
272 2288758 : re_->activeUseCount--;
273 : }
274 2837615 : }
/* True once a RegExpShared has been attached. */
275 8061457 : bool initialized() const { return !!re_; }
/* Access the guarded RegExpShared; both assert that the guard is initialized. */
276 3988060 : RegExpShared *operator->() { JS_ASSERT(initialized()); return re_; }
277 2363613 : RegExpShared &operator*() { JS_ASSERT(initialized()); return *re_; }
278 : };
279 :
/*
 * Table of RegExpShared pointers, keyed by (atom, flags, type).
 * NOTE(review): the file-level comment says this owns every RegExpShared in
 * a compartment; sweep() presumably discards the unused ones -- confirm.
 */
280 : class RegExpCompartment
281 : {
/* Part of the key; separates ordinary entries from the 'hacked' ones described at getHack. */
282 : enum Type { Normal = 0x0, Hack = 0x1 };
283 :
/* Hash-map key, which also serves as the map's hash policy (hash/match). */
284 : struct Key {
285 : JSAtom *atom;
286 : uint16_t flag;
287 : uint16_t type;
/* Default construction leaves every field uninitialized. */
288 1739468 : Key() {}
289 86595 : Key(JSAtom *atom, RegExpFlag flag, Type type)
290 86595 : : atom(atom), flag(flag), type(type) {}
291 : typedef Key Lookup;
/* XOR of the atom's default hash, the flag shifted left by one, and the type. */
292 86595 : static HashNumber hash(const Lookup &l) {
293 86595 : return DefaultHasher<JSAtom *>::hash(l.atom) ^ (l.flag << 1) ^ l.type;
294 : }
/* Keys are equal only when atom, flag and type are all equal. */
295 29061 : static bool match(Key l, Key r) {
296 29061 : return l.atom == r.atom && l.flag == r.flag && l.type == r.type;
297 : }
298 : };
299 :
300 : typedef HashMap<Key, RegExpShared *, Key, RuntimeAllocPolicy> Map;
301 : Map map_;
302 :
/*
 * NOTE(review): presumably the shared implementation behind the public
 * get/getHack entry points, with |key| used for the table lookup and
 * |source| being what gets compiled -- confirm in the definition.
 */
303 : bool get(JSContext *cx, JSAtom *key, JSAtom *source, RegExpFlag flags, Type type,
304 : RegExpGuard *g);
305 :
306 : public:
307 : RegExpCompartment(JSRuntime *rt);
308 : ~RegExpCompartment();
309 :
310 : bool init(JSContext *cx);
311 : void sweep(JSRuntime *rt);
312 :
313 : /* Return a regexp corresponding to the given (source, flags) pair. */
314 : bool get(JSContext *cx, JSAtom *source, RegExpFlag flags, RegExpGuard *g);
315 :
316 : /* Like 'get', but compile 'maybeOpt' (if non-null). */
317 : bool get(JSContext *cx, JSAtom *source, JSString *maybeOpt, RegExpGuard *g);
318 :
319 : /*
320 : * A 'hacked' RegExpShared is one where the input 'source' doesn't match
321 : * what is actually compiled in the regexp. To compile a hacked regexp,
322 : * getHack may be called providing both the original 'source' and the
323 : * 'hackedSource' which should actually be compiled. For a given 'source'
324 : * there may only ever be one corresponding 'hackedSource'. Thus, we assume
325 : * there is some single pure function mapping 'source' to 'hackedSource'
326 : * that is always respected in calls to getHack. Note that this restriction
327 : * only applies to 'getHack': a single 'source' value may be passed to both
328 : * 'get' and 'getHack'.
329 : */
330 : bool getHack(JSContext *cx, JSAtom *source, JSAtom *hackedSource, RegExpFlag flags,
331 : RegExpGuard *g);
332 :
333 : /*
334 : * To avoid atomizing 'hackedSource', callers may call 'lookupHack',
335 : * passing only the original 'source'. Due to the abovementioned unique
336 : * mapping property, 'hackedSource' is unambiguous.
337 : */
338 : bool lookupHack(JSAtom *source, RegExpFlag flags, JSContext *cx, RegExpGuard *g);
339 : };
340 :
/*
 * The JS-visible RegExp object. lastIndex, the source atom and the four flag
 * booleans are stored in reserved slots and read through the accessors
 * below.
 */
341 : class RegExpObject : public JSObject
342 : {
343 : typedef detail::RegExpCode RegExpCode;
344 :
/* Reserved slot indices read by the accessors below. */
345 : static const unsigned LAST_INDEX_SLOT = 0;
346 : static const unsigned SOURCE_SLOT = 1;
347 : static const unsigned GLOBAL_FLAG_SLOT = 2;
348 : static const unsigned IGNORE_CASE_FLAG_SLOT = 3;
349 : static const unsigned MULTILINE_FLAG_SLOT = 4;
350 : static const unsigned STICKY_FLAG_SLOT = 5;
351 :
352 : public:
/* One more than the highest slot index above. */
353 : static const unsigned RESERVED_SLOTS = 6;
354 :
355 : /*
356 : * Note: The regexp statics flags are OR'd into the provided flags,
357 : * so this function is really meant for object creation during code
358 : * execution, as opposed to during something like XDR.
359 : */
360 : static RegExpObject *
361 : create(JSContext *cx, RegExpStatics *res, const jschar *chars, size_t length,
362 : RegExpFlag flags, TokenStream *ts);
363 :
/*
 * Creation entry points that take no RegExpStatics. The source is given
 * either as a character buffer or as an atom.
 */
364 : static RegExpObject *
365 : createNoStatics(JSContext *cx, const jschar *chars, size_t length, RegExpFlag flags,
366 : TokenStream *ts);
367 :
368 : static RegExpObject *
369 : createNoStatics(JSContext *cx, JSAtom *atom, RegExpFlag flags, TokenStream *ts);
370 :
371 : /*
372 : * Run the regular expression over the input text.
373 : *
374 : * Results are placed in |output| as integer pairs. For example,
375 : * |output[0]| and |output[1]| represent the text indices that make
376 : * up the "0" (whole match) pair. Capturing parens will result in
377 : * more output.
378 : *
379 : * N.B. it's the responsibility of the caller to hook the |output|
380 : * into the |RegExpStatics| appropriately, if necessary.
381 : */
382 : RegExpRunStatus
383 : execute(JSContext *cx, const jschar *chars, size_t length, size_t *lastIndex,
384 : MatchPairs **output);
385 :
386 : /* Accessors. */
387 :
/* Value currently stored in the lastIndex slot. */
388 1141929 : const Value &getLastIndex() const {
389 1141929 : return getSlot(LAST_INDEX_SLOT);
390 : }
391 : inline void setLastIndex(const Value &v);
392 : inline void setLastIndex(double d);
393 : inline void zeroLastIndex();
394 :
395 : JSFlatString *toString(JSContext *cx) const;
396 :
/* Read the source slot as a string and return it as an atom. */
397 1748577 : JSAtom *getSource() const {
398 1748577 : return &getSlot(SOURCE_SLOT).toString()->asAtom();
399 : }
400 : inline void setSource(JSAtom *source);
401 :
/* Rebuild a RegExpFlag bit set from the four boolean flag slots. */
402 610955 : RegExpFlag getFlags() const {
403 610955 : unsigned flags = 0;
404 610955 : flags |= global() ? GlobalFlag : 0;
405 610955 : flags |= ignoreCase() ? IgnoreCaseFlag : 0;
406 610955 : flags |= multiline() ? MultilineFlag : 0;
407 610955 : flags |= sticky() ? StickyFlag : 0;
408 610955 : return RegExpFlag(flags);
409 : }
410 :
411 : /* Flags. */
412 :
413 : inline void setIgnoreCase(bool enabled);
414 : inline void setGlobal(bool enabled);
415 : inline void setMultiline(bool enabled);
416 : inline void setSticky(bool enabled);
/* Each getter reads its flag slot as a boolean. */
417 611604 : bool ignoreCase() const { return getSlot(IGNORE_CASE_FLAG_SLOT).toBoolean(); }
418 614344 : bool global() const { return getSlot(GLOBAL_FLAG_SLOT).toBoolean(); }
419 611604 : bool multiline() const { return getSlot(MULTILINE_FLAG_SLOT).toBoolean(); }
420 612541 : bool sticky() const { return getSlot(STICKY_FLAG_SLOT).toBoolean(); }
421 :
/*
 * Access to the associated RegExpShared through a RegExpGuard.
 * NOTE(review): presumably shared() requires one to exist already, while
 * getShared() creates it on demand and can fail -- confirm in the inline
 * definitions.
 */
422 : inline void shared(RegExpGuard *g) const;
423 : inline bool getShared(JSContext *cx, RegExpGuard *g);
424 : inline void setShared(JSContext *cx, RegExpShared &shared);
425 :
426 : private:
427 : friend class RegExpObjectBuilder;
428 :
429 : /*
430 : * Compute the initial shape to associate with fresh RegExp objects,
431 : * encoding their initial properties. Return the shape after
432 : * changing this regular expression object's last property to it.
433 : */
434 : Shape *assignInitialShape(JSContext *cx);
435 :
436 : inline bool init(JSContext *cx, JSAtom *source, RegExpFlag flags);
437 :
438 : /*
439 : * Precondition: the syntax for |source| has already been validated.
440 : * Side effect: sets the private field.
441 : */
442 : bool createShared(JSContext *cx, RegExpGuard *g);
/* NOTE(review): presumably returns the current RegExpShared or NULL if none -- confirm. */
443 : RegExpShared *maybeShared() const;
444 :
/* Default construction and assignment are declared MOZ_DELETE. */
445 : RegExpObject() MOZ_DELETE;
446 : RegExpObject &operator=(const RegExpObject &reo) MOZ_DELETE;
447 :
448 : /* Call setShared in preference to setPrivate. */
449 : void setPrivate(void *priv) MOZ_DELETE;
450 : };
451 :
452 : /*
453 : * Parse regexp flags. Report an error and return false if an invalid
454 : * sequence of flags is encountered (repeat/invalid flag).
455 : *
456 : * N.B. flagStr must be rooted.
 *
 * NOTE(review): presumably the parsed flags are stored through |flagsOut|
 * on success -- confirm in the definition.
457 : */
458 : bool
459 : ParseRegExpFlags(JSContext *cx, JSString *flagStr, RegExpFlag *flagsOut);
460 :
461 : /*
462 : * Assuming ObjectClassIs(obj, ESClass_RegExp), return obj's RegExpShared.
463 : *
464 : * Beware: this RegExpShared can be owned by a compartment other than
465 : * cx->compartment. A normal RegExpGuard (which is necessary anyway)
466 : * will protect the object, but it is important not to assign the return value
467 : * to be the private of any RegExpObject.
 *
 * NOTE(review): the RegExpShared is presumably handed back through |g|, with
 * the bool result signalling success -- confirm in the inline definition.
468 : */
469 : inline bool
470 : RegExpToShared(JSContext *cx, JSObject &obj, RegExpGuard *g);
471 :
/*
 * NOTE(review): presumably serializes or deserializes the RegExp object held
 * in |*objp| through the XDR state |xdr|, returning false on failure. The
 * definition is not in this header -- confirm.
 */
472 : bool
473 : XDRScriptRegExpObject(JSXDRState *xdr, HeapPtrObject *objp);
474 :
475 : } /* namespace js */
476 :
477 : #endif
|