LCOV - code coverage report
Current view: directory - js/src/vm - RegExpObject.h (source / functions) Found Hit Coverage
Test: app.info Lines: 57 57 100.0 %
Date: 2012-06-02 Functions: 26 26 100.0 %

       1                 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
       2                 :  * vim: set ts=8 sw=4 et tw=99 ft=cpp:
       3                 :  *
       4                 :  * ***** BEGIN LICENSE BLOCK *****
       5                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       6                 :  *
       7                 :  * The contents of this file are subject to the Mozilla Public License Version
       8                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       9                 :  * the License. You may obtain a copy of the License at
      10                 :  * http://www.mozilla.org/MPL/
      11                 :  *
      12                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      13                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      14                 :  * for the specific language governing rights and limitations under the
      15                 :  * License.
      16                 :  *
      17                 :  * The Original Code is Mozilla SpiderMonkey JavaScript code.
      18                 :  *
      19                 :  * The Initial Developer of the Original Code is
      20                 :  * the Mozilla Foundation.
      21                 :  * Portions created by the Initial Developer are Copyright (C) 2011
      22                 :  * the Initial Developer. All Rights Reserved.
      23                 :  *
      24                 :  * Contributor(s):
      25                 :  *  Chris Leary <cdleary@mozilla.com>
      26                 :  *
      27                 :  * Alternatively, the contents of this file may be used under the terms of
      28                 :  * either the GNU General Public License Version 2 or later (the "GPL"), or
      29                 :  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      30                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      31                 :  * of those above. If you wish to allow use of your version of this file only
      32                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      33                 :  * use your version of this file under the terms of the MPL, indicate your
      34                 :  * decision by deleting the provisions above and replace them with the notice
      35                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      36                 :  * the provisions above, a recipient may use your version of this file under
      37                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      38                 :  *
      39                 :  * ***** END LICENSE BLOCK ***** */
      40                 : 
      41                 : #ifndef RegExpObject_h__
      42                 : #define RegExpObject_h__
      43                 : 
      44                 : #include "mozilla/Attributes.h"
      45                 : 
      46                 : #include <stddef.h>
      47                 : #include "jsobj.h"
      48                 : 
      49                 : #include "js/TemplateLib.h"
      50                 : 
      51                 : #include "yarr/Yarr.h"
      52                 : #if ENABLE_YARR_JIT
      53                 : #include "yarr/YarrJIT.h"
      54                 : #include "yarr/YarrSyntaxChecker.h"
      55                 : #else
      56                 : #include "yarr/pcre/pcre.h"
      57                 : #endif
      58                 : 
      59                 : /*
      60                 :  * JavaScript Regular Expressions
      61                 :  *
      62                 :  * There are several engine concepts associated with a single logical regexp:
      63                 :  *
      64                 :  *   RegExpObject - The JS-visible object whose .[[Class]] equals "RegExp"
      65                 :  *
      66                 :  *   RegExpShared - The compiled representation of the regexp.
      67                 :  *
      68                 :  *   RegExpCode - The low-level implementation jit details.
      69                 :  *
      70                 :  *   RegExpCompartment - Owns all RegExpShared instances in a compartment.
      71                 :  *
      72                 :  * To save memory, a RegExpShared is not created for a RegExpObject until it is
      73                 :  * needed for execution. When a RegExpShared needs to be created, it is looked
      74                 :  * up in a per-compartment table to allow reuse between objects. Lastly, on
      75                 :  * GC, every RegExpShared (that is not active on the callstack) is discarded.
      76                 :  * Because of the last point, any code using a RegExpShared (viz., by executing
      77                 :  * a regexp) must indicate the RegExpShared is active via RegExpGuard.
      78                 :  */
      79                 : namespace js {
      80                 : 
      81                 : enum RegExpRunStatus
      82                 : {
      83                 :     RegExpRunStatus_Error,
      84                 :     RegExpRunStatus_Success,
      85                 :     RegExpRunStatus_Success_NotFound
      86                 : };
      87                 : 
      88                 : class RegExpObjectBuilder
      89                 : {
      90                 :     JSContext       *cx;
      91                 :     RegExpObject    *reobj_;
      92                 : 
      93                 :     bool getOrCreate();
      94                 :     bool getOrCreateClone(RegExpObject *proto);
      95                 : 
      96                 :   public:
      97                 :     RegExpObjectBuilder(JSContext *cx, RegExpObject *reobj = NULL);
      98                 : 
      99                 :     RegExpObject *reobj() { return reobj_; }
     100                 : 
     101                 :     RegExpObject *build(JSAtom *source, RegExpFlag flags);
     102                 :     RegExpObject *build(JSAtom *source, RegExpShared &shared);
     103                 : 
     104                 :     /* Perform a VM-internal clone. */
     105                 :     RegExpObject *clone(RegExpObject *other, RegExpObject *proto);
     106                 : };
     107                 : 
     108                 : JSObject *
     109                 : CloneRegExpObject(JSContext *cx, JSObject *obj, JSObject *proto);
     110                 : 
     111                 : namespace detail {
     112                 : 
     113                 : class RegExpCode
     114                 : {
     115                 : #if ENABLE_YARR_JIT
     116                 :     typedef JSC::Yarr::BytecodePattern BytecodePattern;
     117                 :     typedef JSC::Yarr::ErrorCode ErrorCode;
     118                 :     typedef JSC::Yarr::JSGlobalData JSGlobalData;
     119                 :     typedef JSC::Yarr::YarrCodeBlock YarrCodeBlock;
     120                 :     typedef JSC::Yarr::YarrPattern YarrPattern;
     121                 : 
     122                 :     /* Note: Native code is valid only if |codeBlock.isFallBack() == false|. */
     123                 :     YarrCodeBlock   codeBlock;
     124                 :     BytecodePattern *byteCode;
     125                 : #else
     126                 :     JSRegExp        *compiled;
     127                 : #endif
     128                 : 
     129                 :   public:
     130           58244 :     RegExpCode()
     131                 :       :
     132                 : #if ENABLE_YARR_JIT
     133                 :         codeBlock(),
     134           58244 :         byteCode(NULL)
     135                 : #else
     136                 :         compiled(NULL)
     137                 : #endif
     138           58244 :     { }
     139                 : 
     140          116488 :     ~RegExpCode() {
     141                 : #if ENABLE_YARR_JIT
     142           58244 :         codeBlock.release();
     143           58244 :         if (byteCode)
     144            2975 :             Foreground::delete_<BytecodePattern>(byteCode);
     145                 : #else
     146                 :         if (compiled)
     147                 :             jsRegExpFree(compiled);
     148                 : #endif
     149           58244 :     }
     150                 : 
     151           39808 :     static bool checkSyntax(JSContext *cx, TokenStream *tokenStream, JSLinearString *source) {
     152                 : #if ENABLE_YARR_JIT
     153           39808 :         ErrorCode error = JSC::Yarr::checkSyntax(*source);
     154           39808 :         if (error == JSC::Yarr::NoError)
     155           39716 :             return true;
     156                 : 
     157              92 :         reportYarrError(cx, tokenStream, error);
     158              92 :         return false;
     159                 : #else
     160                 : # error "Syntax checking not implemented for !ENABLE_YARR_JIT"
     161                 : #endif
     162                 :     }
     163                 : 
     164                 : #if ENABLE_YARR_JIT
     165                 :     static inline bool isJITRuntimeEnabled(JSContext *cx);
     166                 :     static void reportYarrError(JSContext *cx, TokenStream *ts, JSC::Yarr::ErrorCode error);
     167                 : #else
     168                 :     static void reportPCREError(JSContext *cx, int error);
     169                 : #endif
     170                 : 
     171         3528974 :     static size_t getOutputSize(size_t pairCount) {
     172                 : #if ENABLE_YARR_JIT
     173         3528974 :         return pairCount * 2;
     174                 : #else
     175                 :         return pairCount * 3; /* Should be x2, but PCRE has... needs. */
     176                 : #endif
     177                 :     }
     178                 : 
     179                 :     bool compile(JSContext *cx, JSLinearString &pattern, unsigned *parenCount, RegExpFlag flags);
     180                 : 
     181                 : 
     182                 :     RegExpRunStatus
     183                 :     execute(JSContext *cx, const jschar *chars, size_t length, size_t start,
     184                 :             int *output, size_t outputCount);
     185                 : };
     186                 : 
     187                 : }  /* namespace detail */
     188                 : 
     189                 : /*
     190                 :  * A RegExpShared is the compiled representation of a regexp. A RegExpShared is
     191                 :  * pointed to by potentially multiple RegExpObjects. Additionally, C++ code may
     192                 :  * have pointers to RegExpShareds on the stack. The RegExpShareds are tracked in
     193                 :  * a RegExpCompartment hashtable, and most are destroyed on every GC.
     194                 :  *
     195                 :  * During a GC, the trace hook for RegExpObject clears any pointers to
     196                 :  * RegExpShareds so that there will be no dangling pointers when they are
     197                 :  * deleted. However, some RegExpShareds are not deleted:
     198                 :  *
     199                 :  *   1. Any RegExpShared with pointers from the C++ stack is not deleted.
     200                 :  *   2. Any RegExpShared that was installed in a RegExpObject during an
     201                 :  *      incremental GC is not deleted. This is because the RegExpObject may have
     202                 :  *      been traced through before the new RegExpShared was installed, in which
     203                 :  *      case deleting the RegExpShared would turn the RegExpObject's reference
     204                 :  *      into a dangling pointer.
     205                 :  *
     206                 :  * The activeUseCount and gcNumberWhenUsed fields are used to track these two
     207                 :  * conditions.
     208                 :  */
     209                 : class RegExpShared
     210           58244 : {
     211                 :     friend class RegExpCompartment;
     212                 :     friend class RegExpGuard;
     213                 : 
     214                 :     detail::RegExpCode code;
     215                 :     unsigned              parenCount;
     216                 :     RegExpFlag         flags;
     217                 :     size_t             activeUseCount;   /* See comment above. */
     218                 :     uint64_t           gcNumberWhenUsed; /* See comment above. */
     219                 : 
     220                 :     bool compile(JSContext *cx, JSAtom *source);
     221                 : 
     222                 :     RegExpShared(JSRuntime *rt, RegExpFlag flags);
     223                 :     JS_DECLARE_ALLOCATION_FRIENDS_FOR_PRIVATE_CONSTRUCTOR;
     224                 : 
     225                 :   public:
     226                 : 
     227                 :     /* Called when a RegExpShared is installed into a RegExpObject. */
     228                 :     inline void prepareForUse(JSContext *cx);
     229                 : 
     230                 :     /* Primary interface: run this regular expression on the given string. */
     231                 : 
     232                 :     RegExpRunStatus
     233                 :     execute(JSContext *cx, const jschar *chars, size_t length, size_t *lastIndex,
     234                 :             MatchPairs **output);
     235                 : 
     236                 :     /* Accessors */
     237                 : 
     238                 :     size_t getParenCount() const        { return parenCount; }
     239                 : 
     240                 :     /* Accounts for the "0" (whole match) pair. */
     241         7057948 :     size_t pairCount() const            { return parenCount + 1; }
     242                 : 
     243          609471 :     RegExpFlag getFlags() const         { return flags; }
     244                 :     bool ignoreCase() const             { return flags & IgnoreCaseFlag; }
     245         2874656 :     bool global() const                 { return flags & GlobalFlag; }
     246                 :     bool multiline() const              { return flags & MultilineFlag; }
     247         5291357 :     bool sticky() const                 { return flags & StickyFlag; }
     248                 : };
     249                 : 
     250                 : /*
     251                 :  * Extend the lifetime of a given RegExpShared to at least the lifetime of
     252                 :  * the guard object. See Regular Expression comment at the top.
     253                 :  */
     254                 : class RegExpGuard
     255                 : {
     256                 :     RegExpShared *re_;
     257                 :     RegExpGuard(const RegExpGuard &) MOZ_DELETE;
     258                 :     void operator=(const RegExpGuard &) MOZ_DELETE;
     259                 :   public:
     260         2837615 :     RegExpGuard() : re_(NULL) {}
     261                 :     RegExpGuard(RegExpShared &re) : re_(&re) {
     262                 :         re_->activeUseCount++;
     263                 :     }
     264         2288758 :     void init(RegExpShared &re) {
     265         2288758 :         JS_ASSERT(!re_);
     266         2288758 :         re_ = &re;
     267         2288758 :         re_->activeUseCount++;
     268         2288758 :     }
     269         2837615 :     ~RegExpGuard() {
     270         2837615 :         if (re_) {
     271         2288758 :             JS_ASSERT(re_->activeUseCount > 0);
     272         2288758 :             re_->activeUseCount--;
     273                 :         }
     274         2837615 :     }
     275         8061457 :     bool initialized() const { return !!re_; }
     276         3988060 :     RegExpShared *operator->() { JS_ASSERT(initialized()); return re_; }
     277         2363613 :     RegExpShared &operator*() { JS_ASSERT(initialized()); return *re_; }
     278                 : };
     279                 : 
     280                 : class RegExpCompartment
     281                 : {
     282                 :     enum Type { Normal = 0x0, Hack = 0x1 };
     283                 : 
     284                 :     struct Key {
     285                 :         JSAtom *atom;
     286                 :         uint16_t flag;
     287                 :         uint16_t type;
     288         1739468 :         Key() {}
     289           86595 :         Key(JSAtom *atom, RegExpFlag flag, Type type)
     290           86595 :           : atom(atom), flag(flag), type(type) {}
     291                 :         typedef Key Lookup;
     292           86595 :         static HashNumber hash(const Lookup &l) {
     293           86595 :             return DefaultHasher<JSAtom *>::hash(l.atom) ^ (l.flag << 1) ^ l.type;
     294                 :         }
     295           29061 :         static bool match(Key l, Key r) {
     296           29061 :             return l.atom == r.atom && l.flag == r.flag && l.type == r.type;
     297                 :         }
     298                 :     };
     299                 : 
     300                 :     typedef HashMap<Key, RegExpShared *, Key, RuntimeAllocPolicy> Map;
     301                 :     Map map_;
     302                 : 
     303                 :     bool get(JSContext *cx, JSAtom *key, JSAtom *source, RegExpFlag flags, Type type,
     304                 :              RegExpGuard *g);
     305                 : 
     306                 :   public:
     307                 :     RegExpCompartment(JSRuntime *rt);
     308                 :     ~RegExpCompartment();
     309                 : 
     310                 :     bool init(JSContext *cx);
     311                 :     void sweep(JSRuntime *rt);
     312                 : 
     313                 :     /* Return a regexp corresponding to the given (source, flags) pair. */
     314                 :     bool get(JSContext *cx, JSAtom *source, RegExpFlag flags, RegExpGuard *g);
     315                 : 
     316                 :     /* Like 'get', but compile 'maybeOpt' (if non-null). */
     317                 :     bool get(JSContext *cx, JSAtom *source, JSString *maybeOpt, RegExpGuard *g);
     318                 : 
     319                 :     /*
     320                 :      * A 'hacked' RegExpShared is one where the input 'source' doesn't match
     321                 :      * what is actually compiled in the regexp. To compile a hacked regexp,
     322                 :      * getHack may be called providing both the original 'source' and the
     323                 :      * 'hackedSource' which should actually be compiled. For a given 'source'
     324                 :      * there may only ever be one corresponding 'hackedSource'. Thus, we assume
     325                 :      * there is some single pure function mapping 'source' to 'hackedSource'
     326                 :      * that is always respected in calls to getHack. Note that this restriction
     327                 :      * only applies to 'getHack': a single 'source' value may be passed to both
     328                 :      * 'get' and 'getHack'.
     329                 :      */
     330                 :     bool getHack(JSContext *cx, JSAtom *source, JSAtom *hackedSource, RegExpFlag flags,
     331                 :                  RegExpGuard *g);
     332                 : 
     333                 :     /*
     334                 :      * To avoid atomizing 'hackedSource', callers may call 'lookupHack',
     335                 :      * passing only the original 'source'. Due to the above-mentioned unique
     336                 :      * mapping property, 'hackedSource' is unambiguous.
     337                 :      */
     338                 :     bool lookupHack(JSAtom *source, RegExpFlag flags, JSContext *cx, RegExpGuard *g);
     339                 : };
     340                 : 
     341                 : class RegExpObject : public JSObject
     342                 : {
     343                 :     typedef detail::RegExpCode RegExpCode;
     344                 : 
     345                 :     static const unsigned LAST_INDEX_SLOT          = 0;
     346                 :     static const unsigned SOURCE_SLOT              = 1;
     347                 :     static const unsigned GLOBAL_FLAG_SLOT         = 2;
     348                 :     static const unsigned IGNORE_CASE_FLAG_SLOT    = 3;
     349                 :     static const unsigned MULTILINE_FLAG_SLOT      = 4;
     350                 :     static const unsigned STICKY_FLAG_SLOT         = 5;
     351                 : 
     352                 :   public:
     353                 :     static const unsigned RESERVED_SLOTS = 6;
     354                 : 
     355                 :     /*
     356                 :      * Note: The regexp statics flags are OR'd into the provided flags,
     357                 :      * so this function is really meant for object creation during code
     358                 :      * execution, as opposed to during something like XDR.
     359                 :      */
     360                 :     static RegExpObject *
     361                 :     create(JSContext *cx, RegExpStatics *res, const jschar *chars, size_t length,
     362                 :            RegExpFlag flags, TokenStream *ts);
     363                 : 
     364                 :     static RegExpObject *
     365                 :     createNoStatics(JSContext *cx, const jschar *chars, size_t length, RegExpFlag flags,
     366                 :                     TokenStream *ts);
     367                 : 
     368                 :     static RegExpObject *
     369                 :     createNoStatics(JSContext *cx, JSAtom *atom, RegExpFlag flags, TokenStream *ts);
     370                 : 
     371                 :     /*
     372                 :      * Run the regular expression over the input text.
     373                 :      *
     374                 :      * Results are placed in |output| as integer pairs. For example,
     375                 :      * |output[0]| and |output[1]| represent the text indices that make
     376                 :      * up the "0" (whole match) pair. Capturing parens will result in
     377                 :      * more output.
     378                 :      *
     379                 :      * N.B. it's the responsibility of the caller to hook the |output|
     380                 :      * into the |RegExpStatics| appropriately, if necessary.
     381                 :      */
     382                 :     RegExpRunStatus
     383                 :     execute(JSContext *cx, const jschar *chars, size_t length, size_t *lastIndex,
     384                 :             MatchPairs **output);
     385                 : 
     386                 :     /* Accessors. */
     387                 : 
     388         1141929 :     const Value &getLastIndex() const {
     389         1141929 :         return getSlot(LAST_INDEX_SLOT);
     390                 :     }
     391                 :     inline void setLastIndex(const Value &v);
     392                 :     inline void setLastIndex(double d);
     393                 :     inline void zeroLastIndex();
     394                 : 
     395                 :     JSFlatString *toString(JSContext *cx) const;
     396                 : 
     397         1748577 :     JSAtom *getSource() const {
     398         1748577 :         return &getSlot(SOURCE_SLOT).toString()->asAtom();
     399                 :     }
     400                 :     inline void setSource(JSAtom *source);
     401                 : 
     402          610955 :     RegExpFlag getFlags() const {
     403          610955 :         unsigned flags = 0;
     404          610955 :         flags |= global() ? GlobalFlag : 0;
     405          610955 :         flags |= ignoreCase() ? IgnoreCaseFlag : 0;
     406          610955 :         flags |= multiline() ? MultilineFlag : 0;
     407          610955 :         flags |= sticky() ? StickyFlag : 0;
     408          610955 :         return RegExpFlag(flags);
     409                 :     }
     410                 : 
     411                 :     /* Flags. */
     412                 : 
     413                 :     inline void setIgnoreCase(bool enabled);
     414                 :     inline void setGlobal(bool enabled);
     415                 :     inline void setMultiline(bool enabled);
     416                 :     inline void setSticky(bool enabled);
     417          611604 :     bool ignoreCase() const { return getSlot(IGNORE_CASE_FLAG_SLOT).toBoolean(); }
     418          614344 :     bool global() const     { return getSlot(GLOBAL_FLAG_SLOT).toBoolean(); }
     419          611604 :     bool multiline() const  { return getSlot(MULTILINE_FLAG_SLOT).toBoolean(); }
     420          612541 :     bool sticky() const     { return getSlot(STICKY_FLAG_SLOT).toBoolean(); }
     421                 : 
     422                 :     inline void shared(RegExpGuard *g) const;
     423                 :     inline bool getShared(JSContext *cx, RegExpGuard *g);
     424                 :     inline void setShared(JSContext *cx, RegExpShared &shared);
     425                 : 
     426                 :   private:
     427                 :     friend class RegExpObjectBuilder;
     428                 : 
     429                 :     /*
     430                 :      * Compute the initial shape to associate with fresh RegExp objects,
     431                 :      * encoding their initial properties. Return the shape after
     432                 :      * changing this regular expression object's last property to it.
     433                 :      */
     434                 :     Shape *assignInitialShape(JSContext *cx);
     435                 : 
     436                 :     inline bool init(JSContext *cx, JSAtom *source, RegExpFlag flags);
     437                 : 
     438                 :     /*
     439                 :      * Precondition: the syntax for |source| has already been validated.
     440                 :      * Side effect: sets the private field.
     441                 :      */
     442                 :     bool createShared(JSContext *cx, RegExpGuard *g);
     443                 :     RegExpShared *maybeShared() const;
     444                 : 
     445                 :     RegExpObject() MOZ_DELETE;
     446                 :     RegExpObject &operator=(const RegExpObject &reo) MOZ_DELETE;
     447                 : 
     448                 :     /* Call setShared in preference to setPrivate. */
     449                 :     void setPrivate(void *priv) MOZ_DELETE;
     450                 : };
     451                 : 
     452                 : /*
     453                 :  * Parse regexp flags. Report an error and return false if an invalid
     454                 :  * sequence of flags is encountered (repeat/invalid flag).
     455                 :  *
     456                 :  * N.B. flagStr must be rooted.
     457                 :  */
     458                 : bool
     459                 : ParseRegExpFlags(JSContext *cx, JSString *flagStr, RegExpFlag *flagsOut);
     460                 : 
     461                 : /*
     462                 :  * Assuming ObjectClassIs(obj, ESClass_RegExp), return obj's RegExpShared.
     463                 :  *
     464                 :  * Beware: this RegExpShared can be owned by a compartment other than
     465                 :  * cx->compartment. Normal RegExpGuard (which is necessary anyway)
     466                 :  * will protect the object but it is important not to assign the return value
     467                 :  * to be the private of any RegExpObject.
     468                 :  */
     469                 : inline bool
     470                 : RegExpToShared(JSContext *cx, JSObject &obj, RegExpGuard *g);
     471                 : 
     472                 : bool
     473                 : XDRScriptRegExpObject(JSXDRState *xdr, HeapPtrObject *objp);
     474                 : 
     475                 : } /* namespace js */
     476                 : 
     477                 : #endif

Generated by: LCOV version 1.7