LCOV - code coverage report
Current view: directory - js/src/yarr - YarrPattern.h (source / functions) Found Hit Coverage
Test: app.info Lines: 151 118 78.1 %
Date: 2012-06-02 Functions: 40 35 87.5 %

       1                 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
       2                 :  * vim: set ts=8 sw=4 et tw=99 ft=cpp:
       3                 :  *
       4                 :  * ***** BEGIN LICENSE BLOCK *****
       5                 :  * Copyright (C) 2009 Apple Inc. All rights reserved.
       6                 :  * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
       7                 :  *
       8                 :  * Redistribution and use in source and binary forms, with or without
       9                 :  * modification, are permitted provided that the following conditions
      10                 :  * are met:
      11                 :  * 1. Redistributions of source code must retain the above copyright
      12                 :  *    notice, this list of conditions and the following disclaimer.
      13                 :  * 2. Redistributions in binary form must reproduce the above copyright
      14                 :  *    notice, this list of conditions and the following disclaimer in the
      15                 :  *    documentation and/or other materials provided with the distribution.
      16                 :  *
      17                 :  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
      18                 :  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
      19                 :  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
      20                 :  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
      21                 :  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
      22                 :  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
      23                 :  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
      24                 :  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
      25                 :  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
      26                 :  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
      27                 :  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
      28                 :  *
      29                 :  * ***** END LICENSE BLOCK ***** */
      30                 : 
      31                 : #ifndef YarrPattern_h
      32                 : #define YarrPattern_h
      33                 : 
      34                 : #include "wtfbridge.h"
      35                 : #include "ASCIICType.h"
      36                 : 
      37                 : namespace JSC { namespace Yarr {
      38                 : 
      39                 : enum ErrorCode { // Status codes for pattern compilation; YarrPattern::compile() returns one of these.
      40                 :     NoError,
      41                 :     PatternTooLarge,
      42                 :     QuantifierOutOfOrder,
      43                 :     QuantifierWithoutAtom,
      44                 :     MissingParentheses,
      45                 :     ParenthesesUnmatched,
      46                 :     ParenthesesTypeInvalid,
      47                 :     CharacterClassUnmatched,
      48                 :     CharacterClassInvalidRange,
      49                 :     CharacterClassOutOfOrder,
      50                 :     EscapeUnterminated,
      51                 :     QuantifierTooLarge,
      52                 :     NumberOfErrorCodes // Not an error: as the last enumerator its value equals the count of the codes above it.
      53                 : };
      54                 : 
      55                 : struct PatternDisjunction;
      56                 : 
      57          180600 : struct CharacterRange { // A pair of UChar bounds; presumably an inclusive range -- TODO confirm at use sites.
      58                 :     UChar begin;
      59                 :     UChar end;
      60                 : 
      61           89400 :     CharacterRange(UChar begin, UChar end) // Stores both bounds as given; does not check that begin <= end.
      62                 :         : begin(begin)
      63           89400 :         , end(end)
      64                 :     {
      65           89400 :     }
      66                 : };
      67                 : 
      68            4820 : struct CharacterClassTable : RefCounted<CharacterClassTable> { // Ref-counted holder for a lookup-table pointer and an inverted flag.
      69                 :     friend class js::OffTheBooks; // create() builds instances via js::OffTheBooks::new_, and the constructor is private.
      70                 :     const char* m_table; // Stored exactly as passed in; this struct neither copies nor frees it.
      71                 :     bool m_inverted; // NOTE(review): presumably means lookups in m_table are negated -- confirm at use sites.
      72            4820 :     static PassRefPtr<CharacterClassTable> create(const char* table, bool inverted) // The only public way to build an instance.
      73                 :     {
      74            4820 :         return adoptRef(js::OffTheBooks::new_<CharacterClassTable>(table, inverted)); // NOTE(review): result of new_ is not checked here -- confirm whether it can be null.
      75                 :     }
      76                 : 
      77                 : private:
      78            4820 :     CharacterClassTable(const char* table, bool inverted) // Private: callers go through create().
      79                 :         : m_table(table)
      80            4820 :         , m_inverted(inverted)
      81                 :     {
      82            4820 :     }
      83                 : };
      84                 : 
      85                 : struct CharacterClass { // Matches and ranges describing a character class, plus an optional lookup table.
      86                 :     WTF_MAKE_FAST_ALLOCATED
      87                 : public:
      88                 :     // All CharacterClass instances have to have the full set of matches and ranges,
      89                 :     // they may have an optional table for faster lookups (which must match the
      90                 :     // specified matches and ranges)
      91           54364 :     CharacterClass(PassRefPtr<CharacterClassTable> table) // Stores the table; the four vectors are default-constructed.
      92           54364 :         : m_table(table)
      93                 :     {
      94           54364 :     }
      95           54364 :     ~CharacterClass() // NOTE(review): deletes the raw pointer held by the RefPtr member, which is itself destroyed afterwards -- confirm the table cannot be released twice.
      96           54364 :     {
      97           54364 :         js::Foreground::delete_(m_table.get());
      98           54364 :     }
      99                 :     Vector<UChar> m_matches; // Individual characters belonging to the class.
     100                 :     Vector<CharacterRange> m_ranges; // Character ranges belonging to the class.
     101                 :     Vector<UChar> m_matchesUnicode; // NOTE(review): presumably the non-ASCII counterpart of m_matches -- confirm.
     102                 :     Vector<CharacterRange> m_rangesUnicode; // NOTE(review): presumably the non-ASCII counterpart of m_ranges -- confirm.
     103                 :     RefPtr<CharacterClassTable> m_table; // Optional lookup table (see the comment above the constructor).
     104                 : };
     105                 : 
     106                 : enum QuantifierType { // Kind of quantifier attached to a PatternTerm (stored in PatternTerm::quantityType).
     107                 :     QuantifierFixedCount, // The value every PatternTerm constructor starts with, together with quantityCount = 1.
     108                 :     QuantifierGreedy,
     109                 :     QuantifierNonGreedy
     110                 : };
     111                 : 
     112          829856 : struct PatternTerm { // One term of a PatternAlternative: a type tag, a payload union, and quantifier/position fields.
     113                 :     enum Type {
     114                 :         TypeAssertionBOL,
     115                 :         TypeAssertionEOL,
     116                 :         TypeAssertionWordBoundary,
     117                 :         TypePatternCharacter,
     118                 :         TypeCharacterClass,
     119                 :         TypeBackReference,
     120                 :         TypeForwardReference,
     121                 :         TypeParenthesesSubpattern,
     122                 :         TypeParentheticalAssertion
     123                 :     } type; // Tag; each constructor below sets the union member that goes with the tag it assigns.
     124                 :     bool m_capture :1;
     125                 :     bool m_invert :1;
     126                 :     union { // Payload; each constructor writes one member (or none, for the Type-only constructor).
     127                 :         UChar patternCharacter;
     128                 :         CharacterClass* characterClass;
     129                 :         unsigned backReferenceSubpatternId;
     130                 :         struct {
     131                 :             PatternDisjunction* disjunction;
     132                 :             unsigned subpatternId;
     133                 :             unsigned lastSubpatternId; // NOTE(review): not set by any constructor in this struct -- confirm it is assigned before use.
     134                 :             bool isCopy;
     135                 :             bool isTerminal;
     136                 :         } parentheses;
     137                 :     };
     138                 :     QuantifierType quantityType;
     139                 :     unsigned quantityCount;
     140                 :     int inputPosition; // NOTE(review): not set by any constructor in this struct -- confirm it is assigned before use.
     141                 :     unsigned frameLocation; // NOTE(review): not set by any constructor in this struct -- confirm it is assigned before use.
     142                 : 
     143                 :     // No-argument constructor for js::Vector.
     144               0 :     PatternTerm() // Builds the same state as PatternTerm(UChar) with a character value of 0.
     145                 :         : type(PatternTerm::TypePatternCharacter)
     146                 :         , m_capture(false)
     147               0 :         , m_invert(false)
     148                 :     {
     149               0 :         patternCharacter = 0;
     150               0 :         quantityType = QuantifierFixedCount;
     151               0 :         quantityCount = 1;
     152               0 :     }
     153                 : 
     154          201564 :     PatternTerm(UChar ch) // Single-character term. Not explicit, so a UChar converts implicitly to a PatternTerm.
     155                 :         : type(PatternTerm::TypePatternCharacter)
     156                 :         , m_capture(false)
     157          201564 :         , m_invert(false)
     158                 :     {
     159          201564 :         patternCharacter = ch;
     160          201564 :         quantityType = QuantifierFixedCount;
     161          201564 :         quantityCount = 1;
     162          201564 :     }
     163                 : 
     164           62014 :     PatternTerm(CharacterClass* charClass, bool invert) // Character-class term; stores the pointer as given (this struct has no destructor to free it).
     165                 :         : type(PatternTerm::TypeCharacterClass)
     166                 :         , m_capture(false)
     167           62014 :         , m_invert(invert)
     168                 :     {
     169           62014 :         characterClass = charClass;
     170           62014 :         quantityType = QuantifierFixedCount;
     171           62014 :         quantityCount = 1;
     172           62014 :     }
     173                 : 
     174           49797 :     PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false) // Parentheses term of the given type; starts as neither a copy nor terminal.
     175                 :         : type(type)
     176                 :         , m_capture(capture)
     177           49797 :         , m_invert(invert)
     178                 :     {
     179           49797 :         parentheses.disjunction = disjunction;
     180           49797 :         parentheses.subpatternId = subpatternId;
     181           49797 :         parentheses.isCopy = false;
     182           49797 :         parentheses.isTerminal = false;
     183           49797 :         quantityType = QuantifierFixedCount;
     184           49797 :         quantityCount = 1;
     185           49797 :     }
     186                 :     
     187           41662 :     PatternTerm(Type type, bool invert = false) // Payload-free term; used by the ForwardReference/BOL/EOL/WordBoundary factories below.
     188                 :         : type(type)
     189                 :         , m_capture(false)
     190           41662 :         , m_invert(invert)
     191                 :     {
     192           41662 :         quantityType = QuantifierFixedCount;
     193           41662 :         quantityCount = 1;
     194           41662 :     }
     195                 : 
     196              18 :     PatternTerm(unsigned spatternId) // Back-reference term for subpattern spatternId. Not explicit, so an unsigned converts implicitly.
     197                 :         : type(TypeBackReference)
     198                 :         , m_capture(false)
     199              18 :         , m_invert(false)
     200                 :     {
     201              18 :         backReferenceSubpatternId = spatternId;
     202              18 :         quantityType = QuantifierFixedCount;
     203              18 :         quantityCount = 1;
     204              18 :     }
     205                 : 
     206               0 :     static PatternTerm ForwardReference() // Factory: term of type TypeForwardReference, not inverted.
     207                 :     {
     208               0 :         return PatternTerm(TypeForwardReference);
     209                 :     }
     210                 : 
     211           19762 :     static PatternTerm BOL() // Factory: term of type TypeAssertionBOL, not inverted.
     212                 :     {
     213           19762 :         return PatternTerm(TypeAssertionBOL);
     214                 :     }
     215                 : 
     216           21477 :     static PatternTerm EOL() // Factory: term of type TypeAssertionEOL, not inverted.
     217                 :     {
     218           21477 :         return PatternTerm(TypeAssertionEOL);
     219                 :     }
     220                 : 
     221             423 :     static PatternTerm WordBoundary(bool invert) // Factory: term of type TypeAssertionWordBoundary with the given invert flag.
     222                 :     {
     223             423 :         return PatternTerm(TypeAssertionWordBoundary, invert);
     224                 :     }
     225                 :     
     226          101687 :     bool invert() // Returns the m_invert flag.
     227                 :     {
     228          101687 :         return m_invert;
     229                 :     }
     230                 : 
     231          177975 :     bool capture() // Returns the m_capture flag.
     232                 :     {
     233          177975 :         return m_capture;
     234                 :     }
     235                 :     
     236          108514 :     void quantify(unsigned count, QuantifierType type) // Overwrites the quantifier; every constructor starts at (1, QuantifierFixedCount).
     237                 :     {
     238          108514 :         quantityCount = count;
     239          108514 :         quantityType = type;
     240          108514 :     }
     241                 : };
     242                 : 
     243          122128 : struct PatternAlternative { // A sequence of PatternTerms belonging to one PatternDisjunction.
     244                 :     WTF_MAKE_FAST_ALLOCATED
     245                 : public:
     246          122128 :     PatternAlternative(PatternDisjunction* disjunction) // Records the parent and clears all four flags; m_minimumSize is left uninitialised here.
     247                 :         : m_parent(disjunction)
     248                 :         , m_onceThrough(false)
     249                 :         , m_hasFixedSize(false)
     250                 :         , m_startsWithBOL(false)
     251          122128 :         , m_containsBOL(false)
     252                 :     {
     253          122128 :     }
     254                 : 
     255          201955 :     PatternTerm& lastTerm() // Returns a reference to the final term; asserts that m_terms is non-empty.
     256                 :     {
     257          201955 :         ASSERT(m_terms.size());
     258          201955 :         return m_terms[m_terms.size() - 1];
     259                 :     }
     260                 :     
     261               0 :     void removeLastTerm() // Drops the final term by shrinking m_terms by one; asserts that m_terms is non-empty.
     262                 :     {
     263               0 :         ASSERT(m_terms.size());
     264               0 :         m_terms.shrink(m_terms.size() - 1);
     265               0 :     }
     266                 :     
     267           19679 :     void setOnceThrough() // Sets m_onceThrough; nothing in this struct clears it again.
     268                 :     {
     269           19679 :         m_onceThrough = true;
     270           19679 :     }
     271                 :     
     272          101656 :     bool onceThrough() // Returns the m_onceThrough flag.
     273                 :     {
     274          101656 :         return m_onceThrough;
     275                 :     }
     276                 : 
     277                 :     Vector<PatternTerm> m_terms;
     278                 :     PatternDisjunction* m_parent; // The disjunction passed to the constructor; not freed by this struct.
     279                 :     unsigned m_minimumSize; // NOTE(review): not initialised by the constructor -- confirm it is assigned before it is read.
     280                 :     bool m_onceThrough : 1;
     281                 :     bool m_hasFixedSize : 1;
     282                 :     bool m_startsWithBOL : 1;
     283                 :     bool m_containsBOL : 1;
     284                 : };
     285                 : 
     286                 : struct PatternDisjunction { // A list of PatternAlternative pointers that this object creates and later releases.
     287                 :     WTF_MAKE_FAST_ALLOCATED
     288                 : public:
     289          113967 :     PatternDisjunction(PatternAlternative* parent = 0) // Records the parent (null by default) and clears m_hasFixedSize; the two size fields are left uninitialised.
     290                 :         : m_parent(parent)
     291          113967 :         , m_hasFixedSize(false)
     292                 :     {
     293          113967 :     }
     294                 :     
     295          113967 :     ~PatternDisjunction() // Passes every stored alternative to deleteAllValues() (helper defined outside this file).
     296          113967 :     {
     297          113967 :         deleteAllValues(m_alternatives);
     298          113967 :     }
     299                 : 
     300          122128 :     PatternAlternative* addNewAlternative() // Allocates an alternative parented to this disjunction, appends it, and returns it.
     301                 :     {
     302          122128 :         PatternAlternative* alternative = js::OffTheBooks::new_<PatternAlternative>(this); // NOTE(review): result is not checked -- confirm whether new_ can return null.
     303          122128 :         m_alternatives.append(alternative);
     304          122128 :         return alternative;
     305                 :     }
     306                 : 
     307                 :     Vector<PatternAlternative*> m_alternatives; // Entries are handed to deleteAllValues() in the destructor.
     308                 :     PatternAlternative* m_parent; // Not freed by this struct.
     309                 :     unsigned m_minimumSize; // NOTE(review): not initialised by the constructor -- confirm it is assigned before it is read.
     310                 :     unsigned m_callFrameSize; // NOTE(review): not initialised by the constructor -- confirm it is assigned before it is read.
     311                 :     bool m_hasFixedSize;
     312                 : };
     313                 : 
     314                 : // You probably don't want to call these functions directly.
     315                 : // Instead, call newlineCharacterClass() et al. on a YarrPattern
     316                 : // instance: each accessor calls the matching *Create() function
     317                 : // on first use and returns the cached CharacterClass afterwards.
     318                 : CharacterClass* newlineCreate();
     319                 : CharacterClass* digitsCreate();
     320                 : CharacterClass* spacesCreate();
     321                 : CharacterClass* wordcharCreate();
     322                 : CharacterClass* nondigitsCreate();
     323                 : CharacterClass* nonspacesCreate();
     324                 : CharacterClass* nonwordcharCreate();
     325                 : 
     326                 : struct TermChain { // Pairs one PatternTerm (held by value) with a vector of further TermChain entries.
     327                 :     TermChain(PatternTerm term) // Copies the term; hotTerms is default-constructed. Not explicit, so a PatternTerm converts implicitly.
     328                 :         : term(term)
     329                 :     {}
     330                 : 
     331                 :     PatternTerm term;
     332                 :     Vector<TermChain> hotTerms; // NOTE(review): how this is populated and used is not visible in this header.
     333                 : };
     334                 : 
     335                 : struct YarrPattern { // A compiled pattern: flags, subpattern counters, and the disjunctions and character classes it releases on destruction.
     336                 :     YarrPattern(const UString& pattern, bool ignoreCase, bool multiline, ErrorCode* error); // Defined outside this header.
     337                 : 
     338           58244 :     ~YarrPattern() // Passes every stored disjunction and character class to deleteAllValues().
     339           58244 :     {
     340           58244 :         deleteAllValues(m_disjunctions);
     341           58244 :         deleteAllValues(m_userCharacterClasses);
     342           58244 :     }
     343                 : 
     344               0 :     void reset() // Zeroes the counters, two of the flags and all cached class pointers, then releases and empties both vectors.
     345                 :     {
     346               0 :         m_numSubpatterns = 0;
     347               0 :         m_maxBackReference = 0;
     348                 : 
     349               0 :         m_containsBackreferences = false;
     350               0 :         m_containsBOL = false;
     351                 : 
     352               0 :         newlineCached = 0; // The cached objects stay reachable through m_userCharacterClasses, which is released below.
     353               0 :         digitsCached = 0;
     354               0 :         spacesCached = 0;
     355               0 :         wordcharCached = 0;
     356               0 :         nondigitsCached = 0;
     357               0 :         nonspacesCached = 0;
     358               0 :         nonwordcharCached = 0;
     359                 : 
     360               0 :         deleteAllValues(m_disjunctions); // NOTE(review): m_body is not reset here; if it points into m_disjunctions it dangles after this call -- confirm.
     361               0 :         m_disjunctions.clear();
     362               0 :         deleteAllValues(m_userCharacterClasses);
     363               0 :         m_userCharacterClasses.clear();
     364               0 :     }
     365                 : 
     366           58244 :     bool containsIllegalBackReference() // True when the largest back-reference seen exceeds the number of subpatterns.
     367                 :     {
     368           58244 :         return m_maxBackReference > m_numSubpatterns;
     369                 :     }
     370                 : 
     371            8352 :     CharacterClass* newlineCharacterClass() // Creates the class on first use, records it in m_userCharacterClasses, and returns the cached pointer thereafter.
     372                 :     {
     373            8352 :         if (!newlineCached)
     374            7875 :             m_userCharacterClasses.append(newlineCached = newlineCreate());
     375            8352 :         return newlineCached;
     376                 :     }
     377           25151 :     CharacterClass* digitsCharacterClass() // Same create-once-and-cache scheme, using digitsCreate().
     378                 :     {
     379           25151 :         if (!digitsCached)
     380           11132 :             m_userCharacterClasses.append(digitsCached = digitsCreate());
     381           25151 :         return digitsCached;
     382                 :     }
     383             908 :     CharacterClass* spacesCharacterClass() // Same create-once-and-cache scheme, using spacesCreate().
     384                 :     {
     385             908 :         if (!spacesCached)
     386             609 :             m_userCharacterClasses.append(spacesCached = spacesCreate());
     387             908 :         return spacesCached;
     388                 :     }
     389            5203 :     CharacterClass* wordcharCharacterClass() // Same create-once-and-cache scheme, using wordcharCreate().
     390                 :     {
     391            5203 :         if (!wordcharCached)
     392            4193 :             m_userCharacterClasses.append(wordcharCached = wordcharCreate());
     393            5203 :         return wordcharCached;
     394                 :     }
     395               0 :     CharacterClass* nondigitsCharacterClass() // Same create-once-and-cache scheme, using nondigitsCreate().
     396                 :     {
     397               0 :         if (!nondigitsCached)
     398               0 :             m_userCharacterClasses.append(nondigitsCached = nondigitsCreate());
     399               0 :         return nondigitsCached;
     400                 :     }
     401               9 :     CharacterClass* nonspacesCharacterClass() // Same create-once-and-cache scheme, using nonspacesCreate().
     402                 :     {
     403               9 :         if (!nonspacesCached)
     404               9 :             m_userCharacterClasses.append(nonspacesCached = nonspacesCreate());
     405               9 :         return nonspacesCached;
     406                 :     }
     407               9 :     CharacterClass* nonwordcharCharacterClass() // Same create-once-and-cache scheme, using nonwordcharCreate().
     408                 :     {
     409               9 :         if (!nonwordcharCached)
     410               9 :             m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate());
     411               9 :         return nonwordcharCached;
     412                 :     }
     413                 : 
     414                 :     bool m_ignoreCase : 1; // Not touched by reset().
     415                 :     bool m_multiline : 1; // Not touched by reset().
     416                 :     bool m_containsBackreferences : 1;
     417                 :     bool m_containsBOL : 1;
     418                 :     unsigned m_numSubpatterns;
     419                 :     unsigned m_maxBackReference;
     420                 :     PatternDisjunction* m_body; // Not freed directly by the destructor; NOTE(review): presumably also stored in m_disjunctions -- confirm.
     421                 :     Vector<PatternDisjunction*, 4> m_disjunctions; // Entries are handed to deleteAllValues() by the destructor and reset().
     422                 :     Vector<CharacterClass*> m_userCharacterClasses; // Entries are handed to deleteAllValues() by the destructor and reset(); includes the cached classes below.
     423                 : 
     424                 : private:
     425                 :     ErrorCode compile(const UString& patternString); // Defined outside this header.
     426                 : 
     427                 :     CharacterClass* newlineCached; // Each *Cached pointer is null until its accessor above is first called (reset() sets them back to null).
     428                 :     CharacterClass* digitsCached;
     429                 :     CharacterClass* spacesCached;
     430                 :     CharacterClass* wordcharCached;
     431                 :     CharacterClass* nondigitsCached;
     432                 :     CharacterClass* nonspacesCached;
     433                 :     CharacterClass* nonwordcharCached;
     434                 : };
     435                 : 
     436                 : } } // namespace JSC::Yarr
     437                 : 
     438                 : #endif // YarrPattern_h

Generated by: LCOV version 1.7