LCOV - code coverage report
Current view: directory - parser/htmlparser/src - nsScanner.h (source / functions) Found Hit Coverage
Test: app.info Lines: 10 9 90.0 %
Date: 2012-06-02 Functions: 4 4 100.0 %

       1                 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2                 : /* ***** BEGIN LICENSE BLOCK *****
       3                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       4                 :  *
       5                 :  * The contents of this file are subject to the Mozilla Public License Version
       6                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       7                 :  * the License. You may obtain a copy of the License at
       8                 :  * http://www.mozilla.org/MPL/
       9                 :  *
      10                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      11                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      12                 :  * for the specific language governing rights and limitations under the
      13                 :  * License.
      14                 :  *
      15                 :  * The Original Code is mozilla.org code.
      16                 :  *
      17                 :  * The Initial Developer of the Original Code is
      18                 :  * Netscape Communications Corporation.
      19                 :  * Portions created by the Initial Developer are Copyright (C) 1998
      20                 :  * the Initial Developer. All Rights Reserved.
      21                 :  *
      22                 :  * Contributor(s):
      23                 :  *
      24                 :  * Alternatively, the contents of this file may be used under the terms of
      25                 :  * either of the GNU General Public License Version 2 or later (the "GPL"),
      26                 :  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      27                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      28                 :  * of those above. If you wish to allow use of your version of this file only
      29                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      30                 :  * use your version of this file under the terms of the MPL, indicate your
      31                 :  * decision by deleting the provisions above and replace them with the notice
      32                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      33                 :  * the provisions above, a recipient may use your version of this file under
      34                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      35                 :  *
      36                 :  * ***** END LICENSE BLOCK ***** */
      37                 : 
      38                 : 
      39                 : /**
      40                 :  * MODULE NOTES:
      41                 :  * @update  gess 4/1/98
      42                 :  * 
      43                 :  * The scanner is a low-level service class that knows
      44                 :  * how to consume characters out of an (internal) stream.
      45                 :  * This class also offers a series of utility methods
      46                 :  * that most tokenizers want, such as ReadUntil()
      47                 :  * and SkipWhitespace().
      48                 :  */
      49                 : 
      50                 : 
      51                 : #ifndef SCANNER
      52                 : #define SCANNER
      53                 : 
      54                 : #include "nsCOMPtr.h"
      55                 : #include "nsString.h"
      56                 : #include "nsIParser.h"
      57                 : #include "prtypes.h"
      58                 : #include "nsIUnicodeDecoder.h"
      59                 : #include "nsScannerString.h"
      60                 : 
      61                 : class nsParser;
      62                 : 
      63                 : class nsReadEndCondition { // End condition taken by the nsScanner::ReadUntil() overloads declared below
      64                 : public:
      65                 :   const PRUnichar *mChars; // presumably the terminator chars given to the constructor - confirm in nsScanner.cpp
      66                 :   PRUnichar mFilter; // NOTE(review): set outside this header; its meaning is not visible here
      67                 :   explicit nsReadEndCondition(const PRUnichar* aTerminateChars); // explicit: no implicit conversion from a raw PRUnichar pointer
      68                 : private:
      69                 :   nsReadEndCondition(const nsReadEndCondition& aOther); // No copying (declared private)
      70                 :   void operator=(const nsReadEndCondition& aOther); // No assigning (declared private)
      71                 : };
      72                 : 
      73                 : class nsScanner { // Low-level service class that consumes characters for tokenizers
      74                 :   public:
      75                 : 
      76                 :       /**
      77                 :        *  Use this constructor if you want i/o to be based on
      78                 :        *  a single string you hand in during construction.
      79                 :        *  This short cut was added for Javascript.
      80                 :        *
      81                 :        *  @update  ftang 3/02/99
      82                 :        *  @param   anHTMLString the string the scanner's i/o is based on
      83                 :        *  @param   aCharset charset
      84                 :        *  @param   aSource where the charset info came from (this comment
      85                 :        *           previously named it aCharsetSource)
      86                 :        */
      87                 :       nsScanner(const nsAString& anHTMLString, const nsACString& aCharset, PRInt32 aSource);
      88                 : 
      89                 :       /**
      90                 :        *  Use this constructor if you want i/o to be based on
      91                 :        *  a file (therefore a stream) or just data you provide via Append().
      92                 :        *
      93                 :        *  @update  ftang 3/02/99
      94                 :        *  @param   aFilename presumably the name later returned by GetFilename() - confirm
      95                 :        *  @param   aCreateStream NOTE(review): effect is not visible in this header - confirm
      96                 :        *  @param   aCharset charset
      97                 :        *  @param   aSource where the charset info came from
      98                 :        */
      99                 :       nsScanner(nsString& aFilename,bool aCreateStream, const nsACString& aCharset, PRInt32 aSource);
     100                 : 
     101                 :       ~nsScanner();
     102                 : 
     103                 :       /**
     104                 :        *  Retrieve the next char from the internal input stream.
     105                 :        *
     106                 :        *  @update  gess 3/25/98
     107                 :        *  @param   ch is the char to accept new value
     108                 :        *  @return  error code reflecting read status
     109                 :        */
     110                 :       nsresult GetChar(PRUnichar& ch);
     111                 : 
     112                 :       /**
     113                 :        *  Peek ahead at a char in the scanner's internal input buffer.
     114                 :        *  NOTE(review): this text used to say "consume"; presumably nothing is consumed - confirm
     115                 :        *  @update  gess 3/25/98
     116                 :        *  @param   ch is the char to accept new value
     117                 :        *  @param   aOffset defaults to 0; presumably counted from the current position - confirm
     118                 :        *  @return  error code reflecting read status
     119                 :        */
     120                 :       nsresult Peek(PRUnichar& ch, PRUint32 aOffset=0);
     121                 : 
     122                 :       nsresult Peek(nsAString& aStr, PRInt32 aNumChars, PRInt32 aOffset = 0);
     123                 : 
     124                 :       /**
     125                 :        *  Skip over chars as long as they equal given char
     126                 :        *
     127                 :        *  @update  gess 3/25/98
     128                 :        *  @param   aSkipChar char to be skipped
     129                 :        *  @return  error code
     130                 :        */
     131                 :       nsresult SkipOver(PRUnichar aSkipChar);
     132                 : 
     133                 :       /**
     134                 :        *  Skip whitespace on scanner input stream
     135                 :        *  @update  gess 3/25/98
     136                 :        *  @param   aNewlinesSkipped presumably receives the number of newlines skipped - confirm
     137                 :        *  @return  error status
     138                 :        */
     139                 :       nsresult SkipWhitespace(PRInt32& aNewlinesSkipped);
     140                 : 
     141                 :       /**
     142                 :        *  Consume characters until you run into space, a '<', a '>', or a '/'.
     143                 :        *
     144                 :        *  @param   aString - receives new data from stream
     145                 :        *  @return  error code
     146                 :        */
     147                 :       nsresult ReadTagIdentifier(nsScannerSharedSubstring& aString);
     148                 : 
     149                 :       /**
     150                 :        *  Consume characters until you run into a char that's not valid in an
     151                 :        *  entity name
     152                 :        *
     153                 :        *  @param   aString - receives new data from stream
     154                 :        *  @return  error code
     155                 :        */
     156                 :       nsresult ReadEntityIdentifier(nsString& aString);
     157                 :       nsresult ReadNumber(nsString& aString,PRInt32 aBase);
     158                 :       nsresult ReadWhitespace(nsScannerSharedSubstring& aString, 
     159                 :                               PRInt32& aNewlinesSkipped,
     160                 :                               bool& aHaveCR);
     161                 :       nsresult ReadWhitespace(nsScannerIterator& aStart, 
     162                 :                               nsScannerIterator& aEnd,
     163                 :                               PRInt32& aNewlinesSkipped);
     164                 : 
     165                 :       /**
     166                 :        *  Consume characters until you find the terminal char
     167                 :        *
     168                 :        *  @update  gess 3/25/98
     169                 :        *  @param   aString receives new data from stream
     170                 :        *  @param   aTerminal contains terminating char
     171                 :        *  @param   addTerminal tells us whether to append terminal to aString
     172                 :        *  @return  error code
     173                 :        */
     174                 :       nsresult ReadUntil(nsAString& aString,
     175                 :                          PRUnichar aTerminal,
     176                 :                          bool addTerminal);
     177                 : 
     178                 :       /**
     179                 :        *  Consume characters until you find one contained in given
     180                 :        *  terminal set.
     181                 :        *
     182                 :        *  @update  gess 3/25/98
     183                 :        *  @param   aString receives new data from stream
     184                 :        *  @param   aEndCondition contains set of terminating chars
     185                 :        *  @param   addTerminal tells us whether to append terminal to aString
     186                 :        *  @return  error code
     187                 :        */
     188                 :       nsresult ReadUntil(nsAString& aString,
     189                 :                          const nsReadEndCondition& aEndCondition, 
     190                 :                          bool addTerminal);
     191                 : 
     192                 :       nsresult ReadUntil(nsScannerSharedSubstring& aString,
     193                 :                          const nsReadEndCondition& aEndCondition,
     194                 :                          bool addTerminal);
     195                 : 
     196                 :       nsresult ReadUntil(nsScannerIterator& aStart,
     197                 :                          nsScannerIterator& aEnd,
     198                 :                          const nsReadEndCondition& aEndCondition, 
     199                 :                          bool addTerminal);
     200                 : 
     201                 :       /**
     202                 :        *  Records current offset position in input stream. This allows us
     203                 :        *  to back up to this point if the need should arise, such as when
     204                 :        *  tokenization gets interrupted.
     205                 :        *
     206                 :        *  @update  gess 5/12/98
     207                 :        *  @param   none
     208                 :        *  @return  NOTE(review): meaning of the PRInt32 result is not stated here - confirm
     209                 :        */
     210                 :       PRInt32 Mark(void);
     211                 : 
     212                 :       /**
     213                 :        *  Resets current offset position of input stream to marked position.
     214                 :        *  This allows us to back up to this point if the need should arise,
     215                 :        *  such as when tokenization gets interrupted.
     216                 :        *  NOTE: IT IS REALLY BAD FORM TO CALL RewindToMark() WITHOUT CALLING Mark() FIRST!
     217                 :        *
     218                 :        *  @update  gess 5/12/98
     219                 :        *  @param   none
     220                 :        *  @return  nothing
     221                 :        */
     222                 :       void RewindToMark(void);
     223                 : 
     224                 : 
     225                 :       /**
     226                 :        *  NOTE(review): undocumented in this header. Presumably puts aBuffer
     227                 :        *  back into the scanner's input - confirm against the implementation.
     228                 :        *  @update  harishd 01/12/99
     229                 :        *  @param   aBuffer the text handed back to the scanner (see note above)
     230                 :        *  @return  bool; NOTE(review): meaning is not stated here - confirm
     231                 :        */
     232                 :       bool UngetReadable(const nsAString& aBuffer);
     233                 : 
     234                 :       /**
     235                 :        *  Append string data to the scanner; the stream-based constructor's
     236                 :        *  comment names Append() as the way to provide data.
     237                 :        *  @update  gess 5/13/98
     238                 :        *  @param   aBuffer the data to append
     239                 :        *  @return  error code
     240                 :        */
     241                 :       nsresult Append(const nsAString& aBuffer);
     242                 : 
     243                 :       /**
     244                 :        *  Append raw char data to the scanner.
     245                 :        *  NOTE(review): presumably aLen is the byte count of aBuffer and the data is decoded via mUnicodeDecoder - confirm
     246                 :        *  @update  gess 5/21/98
     247                 :        *  @param   aBuffer, aLen the raw data and its length; aRequest is not documented here
     248                 :        *  @return  error code
     249                 :        */
     250                 :       nsresult Append(const char* aBuffer, PRUint32 aLen,
     251                 :                       nsIRequest *aRequest);
     252                 : 
     253                 :       /**
     254                 :        *  Call this to copy bytes out of the scanner that have not yet been consumed
     255                 :        *  by the tokenization process.
     256                 :        *
     257                 :        *  @update  gess 5/12/98
     258                 :        *  @param   aCopyBuffer is where the scanner buffer will be copied to
     259                 :        *  @return  nothing
     260                 :        */
     261                 :       void CopyUnusedData(nsString& aCopyBuffer);
     262                 : 
     263                 :       /**
     264                 :        *  Retrieve the name of the file that the scanner is reading from.
     265                 :        *  In some cases, it's just a given name, because the scanner isn't
     266                 :        *  really reading from a file.
     267                 :        *
     268                 :        *  @update  gess 5/12/98
     269                 :        *  @return  reference to the filename string
     270                 :        */
     271                 :       nsString& GetFilename(void);
     272                 : 
     273                 :       static void SelfTest();
     274                 : 
     275                 :       /**
     276                 :        *  Use this setter to change the scanner's unicode decoder
     277                 :        *
     278                 :        *  @update  ftang 3/02/99
     279                 :        *  @param   aCharset a normalized (alias resolved) charset name
     280                 :        *  @param   aSource where the charset info came from
     281                 :        *  @return  error code
     282                 :        */
     283                 :       nsresult SetDocumentCharset(const nsACString& aCharset, PRInt32 aSource);
     284                 : 
     285                 :       void BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd);
     286                 :       void CurrentPosition(nsScannerIterator& aPosition);
     287                 :       void EndReading(nsScannerIterator& aPosition);
     288                 :       void SetPosition(nsScannerIterator& aPosition,
     289                 :                        bool aTruncate = false,
     290                 :                        bool aReverse = false);
     291                 :       void ReplaceCharacter(nsScannerIterator& aPosition,
     292                 :                             PRUnichar aChar);
     293                 : 
     294                 :       /**
     295                 :        * Inline accessors for the mIncremental flag.
     296                 :        * NOTE(review): this comment used to describe a buffer-filling
     297                 :        * method, which does not match the two accessors that follow.
     298                 :        * @update  gess4/3/98
     299                 :        */
     300             732 :       bool      IsIncremental(void) {return mIncremental;}
     301            3332 :       void      SetIncremental(bool anIncrValue) {mIncremental=anIncrValue;}
     302                 : 
     303                 :       /**
     304                 :        * Return the position of the first non-whitespace
     305                 :        * character. This is only reliable before consumers start
     306                 :        * reading from this scanner.
     307                 :        */
     308            6766 :       PRInt32 FirstNonWhitespacePosition()
     309                 :       {
     310            6766 :         return mFirstNonWhitespacePosition;
     311                 :       }
     312                 : 
     313                 :       /**
     314                 :        * Override replacement character used by nsIUnicodeDecoder.
     315                 :        * Default behavior is that it uses nsIUnicodeDecoder's mapping.
     316                 :        *
     317                 :        * @param aReplacementCharacter the replacement character
     318                 :        *        XML (expat) parser uses 0xffff
     319                 :        */
     320                 :       void OverrideReplacementCharacter(PRUnichar aReplacementCharacter);
     321                 : 
     322                 :   protected:
     323                 : 
     324                 :       bool AppendToBuffer(nsScannerString::Buffer *, nsIRequest *aRequest, PRInt32 aErrorPos = -1);
     325               4 :       bool AppendToBuffer(const nsAString& aStr) // convenience overload: allocates a Buffer from aStr and appends it
     326                 :       {
     327               4 :         nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr);
     328               4 :         if (!buf)
     329               0 :           return false; // buffer allocation failed
     330               4 :         AppendToBuffer(buf, nsnull); // NOTE(review): the bool result of this call is ignored
     331               4 :         return true;
     332                 :       }
     333                 : 
     334                 :       nsScannerString*             mSlidingBuffer;
     335                 :       nsScannerIterator            mCurrentPosition; // The position we will next read from in the scanner buffer
     336                 :       nsScannerIterator            mMarkPosition;    // The position last marked (we may rewind to here)
     337                 :       nsScannerIterator            mEndPosition;     // The current end of the scanner buffer
     338                 :       nsScannerIterator            mFirstInvalidPosition; // The position of the first invalid character that was detected
     339                 :       nsString        mFilename;
     340                 :       PRUint32        mCountRemaining; // The number of bytes still to be read
     341                 :                                        // from the scanner buffer
     342                 :       bool            mIncremental; // read by IsIncremental(), written by SetIncremental()
     343                 :       bool            mHasInvalidCharacter;
     344                 :       PRUnichar       mReplacementCharacter; // presumably set by OverrideReplacementCharacter() - confirm
     345                 :       PRInt32         mFirstNonWhitespacePosition; // returned by FirstNonWhitespacePosition()
     346                 :       PRInt32         mCharsetSource;
     347                 :       nsCString       mCharset;
     348                 :       nsCOMPtr<nsIUnicodeDecoder> mUnicodeDecoder;
     349                 : 
     350                 :   private:
     351                 :       nsScanner &operator =(const nsScanner &); // Not implemented.
     352                 : };
     353                 : 
     354                 : #endif
     355                 : 
     356                 : 

Generated by: LCOV version 1.7