LCOV - code coverage report
Current view: directory - objdir/dist/include - nsHTMLTokens.h (source / functions) Found Hit Coverage
Test: app.info Lines: 35 22 62.9 %
Date: 2012-06-02 Functions: 43 19 44.2 %

       1                 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2                 : /* ***** BEGIN LICENSE BLOCK *****
       3                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       4                 :  *
       5                 :  * The contents of this file are subject to the Mozilla Public License Version
       6                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       7                 :  * the License. You may obtain a copy of the License at
       8                 :  * http://www.mozilla.org/MPL/
       9                 :  *
      10                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      11                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      12                 :  * for the specific language governing rights and limitations under the
      13                 :  * License.
      14                 :  *
      15                 :  * The Original Code is mozilla.org code.
      16                 :  *
      17                 :  * The Initial Developer of the Original Code is
      18                 :  * Netscape Communications Corporation.
      19                 :  * Portions created by the Initial Developer are Copyright (C) 1998
      20                 :  * the Initial Developer. All Rights Reserved.
      21                 :  *
      22                 :  * Contributor(s):
      23                 :  *
      24                 :  * Alternatively, the contents of this file may be used under the terms of
      25                 :  * either of the GNU General Public License Version 2 or later (the "GPL"),
      26                 :  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      27                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      28                 :  * of those above. If you wish to allow use of your version of this file only
      29                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      30                 :  * use your version of this file under the terms of the MPL, indicate your
      31                 :  * decision by deleting the provisions above and replace them with the notice
      32                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      33                 :  * the provisions above, a recipient may use your version of this file under
      34                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      35                 :  *
      36                 :  * ***** END LICENSE BLOCK ***** */
      37                 : 
      38                 : /**
      39                 :  * MODULE NOTES:
      40                 :  * @update  gess 4/1/98
      41                 :  *
      42                 :  * This file contains the declarations for all the HTML specific token types that
      43                 :  * our DTDs understand. In fact, the same set of token types is used for XML.
      44                 :  * Currently we have tokens for text, comments, start and end tags, entities,
      45                 :  * attributes, style, script and skipped content. Whitespace and newlines also
      46                 :  * have their own token types, but don't count on them to stay forever.
      47                 :  *
      48                 :  * If you're looking for the html tags, they're in a file called nsHTMLTags.h/cpp.
      49                 :  *
      50                 :  * Most of the token types have a similar API. They have methods to get the type
      51                 :  * of token (GetTokenType); those that represent HTML tags also have a method to
      52                 :  * get the tag type (GetTypeID). In addition, most have a method, called Consume,
      53                 :  * that makes the token help in the parsing process. We've also thrown in a
      54                 :  * few standard debugging methods as well.
      55                 :  */
      56                 : 
      57                 : #ifndef HTMLTOKENS_H
      58                 : #define HTMLTOKENS_H
      59                 : 
      60                 : #include "nsToken.h"
      61                 : #include "nsHTMLTags.h"
      62                 : #include "nsString.h"
      63                 : #include "nsScannerString.h"
      64                 : 
      65                 : class nsScanner;
      66                 : 
      67                 :   /*******************************************************************
      68                 :    * This enum defines the set of token types that we currently support (eToken_unknown is 0; the rest count up consecutively from eToken_start = 1).
      69                 :    *******************************************************************/
      70                 : 
      71                 : enum eHTMLTokenTypes {
      72                 :   eToken_unknown=0,
      73                 :   eToken_start=1,      eToken_end,          eToken_comment,         eToken_entity,
      74                 :   eToken_whitespace,   eToken_newline,      eToken_text,            eToken_attribute,
      75                 :   eToken_instruction,  eToken_cdatasection, eToken_doctypeDecl,     eToken_markupDecl,
      76                 :   eToken_last // must remain the final enumerator; add new token types above it
      77                 : };
      78                 : 
      79                 : nsresult      ConsumeQuotedString(PRUnichar aChar,nsString& aString,nsScanner& aScanner); // declaration only; no definition in this header
      80                 : nsresult      ConsumeAttributeText(PRUnichar aChar,nsString& aString,nsScanner& aScanner); // declaration only; no definition in this header
      81                 : const PRUnichar* GetTagName(PRInt32 aTag); // NOTE(review): aTag is presumably an eHTMLTags value -- confirm
      82                 : //PRInt32     FindEntityIndex(nsString& aString,PRInt32 aCount=-1); // (disabled declaration)
      83                 : 
      84                 : 
      85                 : 
      86                 : /**
      87                 :  *  This declares the basic token type used in the HTML DTDs. It derives from CToken and is the direct base of every other token class in this header.
      88                 :  *  @update  gess 3/25/98
      89                 :  */
      90                 : class CHTMLToken : public CToken {
      91                 : public:
      92                 :   virtual ~CHTMLToken();
      93                 :   CHTMLToken(eHTMLTags aTag);
      94                 : 
      95               0 :   virtual eContainerInfo GetContainerInfo(void) const {return eFormUnknown;} // base default: always eFormUnknown
      96               0 :   virtual void SetContainerInfo(eContainerInfo aInfo) { } // base default: ignores aInfo
      97                 : 
      98                 : protected: // (no protected members are declared)
      99                 : };
     100                 : 
     101                 : /**
     102                 :  *  This declares start tokens, which always take the form <xxxx>.
     103                 :  *  This class also knows how to consume related attributes.
     104                 :  *  Container info is sticky: SetContainerInfo stores a value only while the current one is still eFormUnknown.
     105                 :  *  @update  gess 3/25/98
     106                 :  */
     107             850 : class CStartToken: public CHTMLToken {
     108             850 :   CTOKEN_IMPL_SIZEOF
     109                 : 
     110                 : public:
     111                 :   CStartToken(eHTMLTags aTag=eHTMLTag_unknown);
     112                 :   CStartToken(const nsAString& aString);
     113                 :   CStartToken(const nsAString& aName,eHTMLTags aTag);
     114                 : 
     115                 :   virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
     116                 :   virtual PRInt32 GetTypeID(void);
     117                 :   virtual PRInt32 GetTokenType(void);
     118                 : 
     119                 :   virtual bool IsEmpty(void);
     120                 :   virtual void SetEmpty(bool aValue);
     121                 : 
     122                 :   virtual const nsSubstring& GetStringValue();
     123                 :   virtual void GetSource(nsString& anOutputString);
     124                 :   virtual void AppendSourceTo(nsAString& anOutputString);
     125                 : 
     126                 :   // the following info is used to set well-formedness state on start tags...
     127               0 :   virtual eContainerInfo GetContainerInfo(void) const {return mContainerInfo;}
     128            1214 :   virtual void SetContainerInfo(eContainerInfo aContainerInfo) {
     129            1214 :     if (eFormUnknown==mContainerInfo) { // later calls are ignored once a non-unknown value has been stored
     130             719 :       mContainerInfo=aContainerInfo;
     131                 :     }
     132            1214 :   }
     133               0 :   virtual bool IsWellFormed(void) const {
     134               0 :     return eWellFormed == mContainerInfo; // true only for exactly eWellFormed
     135                 :   }
     136                 : 
     137                 :   nsString mTextValue; // public here; the text members of the other token classes in this header are protected
     138                 : protected:
     139                 :   eContainerInfo mContainerInfo;
     140                 :   bool mEmpty;
     141                 : #ifdef DEBUG
     142                 :   bool mAttributed; // declared only when DEBUG is defined
     143                 : #endif
     144                 : };
     145                 : 
     146                 : 
     147                 : /**
     148                 :  *  This declares end tokens, which always take the
     149                 :  *  form </xxxx>. This class also knows how to consume
     150                 :  *  related attributes.
     151                 :  *  Unlike CStartToken, the constructor taking only an eHTMLTags has no default argument.
     152                 :  *  @update  gess 3/25/98
     153                 :  */
     154             344 : class CEndToken: public CHTMLToken {
     155             344 :   CTOKEN_IMPL_SIZEOF
     156                 : 
     157                 : public:
     158                 :   CEndToken(eHTMLTags aTag);
     159                 :   CEndToken(const nsAString& aString);
     160                 :   CEndToken(const nsAString& aName,eHTMLTags aTag);
     161                 :   virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
     162                 :   virtual PRInt32 GetTypeID(void);
     163                 :   virtual PRInt32 GetTokenType(void);
     164                 : 
     165                 :   virtual const nsSubstring& GetStringValue();
     166                 :   virtual void GetSource(nsString& anOutputString);
     167                 :   virtual void AppendSourceTo(nsAString& anOutputString);
     168                 : 
     169                 : protected:
     170                 :   nsString mTextValue; // text storage for this token (an nsString)
     171                 : };
     172                 : 
     173                 : 
     174                 : /**
     175                 :  *  This declares comment tokens. Comments are not usually
     176                 :  *  thought of as tokens, but we treat them that way
     177                 :  *  here so that the parser can have a consistent view
     178                 :  *  of all tokens.
     179                 :  *  Two substrings are declared: mComment (without MDO & MDC) and mCommentDecl (with them).
     180                 :  *  @update  gess 3/25/98
     181                 :  */
     182              25 : class CCommentToken: public CHTMLToken {
     183              25 :   CTOKEN_IMPL_SIZEOF
     184                 : 
     185                 : public:
     186                 :   CCommentToken();
     187                 :   CCommentToken(const nsAString& aString);
     188                 :   virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
     189                 :   virtual PRInt32 GetTokenType(void);
     190                 :   virtual const nsSubstring& GetStringValue(void);
     191                 :   virtual void AppendSourceTo(nsAString& anOutputString);
     192                 : 
     193                 :   nsresult ConsumeStrictComment(nsScanner& aScanner); // non-virtual; NOTE(review): presumably chosen by Consume based on aMode -- confirm
     194                 :   nsresult ConsumeQuirksComment(nsScanner& aScanner); // non-virtual; NOTE(review): presumably chosen by Consume based on aMode -- confirm
     195                 : 
     196                 : protected:
     197                 :   nsScannerSubstring mComment; // does not include MDO & MDC
     198                 :   nsScannerSubstring mCommentDecl; // includes MDO & MDC
     199                 : };
     200                 : 
     201                 : 
     202                 : /**
     203                 :  *  This class declares entity tokens, which always take
     204                 :  *  the form &xxxx;. This class also offers a few utility
     205                 :  *  methods that allow you to easily reduce entities.
     206                 :  *  TranslateToUnicodeStr is overloaded: a member taking only the output string, and a static one that also takes a PRInt32 value.
     207                 :  *  @update  gess 3/25/98
     208                 :  */
     209               0 : class CEntityToken : public CHTMLToken {
     210               0 :   CTOKEN_IMPL_SIZEOF
     211                 : 
     212                 : public:
     213                 :   CEntityToken();
     214                 :   CEntityToken(const nsAString& aString);
     215                 :   virtual PRInt32 GetTokenType(void);
     216                 :   PRInt32 TranslateToUnicodeStr(nsString& aString);
     217                 :   virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
     218                 :   static nsresult ConsumeEntity(PRUnichar aChar, nsString& aString,
     219                 :                                 nsScanner& aScanner); // static: callable without a CEntityToken instance
     220                 :   static PRInt32 TranslateToUnicodeStr(PRInt32 aValue,nsString& aString);
     221                 : 
     222                 :   virtual const nsSubstring& GetStringValue(void);
     223                 :   virtual void GetSource(nsString& anOutputString);
     224                 :   virtual void AppendSourceTo(nsAString& anOutputString);
     225                 : 
     226                 : protected:
     227                 :   nsString mTextValue; // text storage for this token (an nsString)
     228                 : };
     229                 : 
     230                 : 
     231                 : /**
     232                 :  *  Whitespace tokens are used where whitespace can be
     233                 :  *  detected as distinct from text. This allows us to
     234                 :  *  easily skip leading/trailing whitespace when desired.
     235                 :  *  The text is declared as an nsScannerSharedSubstring (mTextValue).
     236                 :  *  @update  gess 3/25/98
     237                 :  */
     238             281 : class CWhitespaceToken: public CHTMLToken {
     239             281 :   CTOKEN_IMPL_SIZEOF
     240                 : 
     241                 : public:
     242                 :   CWhitespaceToken();
     243                 :   CWhitespaceToken(const nsAString& aString);
     244                 :   virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
     245                 :   virtual PRInt32 GetTokenType(void);
     246                 :   virtual const nsSubstring& GetStringValue(void);
     247                 : 
     248                 : protected:
     249                 :   nsScannerSharedSubstring mTextValue;
     250                 : };
     251                 : 
     252                 : /**
     253                 :  *  Text tokens contain the normalized form of html text.
     254                 :  *  These tokens are guaranteed not to contain entities,
     255                 :  *  start or end tags, or newlines.
     256                 :  *  Besides Consume, two non-virtual consumers are declared (ConsumeCharacterData, ConsumeParsedCharacterData); Bind has two overloads.
     257                 :  *  @update  gess 3/25/98
     258                 :  */
     259             306 : class CTextToken: public CHTMLToken {
     260             306 :   CTOKEN_IMPL_SIZEOF
     261                 : 
     262                 : public:
     263                 :   CTextToken();
     264                 :   CTextToken(const nsAString& aString);
     265                 :   virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
     266                 :   virtual PRInt32 GetTokenType(void);
     267                 :   virtual PRInt32 GetTextLength(void);
     268                 :   virtual void CopyTo(nsAString& aStr);
     269                 :   virtual const nsSubstring& GetStringValue(void);
     270                 :   virtual void Bind(nsScanner* aScanner, nsScannerIterator& aStart,
     271                 :                     nsScannerIterator& aEnd); // overload taking a scanner and an iterator pair
     272                 :   virtual void Bind(const nsAString& aStr); // overload taking a string
     273                 : 
     274                 :   nsresult ConsumeCharacterData(bool aIgnoreComments,
     275                 :                                 nsScanner& aScanner,
     276                 :                                 const nsAString& aEndTagName,
     277                 :                                 PRInt32 aFlag,
     278                 :                                 bool& aFlushTokens); // aFlushTokens is a non-const reference, i.e. an in/out or output flag
     279                 : 
     280                 :   nsresult ConsumeParsedCharacterData(bool aDiscardFirstNewline,
     281                 :                                       bool aConservativeConsume,
     282                 :                                       nsScanner& aScanner,
     283                 :                                       const nsAString& aEndTagName,
     284                 :                                       PRInt32 aFlag,
     285                 :                                       bool& aFound); // aFound is a non-const reference, i.e. an in/out or output flag
     286                 : 
     287                 : protected:
     288                 :   nsScannerSubstring mTextValue;
     289                 : };
     290                 : 
     291                 : 
     292                 : /**
     293                 :  *  CDATASection tokens contain raw unescaped text content delimited by
     294                 :  *  a ![CDATA[ and ]].
     295                 :  *  XXX Not really an HTML construct - maybe we need a separation
     296                 :  *  The text is declared as an nsString (mTextValue).
     297                 :  *  @update  vidur 11/12/98
     298                 :  */
     299               0 : class CCDATASectionToken : public CHTMLToken {
     300               0 :   CTOKEN_IMPL_SIZEOF
     301                 : 
     302                 : public:
     303                 :   CCDATASectionToken(eHTMLTags aTag = eHTMLTag_unknown);
     304                 :   CCDATASectionToken(const nsAString& aString);
     305                 :   virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
     306                 :   virtual PRInt32 GetTokenType(void);
     307                 :   virtual const nsSubstring& GetStringValue(void);
     308                 : 
     309                 : protected:
     310                 :   nsString mTextValue;
     311                 : };
     312                 : 
     313                 : 
     314                 : /**
     315                 :  *  Declaration tokens contain raw unescaped text content (not really, but
     316                 :  *  right now we use this only for view source).
     317                 :  *  XXX Not really an HTML construct - maybe we need a separation
     318                 :  *  The text is declared as an nsScannerSubstring (mTextValue).
     319                 :  */
     320               0 : class CMarkupDeclToken : public CHTMLToken {
     321               0 :   CTOKEN_IMPL_SIZEOF
     322                 : 
     323                 : public:
     324                 :   CMarkupDeclToken();
     325                 :   CMarkupDeclToken(const nsAString& aString);
     326                 :   virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
     327                 :   virtual PRInt32 GetTokenType(void);
     328                 :   virtual const nsSubstring& GetStringValue(void);
     329                 : 
     330                 : protected:
     331                 :   nsScannerSubstring  mTextValue;
     332                 : };
     333                 : 
     334                 : 
     335                 : /**
     336                 :  *  Attribute tokens are used to contain attribute key/value
     337                 :  *  pairs wherever they may occur. Typically, they should
     338                 :  *  occur only in start tokens. However, we may expand that
     339                 :  *  ability when XML tokens become commonplace.
     340                 :  *  GetKey and GetValue are non-virtual inline accessors over mTextKey and mTextValue respectively.
     341                 :  *  @update  gess 3/25/98
     342                 :  */
     343                 : class CAttributeToken: public CHTMLToken {
     344             732 :   CTOKEN_IMPL_SIZEOF
     345                 : 
     346                 : public:
     347                 :   CAttributeToken();
     348                 :   CAttributeToken(const nsAString& aString);
     349                 :   CAttributeToken(const nsAString& aKey, const nsAString& aString);
     350             732 :   ~CAttributeToken() {}
     351                 :   virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
     352                 :   virtual PRInt32 GetTokenType(void);
     353            1799 :   const nsSubstring&     GetKey(void) { return mTextKey.AsString(); } // key as a string view of mTextKey
     354                 :   virtual void SetKey(const nsAString& aKey);
     355                 :   virtual void BindKey(nsScanner* aScanner, nsScannerIterator& aStart,
     356                 :                        nsScannerIterator& aEnd);
     357             621 :   const nsSubstring& GetValue(void) {return mTextValue.str();} // value as exposed by mTextValue.str()
     358                 :   virtual const nsSubstring& GetStringValue(void);
     359                 :   virtual void GetSource(nsString& anOutputString);
     360                 :   virtual void AppendSourceTo(nsAString& anOutputString);
     361                 : 
     362                 :   bool mHasEqualWithoutValue; // public flag; NOTE(review): presumably set for "key=" with no value -- confirm
     363                 : protected:
     364                 :   nsScannerSharedSubstring mTextValue;
     365                 :   nsScannerSubstring mTextKey;
     366                 : };
     367                 : 
     368                 : 
     369                 : /**
     370                 :  *  Newline tokens contain, you guessed it, newlines.
     371                 :  *  They consume newline (CR/LF) either alone or in pairs.
     372                 :  *  No data members are declared in this class.
     373                 :  *  @update  gess 3/25/98
     374                 :  */
     375             530 : class CNewlineToken: public CHTMLToken {
     376             530 :   CTOKEN_IMPL_SIZEOF
     377                 : 
     378                 : public:
     379                 :   CNewlineToken();
     380                 :   virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
     381                 :   virtual PRInt32 GetTokenType(void);
     382                 :   virtual const nsSubstring& GetStringValue(void);
     383                 : 
     384                 :   static void AllocNewline(); // NOTE(review): presumably sets up shared newline storage -- confirm in the .cpp
     385                 :   static void FreeNewline(); // NOTE(review): presumably releases what AllocNewline set up -- confirm in the .cpp
     386                 : };
     387                 : 
     388                 : 
     389                 : /**
     390                 :  *  Instruction tokens keep their text in an nsString (mTextValue).
     391                 :  *  NOTE(review): presumably these are <?...?> processing instructions
     392                 :  *  (cf. eToken_instruction) -- confirm against the tokenizer.
     393                 :  *
     394                 :  *  @update  gess 3/25/98
     395                 :  */
     396               0 : class CInstructionToken: public CHTMLToken {
     397               0 :   CTOKEN_IMPL_SIZEOF
     398                 : 
     399                 : public:
     400                 :   CInstructionToken();
     401                 :   CInstructionToken(const nsAString& aString);
     402                 :   virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
     403                 :   virtual PRInt32 GetTokenType(void);
     404                 :   virtual const nsSubstring& GetStringValue(void);
     405                 : 
     406                 : protected:
     407                 :   nsString mTextValue;
     408                 : };
     409                 : 
     410                 : 
     411                 : /**
     412                 :  * This token is generated by the HTML and Expat tokenizers
     413                 :  * when they see the doctype declaration ("<!DOCTYPE ... >").
     414                 :  * It is the only token class in this header that declares SetStringValue.
     415                 :  */
     416                 : 
     417              25 : class CDoctypeDeclToken: public CHTMLToken {
     418              25 :   CTOKEN_IMPL_SIZEOF
     419                 : 
     420                 : public:
     421                 :   CDoctypeDeclToken(eHTMLTags aTag=eHTMLTag_unknown);
     422                 :   CDoctypeDeclToken(const nsAString& aString,eHTMLTags aTag=eHTMLTag_unknown);
     423                 :   virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
     424                 :   virtual PRInt32 GetTokenType(void);
     425                 :   virtual const nsSubstring& GetStringValue(void);
     426                 :   virtual void SetStringValue(const nsAString& aStr);
     427                 : 
     428                 : protected:
     429                 :   nsString mTextValue; // text storage for this token (an nsString)
     430                 : };
     431                 : 
     432                 : #endif

Generated by: LCOV version 1.7