LCOV - code coverage report
Current view: directory - parser/htmlparser/src - nsHTMLTokenizer.cpp (source / functions) Found Hit Coverage
Test: app.info Lines: 413 297 71.9 %
Date: 2012-06-02 Functions: 33 28 84.8 %

       1                 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2                 : /* vim: set sw=2 ts=2 et tw=78: */
       3                 : /* ***** BEGIN LICENSE BLOCK *****
       4                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       5                 :  *
       6                 :  * The contents of this file are subject to the Mozilla Public License Version
       7                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       8                 :  * the License. You may obtain a copy of the License at
       9                 :  * http://www.mozilla.org/MPL/
      10                 :  *
      11                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      12                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      13                 :  * for the specific language governing rights and limitations under the
      14                 :  * License.
      15                 :  *
      16                 :  * The Original Code is mozilla.org code.
      17                 :  *
      18                 :  * The Initial Developer of the Original Code is
      19                 :  * Netscape Communications Corporation.
      20                 :  * Portions created by the Initial Developer are Copyright (C) 1998
      21                 :  * the Initial Developer. All Rights Reserved.
      22                 :  *
      23                 :  * Contributor(s):
      24                 :  *   Blake Kaplan <mrbkap@gmail.com>
      25                 :  *
      26                 :  * Alternatively, the contents of this file may be used under the terms of
      27                 :  * either of the GNU General Public License Version 2 or later (the "GPL"),
      28                 :  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      29                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      30                 :  * of those above. If you wish to allow use of your version of this file only
      31                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      32                 :  * use your version of this file under the terms of the MPL, indicate your
      33                 :  * decision by deleting the provisions above and replace them with the notice
      34                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      35                 :  * the provisions above, a recipient may use your version of this file under
      36                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      37                 :  *
      38                 :  * ***** END LICENSE BLOCK ***** */
      39                 : 
      40                 : 
      41                 : /**
      42                 :  * @file nsHTMLTokenizer.cpp
      43                 :  * This is an implementation of the nsITokenizer interface.
      44                 :  * This file contains the implementation of a tokenizer to tokenize an HTML
      45                 :  * document. It attempts to do so, making tradeoffs between compatibility with
      46                 :  * older parsers and the SGML specification. Note that most of the real
      47                 :  * "tokenization" takes place in nsHTMLTokens.cpp.
      48                 :  */
      49                 : 
      50                 : #include "nsIAtom.h"
      51                 : #include "nsHTMLTokenizer.h"
      52                 : #include "nsScanner.h"
      53                 : #include "nsElementTable.h"
      54                 : #include "nsReadableUtils.h"
      55                 : #include "nsUnicharUtils.h"
      56                 : #include "nsParserConstants.h"
      57                 : 
      58                 : /************************************************************************
      59                 :   And now for the main class -- nsHTMLTokenizer...
      60                 :  ************************************************************************/
      61                 : 
      62                 : /**
      63                 :  * Satisfy the nsISupports interface.
      64                 :  */
      65             140 : NS_IMPL_ISUPPORTS1(nsHTMLTokenizer, nsITokenizer)
      66                 : 
      67                 : /**
      68                 :  * Default constructor
      69                 :  * 
      70                 :  * @param  aParseMode The current mode the document is in (quirks, etc.)
      71                 :  * @param  aDocType The document type of the current document
      72                 :  * @param  aCommand What we are trying to do (view-source, parse a fragment, etc.)
      73                 :  */
      74              28 : nsHTMLTokenizer::nsHTMLTokenizer(nsDTDMode aParseMode,
      75                 :                                  eParserDocType aDocType,
      76                 :                                  eParserCommands aCommand,
      77                 :                                  PRUint32 aFlags)
      78              28 :   : mTokenDeque(0), mFlags(aFlags)
      79                 : {
      80              28 :   if (aParseMode == eDTDMode_full_standards ||
      81                 :       aParseMode == eDTDMode_almost_standards) {
      82               0 :     mFlags |= NS_IPARSER_FLAG_STRICT_MODE;
      83              28 :   } else if (aParseMode == eDTDMode_quirks)  {
      84              28 :     mFlags |= NS_IPARSER_FLAG_QUIRKS_MODE;
      85               0 :   } else if (aParseMode == eDTDMode_autodetect) {
      86               0 :     mFlags |= NS_IPARSER_FLAG_AUTO_DETECT_MODE;
      87                 :   } else {
      88               0 :     mFlags |= NS_IPARSER_FLAG_UNKNOWN_MODE;
      89                 :   }
      90                 : 
      91              28 :   if (aDocType == ePlainText) {
      92               0 :     mFlags |= NS_IPARSER_FLAG_PLAIN_TEXT;
      93              28 :   } else if (aDocType == eXML) {
      94               0 :     mFlags |= NS_IPARSER_FLAG_XML;
      95              28 :   } else if (aDocType == eHTML_Quirks ||
      96                 :              aDocType == eHTML_Strict) {
      97              28 :     mFlags |= NS_IPARSER_FLAG_HTML;
      98                 :   }
      99                 :   
     100                 :   mFlags |= aCommand == eViewSource
     101                 :             ? NS_IPARSER_FLAG_VIEW_SOURCE
     102              28 :             : NS_IPARSER_FLAG_VIEW_NORMAL;
     103                 : 
     104              28 :   NS_ASSERTION(!(mFlags & NS_IPARSER_FLAG_XML) || 
     105                 :                 (mFlags & NS_IPARSER_FLAG_VIEW_SOURCE),
     106                 :               "Why isn't this XML document going through our XML parser?");
     107                 : 
     108              28 :   mTokenAllocator = nsnull;
     109              28 :   mTokenScanPos = 0;
     110              28 : }
     111                 : 
     112                 : /**
     113                 :  * The destructor ensures that we don't leak any left over tokens.
     114                 :  */
     115              84 : nsHTMLTokenizer::~nsHTMLTokenizer()
     116                 : {
     117              28 :   if (mTokenDeque.GetSize()) {
     118               0 :     CTokenDeallocator theDeallocator(mTokenAllocator->GetArenaPool());
     119               0 :     mTokenDeque.ForEach(theDeallocator);
     120                 :   }
     121             112 : }
     122                 : 
     123                 : /*static*/ PRUint32
     124              56 : nsHTMLTokenizer::GetFlags(const nsIContentSink* aSink)
     125                 : {
     126              56 :   PRUint32 flags = 0;
     127                 :   nsCOMPtr<nsIHTMLContentSink> sink =
     128             112 :     do_QueryInterface(const_cast<nsIContentSink*>(aSink));
     129              56 :   if (sink) {
     130              56 :     bool enabled = true;
     131              56 :     sink->IsEnabled(eHTMLTag_frameset, &enabled);
     132              56 :     if (enabled) {
     133              56 :       flags |= NS_IPARSER_FLAG_FRAMES_ENABLED;
     134                 :     }
     135              56 :     sink->IsEnabled(eHTMLTag_script, &enabled);
     136              56 :     if (enabled) {
     137              56 :       flags |= NS_IPARSER_FLAG_SCRIPT_ENABLED;
     138                 :     }
     139                 :   }
     140              56 :   return flags;
     141                 : }
     142                 : 
     143                 : /*******************************************************************
     144                 :   Here begin the real working methods for the tokenizer.
     145                 :  *******************************************************************/
     146                 : 
     147                 : /**
     148                 :  * Adds a token onto the end of the deque if aResult is a successful result.
     149                 :  * Otherwise, this function frees aToken and sets it to nsnull.
     150                 :  *
     151                 :  * @param aToken The token that wants to be added.
     152                 :  * @param aResult The error code that will be used to determine if we actually
     153                 :  *                want to push this token.
     154                 :  * @param aDeque The deque we want to push aToken onto.
     155                 :  * @param aTokenAllocator The allocator we use to free aToken in case aResult
     156                 :  *                        is not a success code.
     157                 :  */
     158                 : /* static */
     159                 : void
     160            3037 : nsHTMLTokenizer::AddToken(CToken*& aToken,
     161                 :                           nsresult aResult,
     162                 :                           nsDeque* aDeque,
     163                 :                           nsTokenAllocator* aTokenAllocator)
     164                 : {
     165            3037 :   if (aToken && aDeque) {
     166            3037 :     if (NS_SUCCEEDED(aResult)) {
     167            3037 :       aDeque->Push(aToken);
     168                 :     } else {
     169               0 :       IF_FREE(aToken, aTokenAllocator);
     170                 :     }
     171                 :   }
     172            3037 : }
     173                 : 
     174                 : /**
     175                 :  * Retrieve a pointer to the global token recycler...
     176                 :  *
     177                 :  * @return Pointer to recycler (or null)
     178                 :  */
     179                 : nsTokenAllocator*
     180            2573 : nsHTMLTokenizer::GetTokenAllocator()
     181                 : {
     182            2573 :   return mTokenAllocator;
     183                 : }
     184                 : 
     185                 : /**
     186                 :  * This method provides access to the topmost token in the tokenDeque.
     187                 :  * The token is not really removed from the list.
     188                 :  *
     189                 :  * @return Pointer to token
     190                 :  */
     191                 : CToken*
     192               0 : nsHTMLTokenizer::PeekToken()
     193                 : {
     194               0 :   return (CToken*)mTokenDeque.PeekFront();
     195                 : }
     196                 : 
     197                 : /**
     198                 :  * This method provides access to the topmost token in the tokenDeque.
     199                 :  * The token is really removed from the list; if the list is empty we return 0.
     200                 :  *
     201                 :  * @return Pointer to token or NULL
     202                 :  */
     203                 : CToken*
     204            3178 : nsHTMLTokenizer::PopToken()
     205                 : {
     206            3178 :   return (CToken*)mTokenDeque.PopFront();
     207                 : }
     208                 : 
     209                 : 
     210                 : /**
     211                 :  * Pushes a token onto the front of our deque such that the next call to
     212                 :  * PopToken() or PeekToken() will return that token.
     213                 :  * 
     214                 :  * @param theToken The next token to be processed
     215                 :  * @return theToken
     216                 :  */
     217                 : CToken*
     218              85 : nsHTMLTokenizer::PushTokenFront(CToken* theToken)
     219                 : {
     220              85 :   mTokenDeque.PushFront(theToken);
     221              85 :   return theToken;
     222                 : }
     223                 : 
     224                 : /**
     225                 :  * Pushes a token onto the deque.
     226                 :  * 
     227                 :  * @param theToken the new token.
     228                 :  * @return theToken
     229                 :  */
     230                 : CToken*
     231               0 : nsHTMLTokenizer::PushToken(CToken* theToken)
     232                 : {
     233               0 :   mTokenDeque.Push(theToken);
     234               0 :   return theToken;
     235                 : }
     236                 : 
     237                 : /**
     238                 :  * Returns the size of the deque.
     239                 :  *
     240                 :  * @return The number of remaining tokens.
     241                 :  */
     242                 : PRInt32
     243             581 : nsHTMLTokenizer::GetCount()
     244                 : {
     245             581 :   return mTokenDeque.GetSize();
     246                 : }
     247                 : 
     248                 : /**
     249                 :  * Allows access to an arbitrary token in the deque. The accessed token is left
     250                 :  * in the deque.
     251                 :  *
     252                 :  * @param anIndex The index of the target token. Token 0 would be the same as
     253                 :  *                the result of a call to PeekToken()
     254                 :  * @return The requested token.
     255                 :  */
     256                 : CToken*
     257              28 : nsHTMLTokenizer::GetTokenAt(PRInt32 anIndex)
     258                 : {
     259              28 :   return (CToken*)mTokenDeque.ObjectAt(anIndex);
     260                 : }
     261                 : 
     262                 : /**
     263                 :  * This method is part of the "sandwich" that occurs when we want to tokenize
     264                 :  * a document. This prepares us to be able to tokenize properly.
     265                 :  *
     266                 :  * @param aIsFinalChunk Whether this is the last chunk of data that we will
     267                 :  *                      get to see.
     268                 :  * @param aTokenAllocator The token allocator to use for this document.
     269                 :  * @return Our success in setting up.
     270                 :  */
     271                 : nsresult
     272              53 : nsHTMLTokenizer::WillTokenize(bool aIsFinalChunk,
     273                 :                               nsTokenAllocator* aTokenAllocator)
     274                 : {
     275              53 :   mTokenAllocator = aTokenAllocator;
     276              53 :   mIsFinalChunk = aIsFinalChunk;
     277                 : 
     278                 :   // Cause ScanDocStructure to search from here for new tokens...
     279              53 :   mTokenScanPos = mTokenDeque.GetSize();
     280              53 :   return NS_OK;
     281                 : }
     282                 : 
     283                 : /**
     284                 :  * Pushes all of the tokens in aDeque onto the front of our deque so they
     285                 :  * get processed before any other tokens.
     286                 :  *
     287                 :  * @param aDeque The deque with the tokens in it.
     288                 :  */
     289                 : void
     290              28 : nsHTMLTokenizer::PrependTokens(nsDeque& aDeque)
     291                 : {
     292              28 :   PRInt32 aCount = aDeque.GetSize();
     293                 :   
     294              82 :   for (PRInt32 anIndex = 0; anIndex < aCount; ++anIndex) {
     295              54 :     CToken* theToken = (CToken*)aDeque.Pop();
     296              54 :     PushTokenFront(theToken);
     297                 :   }
     298              28 : }
     299                 : 
     300                 : /**
     301                 :  * Copies the state flags from aTokenizer into this tokenizer. This is used
     302                 :  * to pass information around between the main tokenizer and tokenizers
     303                 :  * created for document.write() calls.
     304                 :  *
     305                 :  * @param aTokenizer The tokenizer with more information in it.
     306                 :  * @return NS_OK
     307                 :  */
     308                 : nsresult
     309               0 : nsHTMLTokenizer::CopyState(nsITokenizer* aTokenizer)
     310                 : {
     311               0 :   if (aTokenizer) {
     312               0 :     mFlags = ((nsHTMLTokenizer*)aTokenizer)->mFlags;
     313                 :   }
     314                 : 
     315               0 :   return NS_OK;
     316                 : }
     317                 : 
     318                 : /**
     319                 :  * This is a utility method for ScanDocStructure, which finds a given
     320                 :  * tag in the stack. The return value is meant to be used with
     321                 :  * nsDeque::ObjectAt() on aTagStack.
     322                 :  *
     323                 :  * @param   aTag -- the ID of the tag we're seeking
     324                 :  * @param   aTagStack -- the stack to be searched
     325                 :  * @return  index position of tag in stack if found, otherwise kNotFound
     326                 :  */
     327                 : static PRInt32
     328             576 : FindLastIndexOfTag(eHTMLTags aTag, nsDeque &aTagStack)
     329                 : {
     330             576 :   PRInt32 theCount = aTagStack.GetSize();
     331                 :   
     332             576 :   while (0 < theCount) {
     333            3794 :     CHTMLToken* theToken = (CHTMLToken*)aTagStack.ObjectAt(--theCount);  
     334            3794 :     if (theToken) {
     335            3794 :       eHTMLTags theTag = (eHTMLTags)theToken->GetTypeID();
     336            3794 :       if (theTag == aTag) {
     337             296 :         return theCount;
     338                 :       }
     339                 :     }
     340                 :   }
     341                 : 
     342             280 :   return kNotFound;
     343                 : }
     344                 : 
     345                 : /**
     346                 :  * This method scans the sequence of tokens to determine whether or not the
     347                 :  * tag structure of the document is well formed. In well formed cases, we can
     348                 :  * skip doing residual style handling and allow inlines to contain block-level
     349                 :  * elements.
     350                 :  *
     351                 :  * @param aFinalChunk Is unused.
     352                 :  * @return Success (currently, this function cannot fail).
     353                 :  */
     354              53 : nsresult nsHTMLTokenizer::ScanDocStructure(bool aFinalChunk)
     355                 : {
     356              53 :   nsresult result = NS_OK;
     357              53 :   if (!mTokenDeque.GetSize()) {
     358              28 :     return result;
     359                 :   }
     360                 : 
     361              25 :   CHTMLToken* theToken = (CHTMLToken*)mTokenDeque.ObjectAt(mTokenScanPos);
     362                 : 
     363                 :   // Start by finding the first start tag that hasn't been reviewed.
     364              50 :   while (mTokenScanPos > 0) {
     365               0 :     if (theToken) {
     366               0 :       eHTMLTokenTypes theType = eHTMLTokenTypes(theToken->GetTokenType());  
     367               0 :       if (theType == eToken_start &&
     368               0 :           theToken->GetContainerInfo() == eFormUnknown) {
     369               0 :         break;
     370                 :       }
     371                 :     }
     372               0 :     theToken = (CHTMLToken*)mTokenDeque.ObjectAt(--mTokenScanPos);
     373                 :   }
     374                 : 
     375                 :   // Now that we know where to start, let's walk through the
     376                 :   // tokens to see which are well-formed. Stop when you run out
     377                 :   // of fresh tokens.
     378                 : 
     379              50 :   nsDeque       theStack(0);
     380              50 :   nsDeque       tempStack(0);
     381              25 :   PRInt32       theStackDepth = 0;
     382                 :   // Don't bother if we get ridiculously deep.
     383                 :   static  const PRInt32 theMaxStackDepth = 200;
     384                 : 
     385            3087 :   while (theToken && theStackDepth < theMaxStackDepth) {
     386            3037 :     eHTMLTokenTypes theType = eHTMLTokenTypes(theToken->GetTokenType());
     387            3037 :     eHTMLTags       theTag  = (eHTMLTags)theToken->GetTypeID();
     388                 : 
     389            3037 :     if (nsHTMLElement::IsContainer(theTag)) { // Bug 54117
     390            1895 :       bool theTagIsBlock  = gHTMLElements[theTag].IsMemberOf(kBlockEntity);
     391                 :       bool theTagIsInline = theTagIsBlock
     392                 :                               ? false
     393            1895 :                               : gHTMLElements[theTag].IsMemberOf(kInlineEntity);
     394                 : 
     395            1895 :       if (theTagIsBlock || theTagIsInline || eHTMLTag_table == theTag) {
     396            1063 :         switch(theType) {
     397                 :           case eToken_start:
     398                 :             {
     399             744 :               if (gHTMLElements[theTag].ShouldVerifyHierarchy()) {
     400             490 :                 PRInt32 earlyPos = FindLastIndexOfTag(theTag, theStack);
     401             490 :                 if (earlyPos != kNotFound) {
     402                 :                   // Uh-oh, we've found a tag that is not allowed to nest at
     403                 :                   // all. Mark the previous one and all of its children as 
     404                 :                   // malformed to increase our chances of doing RS handling
     405                 :                   // on all of them. We want to do this for cases such as:
     406                 :                   // <a><div><a></a></div></a>.
     407                 :                   // Note that we have to iterate through all of the children
     408                 :                   // of the original malformed tag to protect against:
     409                 :                   // <a><font><div><a></a></div></font></a>, so that the <font>
     410                 :                   // is allowed to contain the <div>.
     411                 :                   // XXX What about <a><span><a>, where the second <a> closes
     412                 :                   // the <span>?
     413             210 :                   nsDequeIterator it(theStack, earlyPos), end(theStack.End());
     414             711 :                   while (it < end) {
     415                 :                     CHTMLToken *theMalformedToken = 
     416             291 :                         static_cast<CHTMLToken*>(it++);
     417                 :                   
     418             291 :                     theMalformedToken->SetContainerInfo(eMalformed);
     419                 :                   }
     420                 :                 }
     421                 :               }
     422                 : 
     423             744 :               theStack.Push(theToken);
     424             744 :               ++theStackDepth;
     425                 :             }
     426             744 :             break;
     427                 :           case eToken_end: 
     428                 :             {
     429                 :               CHTMLToken *theLastToken =
     430             319 :                 static_cast<CHTMLToken*>(theStack.Peek());
     431             319 :               if (theLastToken) {
     432             319 :                 if (theTag == theLastToken->GetTypeID()) {
     433             233 :                   theStack.Pop(); // Yank it for real 
     434             233 :                   theStackDepth--;
     435             233 :                   theLastToken->SetContainerInfo(eWellFormed);
     436                 :                 } else {
     437                 :                   // This token wasn't what we expected it to be! We need to
     438                 :                   // go searching for its real start tag on our stack. Each
     439                 :                   // tag in between the end tag and start tag must be malformed
     440                 : 
     441              86 :                   if (FindLastIndexOfTag(theTag, theStack) != kNotFound) {
     442                 :                     // Find theTarget in the stack, marking each (malformed!)
     443                 :                     // tag in our way.
     444              86 :                     theStack.Pop(); // Pop off theLastToken for real.
     445            1208 :                     do {
     446             604 :                       theLastToken->SetContainerInfo(eMalformed);
     447             604 :                       tempStack.Push(theLastToken);
     448             604 :                       theLastToken = static_cast<CHTMLToken*>(theStack.Pop());
     449             604 :                     } while (theLastToken && theTag != theLastToken->GetTypeID());
     450                 :                     // XXX The above test can confuse two different user-defined
     451                 :                     // tags.
     452                 : 
     453              86 :                     NS_ASSERTION(theLastToken,
     454                 :                                  "FindLastIndexOfTag lied to us!"
     455                 :                                  " We couldn't find theTag on theStack");
     456              86 :                     theLastToken->SetContainerInfo(eMalformed);
     457                 : 
     458                 :                     // Great, now push all of the other tokens back onto the
     459                 :                     // stack to preserve the general structure of the document.
     460                 :                     // Note that we don't push the target token back onto the
     461                 :                     // stack (since it was just closed).
     462             776 :                     while (tempStack.GetSize() != 0) {
     463             604 :                       theStack.Push(tempStack.Pop());
     464                 :                     }
     465                 :                   }
     466                 :                 }
     467                 :               }
     468                 :             }
     469             319 :             break;
     470                 :           default:
     471               0 :             break; 
     472                 :         }
     473                 :       }
     474                 :     }
     475                 : 
     476            3037 :     theToken = (CHTMLToken*)mTokenDeque.ObjectAt(++mTokenScanPos);
     477                 :   }
     478                 : 
     479              25 :   return result;
     480                 : }
     481                 : 
     482                 : /**
     483                 :  * This method is called after we're done tokenizing a chunk of data.
     484                 :  *
     485                 :  * @param aFinalChunk Tells us if this was the last chunk of data.
     486                 :  * @return Error result.
     487                 :  */
     488                 : nsresult
     489              53 : nsHTMLTokenizer::DidTokenize(bool aFinalChunk)
     490                 : {
     491              53 :   return ScanDocStructure(aFinalChunk);
     492                 : }
     493                 : 
     494                 : /**
     495                 :  * This method is repeatedly called by the tokenizer. 
     496                 :  * Each time, we determine the kind of token we're about to 
     497                 :  * read, and then we call the appropriate method to handle
     498                 :  * that token type.
     499                 :  *  
     500                 :  * @param  aScanner The source of our input.
     501                 :  * @param  aFlushTokens An OUT parameter to tell the caller whether it should
     502                 :  *                      process our queued tokens up to now (e.g., when we
     503                 :  *                      reach a <script>).
     504                 :  * @return Success or error
     505                 :  */
     506                 : nsresult
     507            2308 : nsHTMLTokenizer::ConsumeToken(nsScanner& aScanner, bool& aFlushTokens)
     508                 : {
     509                 :   PRUnichar theChar;
     510            2308 :   CToken* theToken = nsnull;
     511                 : 
     512            2308 :   nsresult result = aScanner.Peek(theChar);
     513                 : 
     514            2308 :   switch(result) {
     515                 :     case kEOF:
     516                 :       // Tell our caller that we've finished.
     517              53 :       return result;
     518                 : 
     519                 :     case NS_OK:
     520                 :     default:
     521            2255 :       if (!(mFlags & NS_IPARSER_FLAG_PLAIN_TEXT)) {
     522            2255 :         if (kLessThan == theChar) {
     523            1163 :           return ConsumeTag(theChar, theToken, aScanner, aFlushTokens);
     524            1092 :         } else if (kAmpersand == theChar) {
     525               0 :           return ConsumeEntity(theChar, theToken, aScanner);
     526                 :         }
     527                 :       }
     528                 : 
     529            1092 :       if (kCR == theChar || kLF == theChar) {
     530             530 :         return ConsumeNewline(theChar, theToken, aScanner);
     531                 :       } else {
     532             562 :         if (!nsCRT::IsAsciiSpace(theChar)) {
     533             281 :           if (theChar != '\0') {
     534             281 :             result = ConsumeText(theToken, aScanner);
     535                 :           } else {
     536                 :             // Skip the embedded null char. Fix bug 64098.
     537               0 :             aScanner.GetChar(theChar);
     538                 :           }
     539             281 :           break;
     540                 :         }
     541             281 :         result = ConsumeWhitespace(theChar, theToken, aScanner);
     542                 :       }
     543             281 :       break;
     544                 :   }
     545                 : 
     546             562 :   return result;
     547                 : }
     548                 : 
     549                 : /**
     550                 :  * This method is called just after a "<" has been consumed 
     551                 :  * and we know we're at the start of some kind of tagged 
     552                 :  * element. We don't know yet if it's a tag or a comment.
     553                 :  * 
     554                 :  * @param   aChar is the last char read
     555                 :  * @param   aToken is the out arg holding our new token (the function allocates
     556                 :  *                 the return token using mTokenAllocator).
     557                 :  * @param   aScanner represents our input source
     558                 :  * @param   aFlushTokens is an OUT parameter used to tell consumers to flush
     559                 :  *                       the current tokens after processing the current one.
     560                 :  * @return  error code.
     561                 :  */
     562                 : nsresult
     563            1163 : nsHTMLTokenizer::ConsumeTag(PRUnichar aChar,
     564                 :                             CToken*& aToken,
     565                 :                             nsScanner& aScanner,
     566                 :                             bool& aFlushTokens)
     567                 : {
     568                 :   PRUnichar theNextChar, oldChar;
     569            1163 :   nsresult result = aScanner.Peek(aChar, 1);
     570                 : 
     571            1163 :   if (NS_OK == result) {
     572            1163 :     switch (aChar) {
     573                 :       case kForwardSlash:
     574             319 :         result = aScanner.Peek(theNextChar, 2);
     575                 : 
     576             319 :         if (NS_OK == result) {
     577                 :           // Get the original "<" (we've already seen it with a Peek)
     578             319 :           aScanner.GetChar(oldChar);
     579                 : 
     580                 :           // XML allows non ASCII tag names, consume this as an end tag. This
     581                 :           // is needed to make XML view source work
     582             319 :           bool isXML = !!(mFlags & NS_IPARSER_FLAG_XML);
     583             319 :           if (nsCRT::IsAsciiAlpha(theNextChar) ||
     584                 :               kGreaterThan == theNextChar      ||
     585               0 :               (isXML && !nsCRT::IsAscii(theNextChar))) {
     586             319 :             result = ConsumeEndTag(aChar, aToken, aScanner);
     587                 :           } else {
     588               0 :             result = ConsumeComment(aChar, aToken, aScanner);
     589                 :           }
     590                 :         }
     591                 : 
     592             319 :         break;
     593                 : 
     594                 :       case kExclamation:
     595              50 :         result = aScanner.Peek(theNextChar, 2);
     596                 : 
     597              50 :         if (NS_OK == result) {
     598                 :           // Get the original "<" (we've already seen it with a Peek)
     599              50 :           aScanner.GetChar(oldChar);
     600                 : 
     601              50 :           if (kMinus == theNextChar || kGreaterThan == theNextChar) {
     602              25 :             result = ConsumeComment(aChar, aToken, aScanner);
     603                 :           } else {
     604              25 :             result = ConsumeSpecialMarkup(aChar, aToken, aScanner);
     605                 :           }
     606                 :         }
     607              50 :         break;
     608                 : 
     609                 :       case kQuestionMark:
     610                 :         // It must be a processing instruction...
     611                 :         // Get the original "<" (we've already seen it with a Peek)
     612               0 :         aScanner.GetChar(oldChar);
     613               0 :         result = ConsumeProcessingInstruction(aChar, aToken, aScanner);
     614               0 :         break;
     615                 : 
     616                 :       default:
     617                 :         // XML allows non ASCII tag names, consume this as a start tag.
     618             794 :         bool isXML = !!(mFlags & NS_IPARSER_FLAG_XML);
     619             794 :         if (nsCRT::IsAsciiAlpha(aChar) ||
     620               0 :             (isXML && !nsCRT::IsAscii(aChar))) {
     621                 :           // Get the original "<" (we've already seen it with a Peek)
     622             794 :           aScanner.GetChar(oldChar);
     623             794 :           result = ConsumeStartTag(aChar, aToken, aScanner, aFlushTokens);
     624                 :         } else {
     625                 :           // We are not dealing with a tag. So, don't consume the original
     626                 :           // char and leave the decision to ConsumeText().
     627               0 :           result = ConsumeText(aToken, aScanner);
     628                 :         }
     629                 :     }
     630                 :   }
     631                 : 
     632                 :   // Last ditch attempt to make sure we don't lose data.
     633            1163 :   if (kEOF == result && !aScanner.IsIncremental()) {
     634                 :     // Whoops, we don't want to lose any data! Consume the rest as text.
     635                 :     // This normally happens for either a trailing < or </
     636               0 :     result = ConsumeText(aToken, aScanner);
     637                 :   }
     638                 : 
     639            1163 :   return result;
     640                 : }
     641                 : 
     642                 : /**
     643                 :  * This method is called just after we've consumed a start or end
     644                 :  * tag, and we now have to consume its attributes.
     645                 :  * 
     646                 :  * @param   aChar is the last char read
     647                 :  * @param   aToken is the start or end tag that "owns" these attributes.
     648                 :  * @param   aScanner represents our input source
     649                 :  * @return  Error result.
     650                 :  */
     651                 : nsresult
     652             237 : nsHTMLTokenizer::ConsumeAttributes(PRUnichar aChar,
     653                 :                                    CToken* aToken,
     654                 :                                    nsScanner& aScanner)
     655                 : {
     656             237 :   bool done = false;
     657             237 :   nsresult result = NS_OK;
     658             237 :   PRInt16 theAttrCount = 0;
     659                 : 
     660             237 :   nsTokenAllocator* theAllocator = this->GetTokenAllocator();
     661                 : 
     662            1206 :   while (!done && result == NS_OK) {
     663                 :     CAttributeToken* theToken =
     664                 :       static_cast<CAttributeToken*>
     665                 :                  (theAllocator->CreateTokenOfType(eToken_attribute,
     666             732 :                                                      eHTMLTag_unknown));
     667             732 :     if (NS_LIKELY(theToken != nsnull)) {
     668                 :       // Tell the new token to finish consuming text...
     669             732 :       result = theToken->Consume(aChar, aScanner, mFlags);
     670                 : 
     671             732 :       if (NS_SUCCEEDED(result)) {
     672             732 :         ++theAttrCount;
     673             732 :         AddToken((CToken*&)theToken, result, &mTokenDeque, theAllocator);
     674                 :       } else {
     675               0 :         IF_FREE(theToken, mTokenAllocator);
     676                 :         // Bad attribute returns shouldn't propagate out.
     677               0 :         if (NS_ERROR_HTMLPARSER_BADATTRIBUTE == result) {
     678               0 :           result = NS_OK;
     679                 :         }
     680                 :       }
     681                 :     }
     682                 :     else {
     683               0 :       result = NS_ERROR_OUT_OF_MEMORY;
     684                 :     }
     685                 : 
     686                 : #ifdef DEBUG
     687             732 :     if (NS_SUCCEEDED(result)) {
     688             732 :       PRInt32 newline = 0;
     689             732 :       aScanner.SkipWhitespace(newline);
     690             732 :       NS_ASSERTION(newline == 0,
     691                 :           "CAttribute::Consume() failed to collect all the newlines!");
     692                 :     }
     693                 : #endif
     694             732 :     if (NS_SUCCEEDED(result)) {
     695             732 :       result = aScanner.Peek(aChar);
     696             732 :       if (NS_SUCCEEDED(result)) {
     697             732 :         if (aChar == kGreaterThan) { // You just ate the '>'
     698             237 :           aScanner.GetChar(aChar); // Skip the '>'
     699             237 :           done = true;
     700             495 :         } else if (aChar == kLessThan) {
     701               0 :           aToken->SetInError(true);
     702               0 :           done = true;
     703                 :         }
     704                 :       }
     705                 :     }
     706                 :   }
     707                 : 
     708             237 :   if (NS_FAILED(result)) {
     709               0 :     aToken->SetInError(true);
     710                 : 
     711               0 :     if (!aScanner.IsIncremental()) {
     712               0 :       result = NS_OK;
     713                 :     }
     714                 :   }
     715                 : 
     716             237 :   aToken->SetAttributeCount(theAttrCount);
     717             237 :   return result;
     718                 : }
     719                 : 
     720                 : /**
     721                 :  * This method consumes a start tag and all of its attributes.
     722                 :  *
     723                 :  * @param aChar The last character read from the scanner.
     724                 :  * @param aToken The OUT parameter that holds our resulting token. (allocated
     725                 :  *               by the function using mTokenAllocator)
     726                 :  * @param aScanner Our source of data
     727                 :  * @param aFlushTokens is an OUT parameter used to tell consumers to flush
     728                 :  *                     the current tokens after processing the current one.
     729                 :  * @return Error result.
     730                 :  */
     731                 : nsresult
     732             794 : nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,
     733                 :                                  CToken*& aToken,
     734                 :                                  nsScanner& aScanner,
     735                 :                                  bool& aFlushTokens)
     736                 : {
     737                 :   // Remember this for later in case you have to unwind...
     738             794 :   PRInt32 theDequeSize = mTokenDeque.GetSize();
     739             794 :   nsresult result = NS_OK;
     740                 : 
     741             794 :   nsTokenAllocator* theAllocator = this->GetTokenAllocator();
     742             794 :   aToken = theAllocator->CreateTokenOfType(eToken_start, eHTMLTag_unknown);
     743             794 :   NS_ENSURE_TRUE(aToken, NS_ERROR_OUT_OF_MEMORY);
     744                 : 
     745                 :   // Tell the new token to finish consuming text...
     746             794 :   result = aToken->Consume(aChar, aScanner, mFlags);
     747                 : 
     748             794 :   if (NS_SUCCEEDED(result)) {
     749             794 :     AddToken(aToken, result, &mTokenDeque, theAllocator);
     750                 : 
     751             794 :     eHTMLTags theTag = (eHTMLTags)aToken->GetTypeID();
     752                 : 
     753                 :     // Good. Now, let's see if the next char is ">".
     754                 :     // If so, we have a complete tag, otherwise, we have attributes.
     755             794 :     result = aScanner.Peek(aChar);
     756             794 :     if (NS_FAILED(result)) {
     757               0 :       aToken->SetInError(true);
     758                 : 
     759                 :       // Don't return early here so we can create a text and end token for
     760                 :       // the special <iframe>, <script> and similar tags down below.
     761               0 :       result = NS_OK;
     762                 :     } else {
     763             794 :       if (kGreaterThan != aChar) { // Look for a '>'
     764             237 :         result = ConsumeAttributes(aChar, aToken, aScanner);
     765                 :       } else {
     766             557 :         aScanner.GetChar(aChar);
     767                 :       }
     768                 :     }
     769                 : 
     770                 :     /*  Now that that's over with, we have one more problem to solve.
     771                 :         In the case that we just read a <SCRIPT> or <STYLE> tag, we should go and
     772                 :         consume all the content itself.
     773                 :         But XML doesn't treat these tags differently, so we shouldn't if the
     774                 :         document is XML.
     775                 :      */
     776             794 :     if (NS_SUCCEEDED(result) && !(mFlags & NS_IPARSER_FLAG_XML)) {
     777             794 :       bool isCDATA = gHTMLElements[theTag].CanContainType(kCDATA);
     778                 :       bool isPCDATA = eHTMLTag_textarea == theTag ||
     779             794 :                         eHTMLTag_title    == theTag;
     780                 : 
     781                 :       // XXX This is an evil hack, we should be able to handle these properly
     782                 :       // in the DTD.
     783             794 :       if ((eHTMLTag_iframe == theTag &&
     784                 :             (mFlags & NS_IPARSER_FLAG_FRAMES_ENABLED)) ||
     785                 :           (eHTMLTag_noframes == theTag &&
     786                 :             (mFlags & NS_IPARSER_FLAG_FRAMES_ENABLED)) ||
     787                 :           (eHTMLTag_noscript == theTag &&
     788                 :             (mFlags & NS_IPARSER_FLAG_SCRIPT_ENABLED)) ||
     789                 :           (eHTMLTag_noembed == theTag)) {
     790               0 :         isCDATA = true;
     791                 :       }
     792                 : 
     793                 :       // Plaintext contains CDATA, but it's special, so we handle it
     794                 :       // differently than the other CDATA elements
     795             794 :       if (eHTMLTag_plaintext == theTag) {
     796               0 :         isCDATA = false;
     797                 : 
     798                 :         // Note: We check in ConsumeToken() for this flag, and if we see it
     799                 :         // we only construct text tokens (which is what we want).
     800               0 :         mFlags |= NS_IPARSER_FLAG_PLAIN_TEXT;
     801                 :       }
     802                 : 
     803                 : 
     804             794 :       if (isCDATA || isPCDATA) {
     805              25 :         bool done = false;
     806              50 :         nsDependentString endTagName(nsHTMLTags::GetStringValue(theTag)); 
     807                 : 
     808                 :         CToken* text =
     809              25 :             theAllocator->CreateTokenOfType(eToken_text, eHTMLTag_text);
     810              25 :         NS_ENSURE_TRUE(text, NS_ERROR_OUT_OF_MEMORY);
     811                 : 
     812              25 :         CTextToken* textToken = static_cast<CTextToken*>(text);
     813                 : 
     814              25 :         if (isCDATA) {
     815                 :           result = textToken->ConsumeCharacterData(theTag != eHTMLTag_script,
     816                 :                                                    aScanner,
     817                 :                                                    endTagName,
     818                 :                                                    mFlags,
     819               0 :                                                    done);
     820                 : 
     821                 :           // Only flush tokens for <script>, to give ourselves more of a
     822                 :           // chance of allowing inlines to contain blocks.
     823               0 :           aFlushTokens = done && theTag == eHTMLTag_script;
     824              25 :         } else if (isPCDATA) {
     825                 :           // Title is consumed conservatively in order to not regress
     826                 :           // bug 42945
     827                 :           result = textToken->ConsumeParsedCharacterData(
     828                 :                                                   theTag == eHTMLTag_textarea,
     829                 :                                                   theTag == eHTMLTag_title,
     830                 :                                                   aScanner,
     831                 :                                                   endTagName,
     832                 :                                                   mFlags,
     833              25 :                                                   done);
     834                 : 
     835                 :           // Note: we *don't* set aFlushTokens here.
     836                 :         }
     837                 : 
     838                 :         // We want to do this unless result is kEOF, in which case we will
     839                 :         // simply unwind our stack and wait for more data anyway.
     840              25 :         if (kEOF != result) {
     841              25 :           AddToken(text, NS_OK, &mTokenDeque, theAllocator);
     842              25 :           CToken* endToken = nsnull;
     843                 : 
     844              25 :           if (NS_SUCCEEDED(result) && done) {
     845                 :             PRUnichar theChar;
     846                 :             // Get the <
     847              25 :             result = aScanner.GetChar(theChar);
     848              25 :             NS_ASSERTION(NS_SUCCEEDED(result) && theChar == kLessThan,
     849                 :                          "CTextToken::Consume*Data is broken!");
     850                 : #ifdef DEBUG
     851                 :             // Ensure we have a /
     852                 :             PRUnichar tempChar;  // Don't change non-debug vars in debug-only code
     853              25 :             result = aScanner.Peek(tempChar);
     854              25 :             NS_ASSERTION(NS_SUCCEEDED(result) && tempChar == kForwardSlash,
     855                 :                          "CTextToken::Consume*Data is broken!");
     856                 : #endif
     857              25 :             result = ConsumeEndTag(PRUnichar('/'), endToken, aScanner);
     858              50 :             if (!(mFlags & NS_IPARSER_FLAG_VIEW_SOURCE) &&
     859              25 :                 NS_SUCCEEDED(result)) {
     860                 :               // If ConsumeCharacterData returned a success result (and
     861                 :               // we're not in view source), then we want to make sure that
     862                 :               // we're going to execute this script (since the result means
     863                 :               // that we've found an end tag that satisfies all of the right
     864                 :               // conditions).
     865              25 :               endToken->SetInError(false);
     866              25 :             }
     867               0 :           } else if (result == kFakeEndTag &&
     868               0 :                     !(mFlags & NS_IPARSER_FLAG_VIEW_SOURCE)) {
     869               0 :             result = NS_OK;
     870                 :             endToken = theAllocator->CreateTokenOfType(eToken_end, theTag,
     871               0 :                                                        endTagName);
     872               0 :             AddToken(endToken, result, &mTokenDeque, theAllocator);
     873               0 :             if (NS_LIKELY(endToken != nsnull)) {
     874               0 :               endToken->SetInError(true);
     875                 :             }
     876                 :             else {
     877               0 :               result = NS_ERROR_OUT_OF_MEMORY;
     878                 :             }
     879               0 :           } else if (result == kFakeEndTag) {
     880                 :             // If we are here, we are both faking having seen the end tag
     881                 :             // and are in view-source.
     882               0 :             result = NS_OK;
     883                 :           }
     884                 :         } else {
     885               0 :           IF_FREE(text, mTokenAllocator);
     886                 :         }
     887                 :       }
     888                 :     }
     889                 : 
     890                 :     // This code is confusing, so pay attention.
     891                 :     // If you're here, it's because we were in the midst of consuming a start
     892                 :     // tag but ran out of data (not in the stream, but in this *part* of the
     893                 :     // stream). For simplicity, we have to unwind our input. Therefore, we pop
     894                 :     // and discard any new tokens we've queued this round. Later we can get
     895                 :     // smarter about this.
     896             794 :     if (NS_FAILED(result)) {
     897               0 :       while (mTokenDeque.GetSize()>theDequeSize) {
     898               0 :         CToken* theToken = (CToken*)mTokenDeque.Pop();
     899               0 :         IF_FREE(theToken, mTokenAllocator);
     900                 :       }
     901                 :     }
     902                 :   } else {
     903               0 :     IF_FREE(aToken, mTokenAllocator);
     904                 :   }
     905                 : 
     906             794 :   return result;
     907                 : }
     908                 : 
     909                 : /**
     910                 :  * This method consumes an end tag and any "attributes" that may come after it.
     911                 :  *
     912                 :  * @param aChar The last character read from the scanner.
     913                 :  * @param aToken The OUT parameter that holds our resulting token.
     914                 :  * @param aScanner Our source of data
     915                 :  * @return Error result
     916                 :  */
     917                 : nsresult
     918             344 : nsHTMLTokenizer::ConsumeEndTag(PRUnichar aChar,
     919                 :                                CToken*& aToken,
     920                 :                                nsScanner& aScanner)
     921                 : {
     922                 :   // Get the "/" (we've already seen it with a Peek)
     923             344 :   aScanner.GetChar(aChar);
     924                 : 
     925             344 :   nsTokenAllocator* theAllocator = this->GetTokenAllocator();
     926             344 :   aToken = theAllocator->CreateTokenOfType(eToken_end, eHTMLTag_unknown);
     927             344 :   NS_ENSURE_TRUE(aToken, NS_ERROR_OUT_OF_MEMORY);
     928                 : 
     929                 :   // Remember this for later in case you have to unwind...
     930             344 :   PRInt32 theDequeSize = mTokenDeque.GetSize();
     931             344 :   nsresult result = NS_OK;
     932                 : 
     933                 :   // Tell the new token to finish consuming text...
     934             344 :   result = aToken->Consume(aChar, aScanner, mFlags);
     935             344 :   AddToken(aToken, result, &mTokenDeque, theAllocator);
     936             344 :   if (NS_FAILED(result)) {
     937                 :     // Note that this early-return here is safe because we have not yet
     938                 :     // added any of our tokens to the queue (AddToken only adds the token if
     939                 :     // result is a success), so we don't need to fall through.
     940               0 :     return result;
     941                 :   }
     942                 : 
     943             344 :   result = aScanner.Peek(aChar);
     944             344 :   if (NS_FAILED(result)) {
     945               0 :     aToken->SetInError(true);
     946                 : 
     947                 :     // Note: We know here that the scanner is not incremental since if
     948                 :     // this peek fails, then we've already masked over a kEOF coming from
     949                 :     // the Consume() call above.
     950               0 :     return NS_OK;
     951                 :   }
     952                 : 
     953             344 :   if (kGreaterThan != aChar) {
     954               0 :     result = ConsumeAttributes(aChar, aToken, aScanner);
     955                 :   } else {
     956             344 :     aScanner.GetChar(aChar);
     957                 :   }
     958                 : 
     959                 :   // Do the same thing as we do in ConsumeStartTag. Basically, if we've run
     960                 :   // out of room in this *section* of the document, pop all of the tokens
     961                 :   // we've consumed this round and wait for more data.
     962             344 :   if (NS_FAILED(result)) {
     963               0 :     while (mTokenDeque.GetSize() > theDequeSize) {
     964               0 :       CToken* theToken = (CToken*)mTokenDeque.Pop();
     965               0 :       IF_FREE(theToken, mTokenAllocator);
     966                 :     }
     967                 :   }
     968                 : 
     969             344 :   return result;
     970                 : }
     971                 : 
     972                 : /**
     973                 :  *  This method is called just after a "&" has been consumed 
     974                 :  *  and we know we're at the start of an entity.  
     975                 :  *  
     976                 :  * @param aChar The last character read from the scanner.
     977                 :  * @param aToken The OUT parameter that holds our resulting token.
     978                 :  * @param aScanner Our source of data
     979                 :  * @return Error result. 
     980                 :  */
     981                 : nsresult
     982               0 : nsHTMLTokenizer::ConsumeEntity(PRUnichar aChar,
     983                 :                                CToken*& aToken,
     984                 :                                nsScanner& aScanner)
     985                 : {
     986                 :   PRUnichar  theChar;
     987               0 :   nsresult result = aScanner.Peek(theChar, 1);
     988                 : 
     989               0 :   nsTokenAllocator* theAllocator = this->GetTokenAllocator();
     990               0 :   if (NS_SUCCEEDED(result)) {
     991               0 :     if (nsCRT::IsAsciiAlpha(theChar) || theChar == kHashsign) {
     992               0 :       aToken = theAllocator->CreateTokenOfType(eToken_entity, eHTMLTag_entity);
     993               0 :       NS_ENSURE_TRUE(aToken, NS_ERROR_OUT_OF_MEMORY);
     994               0 :       result = aToken->Consume(theChar, aScanner, mFlags);
     995                 : 
     996               0 :       if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) {
     997               0 :         IF_FREE(aToken, mTokenAllocator);
     998                 :       } else {
     999               0 :         if (result == kEOF && !aScanner.IsIncremental()) {
    1000               0 :           result = NS_OK; // Use as much of the entity as you can get.
    1001                 :         }
    1002                 : 
    1003               0 :         AddToken(aToken, result, &mTokenDeque, theAllocator);
    1004               0 :         return result;
    1005                 :       }
    1006                 :     }
    1007                 : 
    1008                 :     // Oops, we're actually looking at plain text...
    1009               0 :     result = ConsumeText(aToken, aScanner);
    1010               0 :   } else if (result == kEOF && !aScanner.IsIncremental()) {
    1011                 :     // If the last character in the file is an &, consume it as text.
    1012               0 :     result = ConsumeText(aToken, aScanner);
    1013               0 :     if (aToken) {
    1014               0 :       aToken->SetInError(true);
    1015                 :     }
    1016                 :   }
    1017                 : 
    1018               0 :   return result;
    1019                 : }
    1020                 : 
    1021                 : 
    1022                 : /**
    1023                 :  *  This method is called just after whitespace has been 
    1024                 :  *  consumed and we know we're at the start of a whitespace run.  
    1025                 :  *  
    1026                 :  * @param aChar The last character read from the scanner.
    1027                 :  * @param aToken The OUT parameter that holds our resulting token.
    1028                 :  * @param aScanner Our source of data
    1029                 :  * @return Error result.
    1030                 :  */
    1031                 : nsresult
    1032             281 : nsHTMLTokenizer::ConsumeWhitespace(PRUnichar aChar,
    1033                 :                                    CToken*& aToken,
    1034                 :                                    nsScanner& aScanner)
    1035                 : {
    1036                 :   // Get the whitespace character
    1037             281 :   aScanner.GetChar(aChar);
    1038                 : 
    1039             281 :   nsTokenAllocator* theAllocator = this->GetTokenAllocator();
    1040                 :   aToken = theAllocator->CreateTokenOfType(eToken_whitespace,
    1041             281 :                                            eHTMLTag_whitespace);
    1042             281 :   nsresult result = NS_OK;
    1043             281 :   if (aToken) {
    1044             281 :     result = aToken->Consume(aChar, aScanner, mFlags);
    1045             281 :     AddToken(aToken, result, &mTokenDeque, theAllocator);
    1046                 :   }
    1047                 : 
    1048             281 :   return result;
    1049                 : }
    1050                 : 
    1051                 : /**
    1052                 :  *  This method is called just after a "<!" has been consumed 
    1053                 :  *  and we know we're at the start of a comment.  
    1054                 :  *  
    1055                 :  * @param aChar The last character read from the scanner.
    1056                 :  * @param aToken The OUT parameter that holds our resulting token.
    1057                 :  * @param aScanner Our source of data
    1058                 :  * @return Result of Consume(), or of ConsumeText() if that was kNotAComment.
    1059                 :  */
    1060                 : nsresult
    1061              25 : nsHTMLTokenizer::ConsumeComment(PRUnichar aChar,
    1062                 :                                 CToken*& aToken,
    1063                 :                                 nsScanner& aScanner)
    1064                 : {
    1065                 :   // Get the "!"
    1066              25 :   aScanner.GetChar(aChar);
    1067                 : 
    1068              25 :   nsTokenAllocator* theAllocator = this->GetTokenAllocator();
    1069              25 :   aToken = theAllocator->CreateTokenOfType(eToken_comment, eHTMLTag_comment);
    1070              25 :   nsresult result = NS_OK;
    1071              25 :   if (aToken) {
    1072              25 :     result = aToken->Consume(aChar, aScanner, mFlags);
    1073              25 :     AddToken(aToken, result, &mTokenDeque, theAllocator);
    1074                 :   }
    1075                 : 
    1076              25 :   if (kNotAComment == result) {
    1077                 :     // AddToken has IF_FREE()'d our token, so consume the input as text.
    1078               0 :     result = ConsumeText(aToken, aScanner);
    1079                 :   }
    1080                 : 
    1081              25 :   return result;
    1082                 : }
    1083                 : 
    1084                 : /**
    1085                 :  * This method is called just after a known text char has
    1086                 :  * been consumed and we should read a text run. Note: we actually ignore the
    1087                 :  * first character of the text run so that we can consume invalid markup 
    1088                 :  * as text.
    1089                 :  *  
    1090                 :  * @param aToken The OUT parameter that holds our resulting token.
    1091                 :  * @param aScanner Our source of data
    1092                 :  * @return NS_OK, or the Consume() failure code when no text was read.
    1093                 :  */ 
    1094                 : nsresult
    1095             281 : nsHTMLTokenizer::ConsumeText(CToken*& aToken, nsScanner& aScanner)
    1096                 : {
    1097             281 :   nsresult result = NS_OK;
    1098             281 :   nsTokenAllocator* theAllocator = this->GetTokenAllocator();
    1099                 :   CTextToken* theToken =
    1100             281 :     (CTextToken*)theAllocator->CreateTokenOfType(eToken_text, eHTMLTag_text);
    1101             281 :   if (theToken) {
    1102             281 :     PRUnichar ch = '\0';
    1103             281 :     result = theToken->Consume(ch, aScanner, mFlags);
    1104             281 :     if (NS_FAILED(result)) {
    1105               0 :       if (0 == theToken->GetTextLength()) {  // nothing read: keep the failure
    1106               0 :         IF_FREE(aToken, mTokenAllocator);  // NOTE(review): not theAllocator
    1107               0 :         aToken = nsnull;
    1108                 :       } else {
    1109               0 :         result = NS_OK;  // some text was read: report success
    1110                 :       }
    1111                 :     }
    1112                 : 
    1113             281 :     aToken = theToken;  // NOTE(review): also replaces the nsnull set above
    1114             281 :     AddToken(aToken, result, &mTokenDeque, theAllocator);
    1115                 :   }
    1116                 : 
    1117             281 :   return result;
    1118                 : }
    1119                 : 
    1120                 : /**
    1121                 :  * This method is called just after a "<!" has been consumed.
    1122                 :  * NOTE: Here we might consume DOCTYPE, CDATA, markup-decl or comment tokens.
    1123                 :  * 
    1124                 :  * @param aChar The last character read from the scanner.
    1125                 :  * @param aToken The OUT parameter that holds our resulting token.
    1126                 :  * @param aScanner Our source of data
    1127                 :  * @return Result of Consume(), or of ConsumeText() if that was kNotAComment.
    1128                 :  */
    1129                 : nsresult
    1130              25 : nsHTMLTokenizer::ConsumeSpecialMarkup(PRUnichar aChar,
    1131                 :                                       CToken*& aToken,
    1132                 :                                       nsScanner& aScanner)
    1133                 : {
    1134                 :   // Get the "!"
    1135              25 :   aScanner.GetChar(aChar);
    1136                 : 
    1137              25 :   nsresult result = NS_OK;
    1138              50 :   nsAutoString theBufCopy;
    1139              25 :   aScanner.Peek(theBufCopy, 20);
    1140              25 :   ToUpperCase(theBufCopy);  // the keywords compared below are upper-case
    1141              25 :   PRInt32 theIndex = theBufCopy.Find("DOCTYPE", false, 0, 0);
    1142              25 :   nsTokenAllocator* theAllocator = this->GetTokenAllocator();
    1143                 : 
    1144              25 :   if (theIndex == kNotFound) {  // no "DOCTYPE": pick one of the other tokens
    1145               0 :     if ('[' == theBufCopy.CharAt(0)) {
    1146                 :       aToken = theAllocator->CreateTokenOfType(eToken_cdatasection,
    1147               0 :                                                eHTMLTag_comment);
    1148               0 :     } else if (StringBeginsWith(theBufCopy, NS_LITERAL_STRING("ELEMENT")) ||
    1149               0 :                StringBeginsWith(theBufCopy, NS_LITERAL_STRING("ATTLIST")) ||
    1150               0 :                StringBeginsWith(theBufCopy, NS_LITERAL_STRING("ENTITY"))  ||
    1151               0 :                StringBeginsWith(theBufCopy, NS_LITERAL_STRING("NOTATION"))) {
    1152                 :       aToken = theAllocator->CreateTokenOfType(eToken_markupDecl,
    1153               0 :                                                eHTMLTag_markupDecl);
    1154                 :     } else {
    1155                 :       aToken = theAllocator->CreateTokenOfType(eToken_comment,
    1156               0 :                                                eHTMLTag_comment);
    1157                 :     }
    1158                 :   } else {
    1159                 :     aToken = theAllocator->CreateTokenOfType(eToken_doctypeDecl,
    1160              25 :                                              eHTMLTag_doctypeDecl);
    1161                 :   }
    1162                 : 
    1163              25 :   if (aToken) {
    1164              25 :     result = aToken->Consume(aChar, aScanner, mFlags);
    1165              25 :     AddToken(aToken, result, &mTokenDeque, theAllocator);
    1166                 :   }
    1167                 : 
    1168              25 :   if (result == kNotAComment) {
    1169               0 :     result = ConsumeText(aToken, aScanner);  // consume the input as text
    1170                 :   }
    1171                 : 
    1172              25 :   return result;
    1173                 : }
    1174                 : 
    1175                 : /**
    1176                 :  * This method is called just after a newline has been consumed. 
    1177                 :  *  
    1178                 :  * @param aChar The last character read from the scanner.
    1179                 :  * @param aToken The OUT parameter that holds our resulting token.
    1180                 :  * @param aScanner Our source of data
    1181                 :  * @return Result of the token's Consume(), or NS_OK if no token was created.
    1182                 :  */
    1183                 : nsresult
    1184             530 : nsHTMLTokenizer::ConsumeNewline(PRUnichar aChar,
    1185                 :                                 CToken*& aToken,
    1186                 :                                 nsScanner& aScanner)
    1187                 : {
    1188                 :   // Get the newline character
    1189             530 :   aScanner.GetChar(aChar);
    1190                 : 
    1191             530 :   nsTokenAllocator* theAllocator = this->GetTokenAllocator();
    1192             530 :   aToken = theAllocator->CreateTokenOfType(eToken_newline, eHTMLTag_newline);
    1193             530 :   nsresult result = NS_OK;
    1194             530 :   if (aToken) {
    1195             530 :     result = aToken->Consume(aChar, aScanner, mFlags);
    1196             530 :     AddToken(aToken, result, &mTokenDeque, theAllocator);
    1197                 :   }
    1198                 : 
    1199             530 :   return result;
    1200                 : }
    1201                 : 
    1202                 : 
    1203                 : /**
    1204                 :  * This method is called just after a "<?" has been consumed. 
    1205                 :  *  
    1206                 :  * @param aChar The last character read from the scanner.
    1207                 :  * @param aToken The OUT parameter that holds our resulting token.
    1208                 :  * @param aScanner Our source of data
    1209                 :  * @return Result of the token's Consume(), or NS_OK if no token was created.
    1210                 :  */
    1211                 : nsresult
    1212               0 : nsHTMLTokenizer::ConsumeProcessingInstruction(PRUnichar aChar,
    1213                 :                                               CToken*& aToken,
    1214                 :                                               nsScanner& aScanner)
    1215                 : {
    1216                 :   // Get the "?"
    1217               0 :   aScanner.GetChar(aChar);
    1218                 : 
    1219               0 :   nsTokenAllocator* theAllocator = this->GetTokenAllocator();
    1220                 :   aToken = theAllocator->CreateTokenOfType(eToken_instruction,
    1221               0 :                                            eHTMLTag_unknown);
    1222               0 :   nsresult result = NS_OK;
    1223               0 :   if (aToken) {
    1224               0 :     result = aToken->Consume(aChar, aScanner, mFlags);
    1225               0 :     AddToken(aToken, result, &mTokenDeque, theAllocator);
    1226                 :   }
    1227                 : 
    1228               0 :   return result;
    1229                 : }

Generated by: LCOV version 1.7