LCOV - code coverage report
Current view: directory - parser/htmlparser/src - nsParser.cpp (source / functions) Found Hit Coverage
Test: app.info Lines: 886 566 63.9 %
Date: 2012-06-02 Functions: 72 45 62.5 %

       1                 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2                 : /* vim: set sw=2 ts=2 et tw=79: */
       3                 : /* ***** BEGIN LICENSE BLOCK *****
       4                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       5                 :  *
       6                 :  * The contents of this file are subject to the Mozilla Public License Version
       7                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       8                 :  * the License. You may obtain a copy of the License at
       9                 :  * http://www.mozilla.org/MPL/
      10                 :  *
      11                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      12                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      13                 :  * for the specific language governing rights and limitations under the
      14                 :  * License.
      15                 :  *
      16                 :  * The Original Code is mozilla.org code.
      17                 :  *
      18                 :  * The Initial Developer of the Original Code is
      19                 :  * Netscape Communications Corporation.
      20                 :  * Portions created by the Initial Developer are Copyright (C) 1998
      21                 :  * the Initial Developer. All Rights Reserved.
      22                 :  *
      23                 :  * Contributor(s):
      24                 :  *   Pierre Phaneuf <pp@ludusdesign.com>
      25                 :  *
      26                 :  * Alternatively, the contents of this file may be used under the terms of
      27                 :  * either of the GNU General Public License Version 2 or later (the "GPL"),
      28                 :  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      29                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      30                 :  * of those above. If you wish to allow use of your version of this file only
      31                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      32                 :  * use your version of this file under the terms of the MPL, indicate your
      33                 :  * decision by deleting the provisions above and replace them with the notice
      34                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      35                 :  * the provisions above, a recipient may use your version of this file under
      36                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      37                 :  *
      38                 :  * ***** END LICENSE BLOCK ***** */
      39                 : 
      40                 : #include "nsIAtom.h"
      41                 : #include "nsParser.h"
      42                 : #include "nsString.h"
      43                 : #include "nsCRT.h"
      44                 : #include "nsScanner.h"
      45                 : #include "plstr.h"
      46                 : #include "nsIStringStream.h"
      47                 : #include "nsIChannel.h"
      48                 : #include "nsICachingChannel.h"
      49                 : #include "nsICacheEntryDescriptor.h"
      50                 : #include "nsCharsetAlias.h"
      51                 : #include "nsICharsetConverterManager.h"
      52                 : #include "nsIInputStream.h"
      53                 : #include "CNavDTD.h"
      54                 : #include "prenv.h"
      55                 : #include "prlock.h"
      56                 : #include "prcvar.h"
      57                 : #include "nsParserCIID.h"
      58                 : #include "nsReadableUtils.h"
      59                 : #include "nsCOMPtr.h"
      60                 : #include "nsExpatDriver.h"
      61                 : #include "nsIServiceManager.h"
      62                 : #include "nsICategoryManager.h"
      63                 : #include "nsISupportsPrimitives.h"
      64                 : #include "nsIFragmentContentSink.h"
      65                 : #include "nsStreamUtils.h"
      66                 : #include "nsHTMLTokenizer.h"
      67                 : #include "nsIDocument.h"
      68                 : #include "nsNetUtil.h"
      69                 : #include "nsScriptLoader.h"
      70                 : #include "nsDataHashtable.h"
      71                 : #include "nsIThreadPool.h"
      72                 : #include "nsXPCOMCIDInternal.h"
      73                 : #include "nsMimeTypes.h"
      74                 : #include "mozilla/CondVar.h"
      75                 : #include "mozilla/Mutex.h"
      76                 : #include "nsParserConstants.h"
      77                 : 
      78                 : using namespace mozilla;
      79                 : 
      80                 : #define NS_PARSER_FLAG_PARSER_ENABLED         0x00000002
      81                 : #define NS_PARSER_FLAG_OBSERVERS_ENABLED      0x00000004
      82                 : #define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008
      83                 : #define NS_PARSER_FLAG_FLUSH_TOKENS           0x00000020
      84                 : #define NS_PARSER_FLAG_CAN_TOKENIZE           0x00000040
      85                 : 
      86                 : static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
      87                 : static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID);
      88                 : static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID);
      89                 : 
      90                 : //-------------- Begin ParseContinue Event Definition ------------------------
      91                 : /*
      92                 : The parser can be explicitly interrupted by passing a return value of
      93                 : NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause
      94                 : the parser to stop processing and allow the application to return to the event
      95                 : loop. The data which was left at the time of interruption will be processed
      96                 : the next time OnDataAvailable is called. If the parser has received its final
      97                 : chunk of data then OnDataAvailable will no longer be called by the networking
      98                 : module, so the parser will schedule a nsParserContinueEvent which will call
      99                 : the parser to process the remaining data after returning to the event loop.
     100                 : If the parser is interrupted while processing the remaining data it will
     101                 : schedule another nsParserContinueEvent. The processing of data followed by
     102                 : scheduling of the continue events will proceed until either:
     103                 : 
     104                 :   1) All of the remaining data can be processed without interrupting
     105                 :   2) The parser has been cancelled.
     106                 : 
     107                 : 
     108                 : This capability is currently used in CNavDTD and nsHTMLContentSink. The
     109                 : nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be
     110                 : processed and when each token is processed. The nsHTMLContentSink records
     111                 : the time when the chunk has started processing and will return
     112                 : NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a
     113                 : threshold called max tokenizing processing time. This allows the content sink
     114                 : to limit how much data is processed in a single chunk which in turn gates how
     115                 : much time is spent away from the event loop. Processing smaller chunks of data
     116                 : also reduces the time spent in subsequent reflows.
     117                 : 
     118                 : This capability is most apparent when loading large documents. If the maximum
     119                 : token processing time is set small enough the application will remain
     120                 : responsive during document load.
     121                 : 
     122                 : A side-effect of this capability is that document load is not complete when
     123                 : the last chunk of data is passed to OnDataAvailable since the parser may have
     124                 : been interrupted when the last chunk of data arrived. The document is complete
     125                 : when all of the document has been tokenized and there aren't any pending
     126                 : nsParserContinueEvents. This can cause problems if the application assumes
     127                 : that it can monitor the load requests to determine when the document load has
     128                 : been completed. This is what happens in Mozilla. The document is considered
     129                 : completely loaded when all of the load requests have been satisfied. To delay
     130                 : the document load until all of the parsing has been completed the
     131                 : nsHTMLContentSink adds a dummy parser load request which is not removed until
     132                 : the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
     133                 : DidBuildModel until the final chunk of data has been passed to the parser
     134                 : through the OnDataAvailable and there aren't any pending
     135                 : nsParserContinueEvents.
     136                 : 
     137                 : Currently the parser ignores requests to be interrupted during the
     138                 : processing of script.  This is because a document.write followed by JavaScript
     139                 : calls to manipulate the DOM may fail if the parser was interrupted during the
     140                 : document.write.
     141                 : 
     142                 : For more details @see bugzilla bug 76722
     143                 : */
     144                 : 
     145                 : 
     146                 : class nsParserContinueEvent : public nsRunnable
     147               0 : {
     148                 : public:
     149                 :   nsRefPtr<nsParser> mParser;
     150                 : 
     151               0 :   nsParserContinueEvent(nsParser* aParser)
     152               0 :     : mParser(aParser)
     153               0 :   {}
     154                 : 
     155               0 :   NS_IMETHOD Run()
     156                 :   {
     157               0 :     mParser->HandleParserContinueEvent(this);
     158               0 :     return NS_OK;
     159                 :   }
     160                 : };
     161                 : 
     162                 : //-------------- End ParseContinue Event Definition ------------------------
     163                 : 
     164                 : nsICharsetConverterManager* nsParser::sCharsetConverterManager = nsnull;
     165                 : 
     166                 : /**
     167                 :  *  This gets called when the htmlparser module is initialized.
     168                 :  */
     169                 : // static
     170                 : nsresult
     171             263 : nsParser::Init()
     172                 : {
     173                 :   nsresult rv;
     174                 : 
     175                 :   nsCOMPtr<nsICharsetConverterManager> charsetConverter =
     176             526 :     do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
     177             263 :   NS_ENSURE_SUCCESS(rv, rv);
     178                 : 
     179             263 :   charsetConverter.swap(sCharsetConverterManager);
     180                 : 
     181             263 :   return NS_OK;
     182                 : }
     183                 : 
     184                 : 
     185                 : /**
     186                 :  *  This gets called when the htmlparser module is shutdown.
     187                 :  */
     188                 : // static
     189             263 : void nsParser::Shutdown()
     190                 : {
     191             263 :   NS_IF_RELEASE(sCharsetConverterManager);
     192             263 : }
     193                 : 
     194                 : #ifdef DEBUG
     195                 : static bool gDumpContent=false;
     196                 : #endif
     197                 : 
     198                 : /**
     199                 :  *  default constructor
     200                 :  */
     201            3345 : nsParser::nsParser()
     202                 : {
     203            3345 :   Initialize(true);
     204            3345 : }
     205                 : 
     206           10035 : nsParser::~nsParser()
     207                 : {
     208            3345 :   Cleanup();
     209           13380 : }
     210                 : 
     211                 : void
     212            3346 : nsParser::Initialize(bool aConstructor)
     213                 : {
     214                 : #ifdef NS_DEBUG
     215            3346 :   if (!gDumpContent) {
     216            3346 :     gDumpContent = PR_GetEnv("PARSER_DUMP_CONTENT") != nsnull;
     217                 :   }
     218                 : #endif
     219                 : 
     220            3346 :   if (aConstructor) {
     221                 :     // Raw pointer
     222            3345 :     mParserContext = 0;
     223                 :   }
     224                 :   else {
     225                 :     // nsCOMPtrs
     226               1 :     mObserver = nsnull;
     227               1 :     mUnusedInput.Truncate();
     228                 :   }
     229                 : 
     230            3346 :   mContinueEvent = nsnull;
     231            3346 :   mCharsetSource = kCharsetUninitialized;
     232            3346 :   mCharset.AssignLiteral("ISO-8859-1");
     233            3346 :   mInternalState = NS_OK;
     234            3346 :   mStreamStatus = 0;
     235            3346 :   mCommand = eViewNormal;
     236                 :   mFlags = NS_PARSER_FLAG_OBSERVERS_ENABLED |
     237                 :            NS_PARSER_FLAG_PARSER_ENABLED |
     238            3346 :            NS_PARSER_FLAG_CAN_TOKENIZE;
     239                 : 
     240            3346 :   mProcessingNetworkData = false;
     241            3346 :   mIsAboutBlank = false;
     242            3346 : }
     243                 : 
     244                 : void
     245            3346 : nsParser::Cleanup()
     246                 : {
     247                 : #ifdef NS_DEBUG
     248            3346 :   if (gDumpContent) {
     249               0 :     if (mSink) {
     250                 :       // Sink (HTMLContentSink at this time) supports nsIDebugDumpContent
     251                 :       // interface. We can get to the content model through the sink.
     252               0 :       nsresult result = NS_OK;
     253               0 :       nsCOMPtr<nsIDebugDumpContent> trigger = do_QueryInterface(mSink, &result);
     254               0 :       if (NS_SUCCEEDED(result)) {
     255               0 :         trigger->DumpContentModel();
     256                 :       }
     257                 :     }
     258                 :   }
     259                 : #endif
     260                 : 
     261                 : #ifdef DEBUG
     262            3346 :   if (mParserContext && mParserContext->mPrevContext) {
     263               0 :     NS_WARNING("Extra parser contexts still on the parser stack");
     264                 :   }
     265                 : #endif
     266                 : 
     267           10036 :   while (mParserContext) {
     268            3344 :     CParserContext *pc = mParserContext->mPrevContext;
     269            3344 :     delete mParserContext;
     270            3344 :     mParserContext = pc;
     271                 :   }
     272                 : 
     273                 :   // It should not be possible for this flag to be set when we are getting
     274                 :   // destroyed since this flag implies a pending nsParserContinueEvent, which
     275                 :   // has an owning reference to |this|.
     276            3346 :   NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad");
     277            3346 : }
     278                 : 
     279            1464 : NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser)
     280                 : 
     281               0 : NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser)
     282               0 :   NS_IMPL_CYCLE_COLLECTION_UNLINK_NSCOMPTR(mDTD)
     283               0 :   NS_IMPL_CYCLE_COLLECTION_UNLINK_NSCOMPTR(mSink)
     284               0 :   NS_IMPL_CYCLE_COLLECTION_UNLINK_NSCOMPTR(mObserver)
     285               0 : NS_IMPL_CYCLE_COLLECTION_UNLINK_END
     286                 : 
     287             111 : NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser)
     288             111 :   NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mDTD)
     289             111 :   NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mSink)
     290             111 :   NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mObserver)
     291             111 :   CParserContext *pc = tmp->mParserContext;
     292             333 :   while (pc) {
     293             111 :     cb.NoteXPCOMChild(pc->mTokenizer);
     294             111 :     pc = pc->mPrevContext;
     295                 :   }
     296             111 : NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
     297                 : 
     298           19687 : NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser)
     299           19687 : NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser)
     300           39567 : NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser)
     301           26630 :   NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
     302           20431 :   NS_INTERFACE_MAP_ENTRY(nsIParser)
     303           13673 :   NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
     304           13673 :   NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
     305           13673 :   NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser)
     306           11865 : NS_INTERFACE_MAP_END
     307                 : 
     308                 : // The parser continue event is posted only if
     309                 : // all of the data to parse has been passed to ::OnDataAvailable
     310                 : // and the parser has been interrupted by the content sink
     311                 : // because the processing of tokens took too long.
     312                 : 
     313                 : nsresult
     314               0 : nsParser::PostContinueEvent()
     315                 : {
     316               0 :   if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {
     317                 :     // If this flag isn't set, then there shouldn't be a live continue event!
     318               0 :     NS_ASSERTION(!mContinueEvent, "bad");
     319                 : 
     320                 :     // This creates a reference cycle between this and the event that is
     321                 :     // broken when the event fires.
     322               0 :     nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this);
     323               0 :     if (NS_FAILED(NS_DispatchToCurrentThread(event))) {
     324               0 :         NS_WARNING("failed to dispatch parser continuation event");
     325                 :     } else {
     326               0 :         mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
     327               0 :         mContinueEvent = event;
     328                 :     }
     329                 :   }
     330               0 :   return NS_OK;
     331                 : }
     332                 : 
     333                 : NS_IMETHODIMP_(void)
     334               0 : nsParser::GetCommand(nsCString& aCommand)
     335                 : {
     336               0 :   aCommand = mCommandStr;
     337               0 : }
     338                 : 
     339                 : /**
     340                 :  *  Call this method once you've created a parser, and want to instruct it
     341                 :  *  about the command which caused the parser to be constructed. For example,
     342                 :  *  this allows us to select a DTD which can do, say, view-source.
     343                 :  *
     344                 :  *  @param   aCommand the command string to set
     345                 :  */
     346                 : NS_IMETHODIMP_(void)
     347            1038 : nsParser::SetCommand(const char* aCommand)
     348                 : {
     349            1038 :   mCommandStr.Assign(aCommand);
     350            1038 :   if (mCommandStr.Equals("view-source")) {
     351               0 :     mCommand = eViewSource;
     352            1038 :   } else if (mCommandStr.Equals("view-fragment")) {
     353               0 :     mCommand = eViewFragment;
     354                 :   } else {
     355            1038 :     mCommand = eViewNormal;
     356                 :   }
     357            1038 : }
     358                 : 
     359                 : /**
     360                 :  *  Call this method once you've created a parser, and want to instruct it
     361                 :  *  about the command which caused the parser to be constructed. For example,
     362                 :  *  this allows us to select a DTD which can do, say, view-source.
     363                 :  *
     364                 :  *  @param   aParserCommand the command to set
     365                 :  */
     366                 : NS_IMETHODIMP_(void)
     367               0 : nsParser::SetCommand(eParserCommands aParserCommand)
     368                 : {
     369               0 :   mCommand = aParserCommand;
     370               0 : }
     371                 : 
     372                 : /**
     373                 :  *  Call this method once you've created a parser, and want to instruct it
     374                 :  *  about what charset to load
     375                 :  *
     376                 :  *  @param   aCharset- the charset of a document
     377                 :  *  @param   aCharsetSource- the source of the charset
     378                 :  */
     379                 : NS_IMETHODIMP_(void)
     380            3906 : nsParser::SetDocumentCharset(const nsACString& aCharset, PRInt32 aCharsetSource)
     381                 : {
     382            3906 :   mCharset = aCharset;
     383            3906 :   mCharsetSource = aCharsetSource;
     384            3906 :   if (mParserContext && mParserContext->mScanner) {
     385             591 :      mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource);
     386                 :   }
     387            3906 : }
     388                 : 
     389                 : void
     390             591 : nsParser::SetSinkCharset(nsACString& aCharset)
     391                 : {
     392             591 :   if (mSink) {
     393             591 :     mSink->SetDocumentCharset(aCharset);
     394                 :   }
     395             591 : }
     396                 : 
     397                 : /**
     398                 :  *  This method gets called in order to set the content
     399                 :  *  sink for this parser to dump nodes to.
     400                 :  *
     401                 :  *  @param   aSink the nsIContentSink that receives the nodes
     402                 :  */
     403                 : NS_IMETHODIMP_(void)
     404            3344 : nsParser::SetContentSink(nsIContentSink* aSink)
     405                 : {
     406            3344 :   NS_PRECONDITION(aSink, "sink cannot be null!");
     407            3344 :   mSink = aSink;
     408                 : 
     409            3344 :   if (mSink) {
     410            3344 :     mSink->SetParser(this);
     411            6688 :     nsCOMPtr<nsIHTMLContentSink> htmlSink = do_QueryInterface(mSink);
     412            3344 :     if (htmlSink) {
     413              28 :       mIsAboutBlank = htmlSink->IsAboutBlank();
     414                 :     }
     415                 :   }
     416            3344 : }
     417                 : 
     418                 : /**
     419                 :  * retrieve the sink set into the parser
     420                 :  * @return  current sink
     421                 :  */
     422                 : NS_IMETHODIMP_(nsIContentSink*)
     423            1038 : nsParser::GetContentSink()
     424                 : {
     425            1038 :   return mSink;
     426                 : }
     427                 : 
     428                 : /**
     429                 :  * Determine what DTD mode (and thus what layout nsCompatibility mode)
     430                 :  * to use for this document based on the first chunk of data received
     431                 :  * from the network (each parsercontext can have its own mode).  (No,
     432                 :  * this is not an optimal solution -- we really don't need to know until
     433                 :  * after we've received the DOCTYPE, and this could easily be part of
     434                 :  * the regular parsing process if the parser were designed in a way that
     435                 :  * made such modifications easy.)
     436                 :  */
     437                 : 
     438                 : // Parse the PS production in the SGML spec (excluding the part dealing
     439                 : // with entity references) starting at aIndex into aBuffer, and
     440                 : // return the first index after the end of the production.
     441                 : static PRInt32
     442              50 : ParsePS(const nsString& aBuffer, PRInt32 aIndex)
     443                 : {
     444              25 :   for (;;) {
     445              50 :     PRUnichar ch = aBuffer.CharAt(aIndex);
     446              50 :     if ((ch == PRUnichar(' ')) || (ch == PRUnichar('\t')) ||
     447                 :         (ch == PRUnichar('\n')) || (ch == PRUnichar('\r'))) {
     448              25 :       ++aIndex;
     449              25 :     } else if (ch == PRUnichar('-')) {
     450                 :       PRInt32 tmpIndex;
     451               0 :       if (aBuffer.CharAt(aIndex+1) == PRUnichar('-') &&
     452               0 :           kNotFound != (tmpIndex=aBuffer.Find("--",false,aIndex+2,-1))) {
     453               0 :         aIndex = tmpIndex + 2;
     454                 :       } else {
     455               0 :         return aIndex;
     456                 :       }
     457                 :     } else {
     458              25 :       return aIndex;
     459                 :     }
     460                 :   }
     461                 : }
     462                 : 
     463                 : #define PARSE_DTD_HAVE_DOCTYPE          (1<<0)
     464                 : #define PARSE_DTD_HAVE_PUBLIC_ID        (1<<1)
     465                 : #define PARSE_DTD_HAVE_SYSTEM_ID        (1<<2)
     466                 : #define PARSE_DTD_HAVE_INTERNAL_SUBSET  (1<<3)
     467                 : 
     468                 : // return true on success (includes not present), false on failure
     469                 : static bool
     470              28 : ParseDocTypeDecl(const nsString &aBuffer,
     471                 :                  PRInt32 *aResultFlags,
     472                 :                  nsString &aPublicID,
     473                 :                  nsString &aSystemID)
     474                 : {
     475              28 :   bool haveDoctype = false;
     476              28 :   *aResultFlags = 0;
     477                 : 
     478                 :   // Skip through any comments and processing instructions
     479                 :   // The PI-skipping is a bit of a hack.
     480              28 :   PRInt32 theIndex = 0;
     481               0 :   do {
     482              28 :     theIndex = aBuffer.FindChar('<', theIndex);
     483              28 :     if (theIndex == kNotFound) break;
     484              25 :     PRUnichar nextChar = aBuffer.CharAt(theIndex+1);
     485              25 :     if (nextChar == PRUnichar('!')) {
     486              25 :       PRInt32 tmpIndex = theIndex + 2;
     487              25 :       if (kNotFound !=
     488                 :           (theIndex=aBuffer.Find("DOCTYPE", true, tmpIndex, 0))) {
     489              25 :         haveDoctype = true;
     490              25 :         theIndex += 7; // skip "DOCTYPE"
     491              25 :         break;
     492                 :       }
     493               0 :       theIndex = ParsePS(aBuffer, tmpIndex);
     494               0 :       theIndex = aBuffer.FindChar('>', theIndex);
     495               0 :     } else if (nextChar == PRUnichar('?')) {
     496               0 :       theIndex = aBuffer.FindChar('>', theIndex);
     497                 :     } else {
     498               0 :       break;
     499                 :     }
     500                 :   } while (theIndex != kNotFound);
     501                 : 
     502              28 :   if (!haveDoctype)
     503               3 :     return true;
     504              25 :   *aResultFlags |= PARSE_DTD_HAVE_DOCTYPE;
     505                 : 
     506              25 :   theIndex = ParsePS(aBuffer, theIndex);
     507              25 :   theIndex = aBuffer.Find("HTML", true, theIndex, 0);
     508              25 :   if (kNotFound == theIndex)
     509              25 :     return false;
     510               0 :   theIndex = ParsePS(aBuffer, theIndex+4);
     511               0 :   PRInt32 tmpIndex = aBuffer.Find("PUBLIC", true, theIndex, 0);
     512                 : 
     513               0 :   if (kNotFound != tmpIndex) {
     514               0 :     theIndex = ParsePS(aBuffer, tmpIndex+6);
     515                 : 
     516                 :     // We get here only if we've read <!DOCTYPE HTML PUBLIC
     517                 :     // (not case sensitive) possibly with comments within.
     518                 : 
     519                 :     // Now find the beginning and end of the public identifier
     520                 :     // and the system identifier (if present).
     521                 : 
     522               0 :     PRUnichar lit = aBuffer.CharAt(theIndex);
     523               0 :     if ((lit != PRUnichar('\"')) && (lit != PRUnichar('\'')))
     524               0 :       return false;
     525                 : 
     526                 :     // Start is the first character, excluding the quote, and End is
     527                 :     // the final quote, so there are (end-start) characters.
     528                 : 
     529               0 :     PRInt32 PublicIDStart = theIndex + 1;
     530               0 :     PRInt32 PublicIDEnd = aBuffer.FindChar(lit, PublicIDStart);
     531               0 :     if (kNotFound == PublicIDEnd)
     532               0 :       return false;
     533               0 :     theIndex = ParsePS(aBuffer, PublicIDEnd + 1);
     534               0 :     PRUnichar next = aBuffer.CharAt(theIndex);
     535               0 :     if (next == PRUnichar('>')) {
     536                 :       // There was a public identifier, but no system
     537                 :       // identifier,
     538                 :       // so do nothing.
     539                 :       // This is needed to avoid the else at the end, and it's
     540                 :       // also the most common case.
     541               0 :     } else if ((next == PRUnichar('\"')) ||
     542                 :                (next == PRUnichar('\''))) {
     543                 :       // We found a system identifier.
     544               0 :       *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
     545               0 :       PRInt32 SystemIDStart = theIndex + 1;
     546               0 :       PRInt32 SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
     547               0 :       if (kNotFound == SystemIDEnd)
     548               0 :         return false;
     549                 :       aSystemID =
     550               0 :         Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
     551               0 :     } else if (next == PRUnichar('[')) {
     552                 :       // We found an internal subset.
     553               0 :       *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
     554                 :     } else {
     555                 :       // Something's wrong.
     556               0 :       return false;
     557                 :     }
     558                 : 
     559                 :     // Since a public ID is a minimum literal, we must trim
     560                 :     // and collapse whitespace
     561               0 :     aPublicID = Substring(aBuffer, PublicIDStart, PublicIDEnd - PublicIDStart);
     562               0 :     aPublicID.CompressWhitespace(true, true);
     563               0 :     *aResultFlags |= PARSE_DTD_HAVE_PUBLIC_ID;
     564                 :   } else {
     565               0 :     tmpIndex=aBuffer.Find("SYSTEM", true, theIndex, 0);
     566               0 :     if (kNotFound != tmpIndex) {
     567                 :       // DOCTYPES with system ID but no Public ID
     568               0 :       *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
     569                 : 
     570               0 :       theIndex = ParsePS(aBuffer, tmpIndex+6);
     571               0 :       PRUnichar next = aBuffer.CharAt(theIndex);
     572               0 :       if (next != PRUnichar('\"') && next != PRUnichar('\''))
     573               0 :         return false;
     574                 : 
     575               0 :       PRInt32 SystemIDStart = theIndex + 1;
     576               0 :       PRInt32 SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
     577                 : 
     578               0 :       if (kNotFound == SystemIDEnd)
     579               0 :         return false;
     580                 :       aSystemID =
     581               0 :         Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
     582               0 :       theIndex = ParsePS(aBuffer, SystemIDEnd + 1);
     583                 :     }
     584                 : 
     585               0 :     PRUnichar nextChar = aBuffer.CharAt(theIndex);
     586               0 :     if (nextChar == PRUnichar('['))
     587               0 :       *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
     588               0 :     else if (nextChar != PRUnichar('>'))
     589               0 :       return false;
     590                 :   }
     591               0 :   return true;
     592                 : }
     593                 : 
     594                 : struct PubIDInfo
     595                 : {
     596                 :   enum eMode {
     597                 :     eQuirks,         /* always quirks mode, unless there's an internal subset */
     598                 :     eAlmostStandards,/* eCompatibility_AlmostStandards */
     599                 :     eFullStandards   /* eCompatibility_FullStandards */
     600                 :       /*
     601                 :        * public IDs that should trigger strict mode are not listed
     602                 :        * since we want all future public IDs to trigger strict mode as
     603                 :        * well
     604                 :        */
     605                 :   };
     606                 : 
     607                 :   const char* name;
     608                 :   eMode mode_if_no_sysid;
     609                 :   eMode mode_if_sysid;
     610                 : };
     611                 : 
     612                 : #define ELEMENTS_OF(array_) (sizeof(array_)/sizeof(array_[0]))
     613                 : 
     614                 : // These must be in nsCRT::strcmp order so binary-search can be used.
     615                 : // This is verified, |#ifdef DEBUG|, below.
     616                 : 
     617                 : // Even though public identifiers should be case sensitive, we will do
     618                 : // all comparisons after converting to lower case in order to do
     619                 : // case-insensitive comparison since there are a number of existing web
     620                 : // sites that use the incorrect case.  Therefore all of the public
     621                 : // identifiers below are in lower case (with the correct case following,
     622                 : // in comments).  The case is verified, |#ifdef DEBUG|, below.
     623                 : static const PubIDInfo kPublicIDs[] = {
     624                 :   {"+//silmaril//dtd html pro v0r11 19970101//en" /* "+//Silmaril//dtd html Pro v0r11 19970101//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     625                 :   {"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en" /* "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     626                 :   {"-//as//dtd html 3.0 aswedit + extensions//en" /* "-//AS//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     627                 :   {"-//ietf//dtd html 2.0 level 1//en" /* "-//IETF//DTD HTML 2.0 Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     628                 :   {"-//ietf//dtd html 2.0 level 2//en" /* "-//IETF//DTD HTML 2.0 Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     629                 :   {"-//ietf//dtd html 2.0 strict level 1//en" /* "-//IETF//DTD HTML 2.0 Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     630                 :   {"-//ietf//dtd html 2.0 strict level 2//en" /* "-//IETF//DTD HTML 2.0 Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     631                 :   {"-//ietf//dtd html 2.0 strict//en" /* "-//IETF//DTD HTML 2.0 Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     632                 :   {"-//ietf//dtd html 2.0//en" /* "-//IETF//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     633                 :   {"-//ietf//dtd html 2.1e//en" /* "-//IETF//DTD HTML 2.1E//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     634                 :   {"-//ietf//dtd html 3.0//en" /* "-//IETF//DTD HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     635                 :   {"-//ietf//dtd html 3.0//en//" /* "-//IETF//DTD HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     636                 :   {"-//ietf//dtd html 3.2 final//en" /* "-//IETF//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     637                 :   {"-//ietf//dtd html 3.2//en" /* "-//IETF//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     638                 :   {"-//ietf//dtd html 3//en" /* "-//IETF//DTD HTML 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     639                 :   {"-//ietf//dtd html level 0//en" /* "-//IETF//DTD HTML Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     640                 :   {"-//ietf//dtd html level 0//en//2.0" /* "-//IETF//DTD HTML Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     641                 :   {"-//ietf//dtd html level 1//en" /* "-//IETF//DTD HTML Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     642                 :   {"-//ietf//dtd html level 1//en//2.0" /* "-//IETF//DTD HTML Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     643                 :   {"-//ietf//dtd html level 2//en" /* "-//IETF//DTD HTML Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     644                 :   {"-//ietf//dtd html level 2//en//2.0" /* "-//IETF//DTD HTML Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     645                 :   {"-//ietf//dtd html level 3//en" /* "-//IETF//DTD HTML Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     646                 :   {"-//ietf//dtd html level 3//en//3.0" /* "-//IETF//DTD HTML Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     647                 :   {"-//ietf//dtd html strict level 0//en" /* "-//IETF//DTD HTML Strict Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     648                 :   {"-//ietf//dtd html strict level 0//en//2.0" /* "-//IETF//DTD HTML Strict Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     649                 :   {"-//ietf//dtd html strict level 1//en" /* "-//IETF//DTD HTML Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     650                 :   {"-//ietf//dtd html strict level 1//en//2.0" /* "-//IETF//DTD HTML Strict Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     651                 :   {"-//ietf//dtd html strict level 2//en" /* "-//IETF//DTD HTML Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     652                 :   {"-//ietf//dtd html strict level 2//en//2.0" /* "-//IETF//DTD HTML Strict Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     653                 :   {"-//ietf//dtd html strict level 3//en" /* "-//IETF//DTD HTML Strict Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     654                 :   {"-//ietf//dtd html strict level 3//en//3.0" /* "-//IETF//DTD HTML Strict Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     655                 :   {"-//ietf//dtd html strict//en" /* "-//IETF//DTD HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     656                 :   {"-//ietf//dtd html strict//en//2.0" /* "-//IETF//DTD HTML Strict//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     657                 :   {"-//ietf//dtd html strict//en//3.0" /* "-//IETF//DTD HTML Strict//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     658                 :   {"-//ietf//dtd html//en" /* "-//IETF//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     659                 :   {"-//ietf//dtd html//en//2.0" /* "-//IETF//DTD HTML//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     660                 :   {"-//ietf//dtd html//en//3.0" /* "-//IETF//DTD HTML//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     661                 :   {"-//metrius//dtd metrius presentational//en" /* "-//Metrius//DTD Metrius Presentational//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     662                 :   {"-//microsoft//dtd internet explorer 2.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     663                 :   {"-//microsoft//dtd internet explorer 2.0 html//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     664                 :   {"-//microsoft//dtd internet explorer 2.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 2.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     665                 :   {"-//microsoft//dtd internet explorer 3.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     666                 :   {"-//microsoft//dtd internet explorer 3.0 html//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     667                 :   {"-//microsoft//dtd internet explorer 3.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 3.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     668                 :   {"-//netscape comm. corp.//dtd html//en" /* "-//Netscape Comm. Corp.//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     669                 :   {"-//netscape comm. corp.//dtd strict html//en" /* "-//Netscape Comm. Corp.//DTD Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     670                 :   {"-//o'reilly and associates//dtd html 2.0//en" /* "-//O'Reilly and Associates//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     671                 :   {"-//o'reilly and associates//dtd html extended 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     672                 :   {"-//o'reilly and associates//dtd html extended relaxed 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     673                 :   {"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//en" /* "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     674                 :   {"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//en" /* "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     675                 :   {"-//spyglass//dtd html 2.0 extended//en" /* "-//Spyglass//DTD HTML 2.0 Extended//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     676                 :   {"-//sq//dtd html 2.0 hotmetal + extensions//en" /* "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     677                 :   {"-//sun microsystems corp.//dtd hotjava html//en" /* "-//Sun Microsystems Corp.//DTD HotJava HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     678                 :   {"-//sun microsystems corp.//dtd hotjava strict html//en" /* "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     679                 :   {"-//w3c//dtd html 3 1995-03-24//en" /* "-//W3C//DTD HTML 3 1995-03-24//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     680                 :   {"-//w3c//dtd html 3.2 draft//en" /* "-//W3C//DTD HTML 3.2 Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     681                 :   {"-//w3c//dtd html 3.2 final//en" /* "-//W3C//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     682                 :   {"-//w3c//dtd html 3.2//en" /* "-//W3C//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     683                 :   {"-//w3c//dtd html 3.2s draft//en" /* "-//W3C//DTD HTML 3.2S Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     684                 :   {"-//w3c//dtd html 4.0 frameset//en" /* "-//W3C//DTD HTML 4.0 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     685                 :   {"-//w3c//dtd html 4.0 transitional//en" /* "-//W3C//DTD HTML 4.0 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     686                 :   {"-//w3c//dtd html 4.01 frameset//en" /* "-//W3C//DTD HTML 4.01 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
     687                 :   {"-//w3c//dtd html 4.01 transitional//en" /* "-//W3C//DTD HTML 4.01 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
     688                 :   {"-//w3c//dtd html experimental 19960712//en" /* "-//W3C//DTD HTML Experimental 19960712//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     689                 :   {"-//w3c//dtd html experimental 970421//en" /* "-//W3C//DTD HTML Experimental 970421//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     690                 :   {"-//w3c//dtd w3 html//en" /* "-//W3C//DTD W3 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     691                 :   {"-//w3c//dtd xhtml 1.0 frameset//en" /* "-//W3C//DTD XHTML 1.0 Frameset//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
     692                 :   {"-//w3c//dtd xhtml 1.0 transitional//en" /* "-//W3C//DTD XHTML 1.0 Transitional//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
     693                 :   {"-//w3o//dtd w3 html 3.0//en" /* "-//W3O//DTD W3 HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     694                 :   {"-//w3o//dtd w3 html 3.0//en//" /* "-//W3O//DTD W3 HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     695                 :   {"-//w3o//dtd w3 html strict 3.0//en//" /* "-//W3O//DTD W3 HTML Strict 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     696                 :   {"-//webtechs//dtd mozilla html 2.0//en" /* "-//WebTechs//DTD Mozilla HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     697                 :   {"-//webtechs//dtd mozilla html//en" /* "-//WebTechs//DTD Mozilla HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     698                 :   {"-/w3c/dtd html 4.0 transitional/en" /* "-/W3C/DTD HTML 4.0 Transitional/EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     699                 :   {"html" /* "HTML" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
     700                 : };
     701                 : 
     702                 : #ifdef DEBUG
     703                 : static void
     704              28 : VerifyPublicIDs()
     705                 : {
     706                 :   static bool gVerified = false;
     707              28 :   if (!gVerified) {
     708              10 :     gVerified = true;
     709                 :     PRUint32 i;
     710             760 :     for (i = 0; i < ELEMENTS_OF(kPublicIDs) - 1; ++i) {
     711             750 :       if (nsCRT::strcmp(kPublicIDs[i].name, kPublicIDs[i+1].name) >= 0) {
     712               0 :         NS_NOTREACHED("doctypes out of order");
     713                 :         printf("Doctypes %s and %s out of order.\n",
     714               0 :                kPublicIDs[i].name, kPublicIDs[i+1].name);
     715                 :       }
     716                 :     }
     717             770 :     for (i = 0; i < ELEMENTS_OF(kPublicIDs); ++i) {
     718            1520 :       nsCAutoString lcPubID(kPublicIDs[i].name);
     719             760 :       ToLowerCase(lcPubID);
     720             760 :       if (nsCRT::strcmp(kPublicIDs[i].name, lcPubID.get()) != 0) {
     721               0 :         NS_NOTREACHED("doctype not lower case");
     722               0 :         printf("Doctype %s not lower case.\n", kPublicIDs[i].name);
     723                 :       }
     724                 :     }
     725                 :   }
     726              28 : }
     727                 : #endif
     728                 : 
     729                 : static void
     730              28 : DetermineHTMLParseMode(const nsString& aBuffer,
     731                 :                        nsDTDMode& aParseMode,
     732                 :                        eParserDocType& aDocType)
     733                 : {
     734                 : #ifdef DEBUG
     735              28 :   VerifyPublicIDs();
     736                 : #endif
     737                 :   PRInt32 resultFlags;
     738              56 :   nsAutoString publicIDUCS2, sysIDUCS2;
     739              28 :   if (ParseDocTypeDecl(aBuffer, &resultFlags, publicIDUCS2, sysIDUCS2)) {
     740               3 :     if (!(resultFlags & PARSE_DTD_HAVE_DOCTYPE)) {
     741                 :       // no DOCTYPE
     742               3 :       aParseMode = eDTDMode_quirks;
     743               3 :       aDocType = eHTML_Quirks;
     744               0 :     } else if ((resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) ||
     745               0 :                !(resultFlags & PARSE_DTD_HAVE_PUBLIC_ID)) {
     746                 :       // A doctype with an internal subset is always full_standards.
     747                 :       // A doctype without a public ID is always full_standards.
     748               0 :       aDocType = eHTML_Strict;
     749               0 :       aParseMode = eDTDMode_full_standards;
     750                 : 
     751                 :       // Special hack for IBM's custom DOCTYPE.
     752               0 :       if (!(resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) &&
     753               0 :           sysIDUCS2 == NS_LITERAL_STRING(
     754                 :                "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) {
     755               0 :         aParseMode = eDTDMode_quirks;
     756               0 :         aDocType = eHTML_Quirks;
     757                 :       }
     758                 : 
     759                 :     } else {
     760                 :       // We have to check our list of public IDs to see what to do.
     761                 :       // Yes, we want UCS2 to ASCII lossy conversion.
     762               0 :       nsCAutoString publicID;
     763               0 :       publicID.AssignWithConversion(publicIDUCS2);
     764                 : 
     765                 :       // See comment above definition of kPublicIDs about case
     766                 :       // sensitivity.
     767               0 :       ToLowerCase(publicID);
     768                 : 
     769                 :       // Binary search to see if we can find the correct public ID
     770                 :       // These must be signed since maximum can go below zero and we'll
     771                 :       // crash if it's unsigned.
     772               0 :       PRInt32 minimum = 0;
     773               0 :       PRInt32 maximum = ELEMENTS_OF(kPublicIDs) - 1;
     774                 :       PRInt32 index;
     775               0 :       for (;;) {
     776               0 :         index = (minimum + maximum) / 2;
     777                 :         PRInt32 comparison =
     778               0 :             nsCRT::strcmp(publicID.get(), kPublicIDs[index].name);
     779               0 :         if (comparison == 0)
     780                 :           break;
     781               0 :         if (comparison < 0)
     782               0 :           maximum = index - 1;
     783                 :         else
     784               0 :           minimum = index + 1;
     785                 : 
     786               0 :         if (maximum < minimum) {
     787                 :           // The DOCTYPE is not in our list, so it must be full_standards.
     788               0 :           aParseMode = eDTDMode_full_standards;
     789               0 :           aDocType = eHTML_Strict;
     790                 :           return;
     791                 :         }
     792                 :       }
     793                 : 
     794               0 :       switch ((resultFlags & PARSE_DTD_HAVE_SYSTEM_ID)
     795                 :                 ? kPublicIDs[index].mode_if_sysid
     796                 :                 : kPublicIDs[index].mode_if_no_sysid)
     797                 :       {
     798                 :         case PubIDInfo::eQuirks:
     799               0 :           aParseMode = eDTDMode_quirks;
     800               0 :           aDocType = eHTML_Quirks;
     801               0 :           break;
     802                 :         case PubIDInfo::eAlmostStandards:
     803               0 :           aParseMode = eDTDMode_almost_standards;
     804               0 :           aDocType = eHTML_Strict;
     805               0 :           break;
     806                 :         case PubIDInfo::eFullStandards:
     807               0 :           aParseMode = eDTDMode_full_standards;
     808               0 :           aDocType = eHTML_Strict;
     809               0 :           break;
     810                 :         default:
     811               0 :           NS_NOTREACHED("no other cases!");
     812                 :       }
     813                 :     }
     814                 :   } else {
     815                 :     // badly formed DOCTYPE -> quirks
     816              25 :     aParseMode = eDTDMode_quirks;
     817              25 :     aDocType = eHTML_Quirks;
     818                 :   }
     819                 : }
     820                 : 
     821                 : static void
     822            3341 : DetermineParseMode(const nsString& aBuffer, nsDTDMode& aParseMode,
     823                 :                    eParserDocType& aDocType, const nsACString& aMimeType)
     824                 : {
     825            3341 :   if (aMimeType.EqualsLiteral(TEXT_HTML)) {
     826              28 :     DetermineHTMLParseMode(aBuffer, aParseMode, aDocType);
     827           26504 :   } else if (aMimeType.EqualsLiteral(TEXT_PLAIN) ||
     828            3313 :              aMimeType.EqualsLiteral(TEXT_CSS) ||
     829            3313 :              aMimeType.EqualsLiteral(APPLICATION_JAVASCRIPT) ||
     830            3313 :              aMimeType.EqualsLiteral(APPLICATION_XJAVASCRIPT) ||
     831            3313 :              aMimeType.EqualsLiteral(APPLICATION_JSON) ||
     832            3313 :              aMimeType.EqualsLiteral(TEXT_ECMASCRIPT) ||
     833            3313 :              aMimeType.EqualsLiteral(APPLICATION_ECMASCRIPT) ||
     834            3313 :              aMimeType.EqualsLiteral(TEXT_JAVASCRIPT)) {
     835               0 :     aDocType = ePlainText;
     836               0 :     aParseMode = eDTDMode_quirks;
     837                 :   } else { // Some form of XML
     838            3313 :     aDocType = eXML;
     839            3313 :     aParseMode = eDTDMode_full_standards;
     840                 :   }
     841            3341 : }
     842                 : 
     843                 : static nsIDTD*
     844            3342 : FindSuitableDTD(CParserContext& aParserContext)
     845                 : {
     846                 :   // We always find a DTD.
     847            3342 :   aParserContext.mAutoDetectStatus = ePrimaryDetect;
     848                 : 
     849                 :   // Quick check for view source.
     850            3342 :   NS_ABORT_IF_FALSE(aParserContext.mParserCommand != eViewSource,
     851                 :     "The old parser is not supposed to be used for View Source anymore.");
     852                 : 
     853                 :   // Now see if we're parsing HTML (which, as far as we're concerned, simply
     854                 :   // means "not XML").
     855            3342 :   if (aParserContext.mDocType != eXML) {
     856              28 :     return new CNavDTD();
     857                 :   }
     858                 : 
     859                 :   // If we're here, then we'd better be parsing XML.
     860            3314 :   NS_ASSERTION(aParserContext.mDocType == eXML, "What are you trying to send me, here?");
     861            3314 :   return new nsExpatDriver();
     862                 : }
     863                 : 
     864                 : NS_IMETHODIMP
     865              65 : nsParser::CancelParsingEvents()
     866                 : {
     867              65 :   if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) {
     868               0 :     NS_ASSERTION(mContinueEvent, "mContinueEvent is null");
     869                 :     // Revoke the pending continue parsing event
     870               0 :     mContinueEvent = nsnull;
     871               0 :     mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
     872                 :   }
     873              65 :   return NS_OK;
     874                 : }
     875                 : 
     876                 : ////////////////////////////////////////////////////////////////////////
     877                 : 
     878                 : /**
     879                 :  * Evalutes EXPR1 and EXPR2 exactly once each, in that order.  Stores the value
     880                 :  * of EXPR2 in RV is EXPR2 fails, otherwise RV contains the result of EXPR1
     881                 :  * (which could be success or failure).
     882                 :  *
     883                 :  * To understand the motivation for this construct, consider these example
     884                 :  * methods:
     885                 :  *
     886                 :  *   nsresult nsSomething::DoThatThing(nsIWhatever* obj) {
     887                 :  *     nsresult rv = NS_OK;
     888                 :  *     ...
     889                 :  *     return obj->DoThatThing();
     890                 :  *     NS_ENSURE_SUCCESS(rv, rv);
     891                 :  *     ...
     892                 :  *     return rv;
     893                 :  *   }
     894                 :  *
     895                 :  *   void nsCaller::MakeThingsHappen() {
     896                 :  *     return mSomething->DoThatThing(mWhatever);
     897                 :  *   }
     898                 :  *
     899                 :  * Suppose, for whatever reason*, we want to shift responsibility for calling
     900                 :  * mWhatever->DoThatThing() from nsSomething::DoThatThing up to
     901                 :  * nsCaller::MakeThingsHappen.  We might rewrite the two methods as follows:
     902                 :  *
     903                 :  *   nsresult nsSomething::DoThatThing() {
     904                 :  *     nsresult rv = NS_OK;
     905                 :  *     ...
     906                 :  *     ...
     907                 :  *     return rv;
     908                 :  *   }
     909                 :  *
     910                 :  *   void nsCaller::MakeThingsHappen() {
     911                 :  *     nsresult rv;
     912                 :  *     PREFER_LATTER_ERROR_CODE(mSomething->DoThatThing(),
     913                 :  *                              mWhatever->DoThatThing(),
     914                 :  *                              rv);
     915                 :  *     return rv;
     916                 :  *   }
     917                 :  *
     918                 :  * *Possible reasons include: nsCaller doesn't want to give mSomething access
     919                 :  * to mWhatever, nsCaller wants to guarantee that mWhatever->DoThatThing() will
     920                 :  * be called regardless of how nsSomething::DoThatThing behaves, &c.
     921                 :  */
     922                 : #define PREFER_LATTER_ERROR_CODE(EXPR1, EXPR2, RV) {                          \
     923                 :   nsresult RV##__temp = EXPR1;                                                \
     924                 :   RV = EXPR2;                                                                 \
     925                 :   if (NS_FAILED(RV)) {                                                        \
     926                 :     RV = RV##__temp;                                                          \
     927                 :   }                                                                           \
     928                 : }
     929                 : 
     930                 : /**
     931                 :  * This gets called just prior to the model actually
     932                 :  * being constructed. It's important to make this the
     933                 :  * last thing that happens right before parsing, so we
     934                 :  * can delay until the last moment the resolution of
     935                 :  * which DTD to use (unless of course we're assigned one).
     936                 :  */
     937                 : nsresult
     938            6757 : nsParser::WillBuildModel(nsString& aFilename)
     939                 : {
     940            6757 :   if (!mParserContext)
     941               0 :     return kInvalidParserContext;
     942                 : 
     943            6757 :   if (eUnknownDetect != mParserContext->mAutoDetectStatus)
     944            3415 :     return NS_OK;
     945                 : 
     946            3342 :   if (eDTDMode_unknown == mParserContext->mDTDMode ||
     947                 :       eDTDMode_autodetect == mParserContext->mDTDMode) {
     948                 :     PRUnichar buf[1025];
     949            6682 :     nsFixedString theBuffer(buf, 1024, 0);
     950                 : 
     951                 :     // Grab 1024 characters, starting at the first non-whitespace
     952                 :     // character, to look for the doctype in.
     953            3341 :     mParserContext->mScanner->Peek(theBuffer, 1024, mParserContext->mScanner->FirstNonWhitespacePosition());
     954                 :     DetermineParseMode(theBuffer, mParserContext->mDTDMode,
     955            3341 :                        mParserContext->mDocType, mParserContext->mMimeType);
     956                 :   }
     957                 : 
     958            3342 :   NS_ASSERTION(!mDTD || !mParserContext->mPrevContext,
     959                 :                "Clobbering DTD for non-root parser context!");
     960            3342 :   mDTD = FindSuitableDTD(*mParserContext);
     961            3342 :   NS_ENSURE_TRUE(mDTD, NS_ERROR_OUT_OF_MEMORY);
     962                 : 
     963                 :   nsITokenizer* tokenizer;
     964            3342 :   nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer);
     965            3342 :   NS_ENSURE_SUCCESS(rv, rv);
     966                 : 
     967            3342 :   rv = mDTD->WillBuildModel(*mParserContext, tokenizer, mSink);
     968            3342 :   nsresult sinkResult = mSink->WillBuildModel(mDTD->GetMode());
     969                 :   // nsIDTD::WillBuildModel used to be responsible for calling
     970                 :   // nsIContentSink::WillBuildModel, but that obligation isn't expressible
     971                 :   // in the nsIDTD interface itself, so it's sounder and simpler to give that
     972                 :   // responsibility back to the parser. The former behavior of the DTD was to
     973                 :   // NS_ENSURE_SUCCESS the sink WillBuildModel call, so if the sink returns
     974                 :   // failure we should use sinkResult instead of rv, to preserve the old error
     975                 :   // handling behavior of the DTD:
     976            3342 :   return NS_FAILED(sinkResult) ? sinkResult : rv;
     977                 : }
     978                 : 
     979                 : /**
     980                 :  * This gets called when the parser is done with its input.
     981                 :  * Note that the parser may have been called recursively, so we
     982                 :  * have to check for a prev. context before closing out the DTD/sink.
     983                 :  */
     984                 : nsresult
     985            3342 : nsParser::DidBuildModel(nsresult anErrorCode)
     986                 : {
     987            3342 :   nsresult result = anErrorCode;
     988                 : 
     989            3342 :   if (IsComplete()) {
     990            3342 :     if (mParserContext && !mParserContext->mPrevContext) {
     991                 :       // Let sink know if we're about to end load because we've been terminated.
     992                 :       // In that case we don't want it to run deferred scripts.
     993            3342 :       bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING;
     994            3342 :       if (mDTD && mSink) {
     995            3342 :         nsresult dtdResult =  mDTD->DidBuildModel(anErrorCode),
     996            3342 :                 sinkResult = mSink->DidBuildModel(terminated);
     997                 :         // nsIDTD::DidBuildModel used to be responsible for calling
     998                 :         // nsIContentSink::DidBuildModel, but that obligation isn't expressible
     999                 :         // in the nsIDTD interface itself, so it's sounder and simpler to give
    1000                 :         // that responsibility back to the parser. The former behavior of the
    1001                 :         // DTD was to NS_ENSURE_SUCCESS the sink DidBuildModel call, so if the
    1002                 :         // sink returns failure we should use sinkResult instead of dtdResult,
    1003                 :         // to preserve the old error handling behavior of the DTD:
    1004            3342 :         result = NS_FAILED(sinkResult) ? sinkResult : dtdResult;
    1005                 :       }
    1006                 : 
    1007                 :       //Ref. to bug 61462.
    1008            3342 :       mParserContext->mRequest = 0;
    1009                 :     }
    1010                 :   }
    1011                 : 
    1012            3342 :   return result;
    1013                 : }
    1014                 : 
    1015                 : /**
    1016                 :  * This method adds a new parser context to the list,
    1017                 :  * pushing the current one to the next position.
    1018                 :  *
    1019                 :  * @param   ptr to new context
    1020                 :  */
    1021                 : void
    1022            3344 : nsParser::PushContext(CParserContext& aContext)
    1023                 : {
    1024            3344 :   NS_ASSERTION(aContext.mPrevContext == mParserContext,
    1025                 :                "Trying to push a context whose previous context differs from "
    1026                 :                "the current parser context.");
    1027            3344 :   mParserContext = &aContext;
    1028            3344 : }
    1029                 : 
    1030                 : /**
    1031                 :  * This method pops the topmost context off the stack,
    1032                 :  * returning it to the user. The next context  (if any)
    1033                 :  * becomes the current context.
    1034                 :  * @update      gess7/22/98
    1035                 :  * @return  prev. context
    1036                 :  */
    1037                 : CParserContext*
    1038               0 : nsParser::PopContext()
    1039                 : {
    1040               0 :   CParserContext* oldContext = mParserContext;
    1041               0 :   if (oldContext) {
    1042               0 :     mParserContext = oldContext->mPrevContext;
    1043               0 :     if (mParserContext) {
    1044                 :       // If the old context was blocked, propagate the blocked state
    1045                 :       // back to the new one. Also, propagate the stream listener state
    1046                 :       // but don't override onStop state to guarantee the call to DidBuildModel().
    1047               0 :       if (mParserContext->mStreamListenerState != eOnStop) {
    1048               0 :         mParserContext->mStreamListenerState = oldContext->mStreamListenerState;
    1049                 :       }
    1050                 :       // Update the current context's tokenizer to any information gleaned
    1051                 :       // while parsing document.write() calls (such as "a plaintext tag was
    1052                 :       // found")
    1053               0 :       if (mParserContext->mTokenizer) {
    1054               0 :         mParserContext->mTokenizer->CopyState(oldContext->mTokenizer);
    1055                 :       }
    1056                 :     }
    1057                 :   }
    1058               0 :   return oldContext;
    1059                 : }
    1060                 : 
    1061                 : /**
    1062                 :  *  Call this when you want control whether or not the parser will parse
    1063                 :  *  and tokenize input (TRUE), or whether it just caches input to be
    1064                 :  *  parsed later (FALSE).
    1065                 :  *
    1066                 :  *  @param   aState determines whether we parse/tokenize or just cache.
    1067                 :  *  @return  current state
    1068                 :  */
    1069                 : void
    1070               0 : nsParser::SetUnusedInput(nsString& aBuffer)
    1071                 : {
    1072               0 :   mUnusedInput = aBuffer;
    1073               0 : }
    1074                 : 
    1075                 : /**
    1076                 :  *  Call this when you want to *force* the parser to terminate the
    1077                 :  *  parsing process altogether. This is binary -- so once you terminate
    1078                 :  *  you can't resume without restarting altogether.
    1079                 :  */
    1080                 : NS_IMETHODIMP
    1081              65 : nsParser::Terminate(void)
    1082                 : {
    1083                 :   // We should only call DidBuildModel once, so don't do anything if this is
    1084                 :   // the second time that Terminate has been called.
    1085              65 :   if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
    1086               0 :     return NS_OK;
    1087                 :   }
    1088                 : 
    1089              65 :   nsresult result = NS_OK;
    1090                 :   // XXX - [ until we figure out a way to break parser-sink circularity ]
    1091                 :   // Hack - Hold a reference until we are completely done...
    1092             130 :   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
    1093              65 :   mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING;
    1094                 : 
    1095                 :   // CancelParsingEvents must be called to avoid leaking the nsParser object
    1096                 :   // @see bug 108049
    1097                 :   // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then CancelParsingEvents
    1098                 :   // will reset it so DidBuildModel will call DidBuildModel on the DTD. Note:
    1099                 :   // The IsComplete() call inside of DidBuildModel looks at the pendingContinueEvents flag.
    1100              65 :   CancelParsingEvents();
    1101                 : 
    1102                 :   // If we got interrupted in the middle of a document.write, then we might
    1103                 :   // have more than one parser context on our parsercontext stack. This has
    1104                 :   // the effect of making DidBuildModel a no-op, meaning that we never call
    1105                 :   // our sink's DidBuildModel and break the reference cycle, causing a leak.
    1106                 :   // Since we're getting terminated, we manually clean up our context stack.
    1107             130 :   while (mParserContext && mParserContext->mPrevContext) {
    1108               0 :     CParserContext *prev = mParserContext->mPrevContext;
    1109               0 :     delete mParserContext;
    1110               0 :     mParserContext = prev;
    1111                 :   }
    1112                 : 
    1113              65 :   if (mDTD) {
    1114              65 :     mDTD->Terminate();
    1115              65 :     DidBuildModel(result);
    1116               0 :   } else if (mSink) {
    1117                 :     // We have no parser context or no DTD yet (so we got terminated before we
    1118                 :     // got any data).  Manually break the reference cycle with the sink.
    1119               0 :     result = mSink->DidBuildModel(true);
    1120               0 :     NS_ENSURE_SUCCESS(result, result);
    1121                 :   }
    1122                 : 
    1123              65 :   return NS_OK;
    1124                 : }
    1125                 : 
    1126                 : NS_IMETHODIMP
    1127               0 : nsParser::ContinueInterruptedParsing()
    1128                 : {
    1129                 :   // If there are scripts executing, then the content sink is jumping the gun
    1130                 :   // (probably due to a synchronous XMLHttpRequest) and will re-enable us
    1131                 :   // later, see bug 460706.
    1132               0 :   if (!IsOkToProcessNetworkData()) {
    1133               0 :     return NS_OK;
    1134                 :   }
    1135                 : 
    1136                 :   // If the stream has already finished, there's a good chance
    1137                 :   // that we might start closing things down when the parser
    1138                 :   // is reenabled. To make sure that we're not deleted across
    1139                 :   // the reenabling process, hold a reference to ourselves.
    1140               0 :   nsresult result=NS_OK;
    1141               0 :   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
    1142                 : 
    1143                 : #ifdef DEBUG
    1144               0 :   if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) {
    1145               0 :     NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser.");
    1146                 :   }
    1147                 : #endif
    1148                 : 
    1149                 :   bool isFinalChunk = mParserContext &&
    1150               0 :                         mParserContext->mStreamListenerState == eOnStop;
    1151                 : 
    1152               0 :   mProcessingNetworkData = true;
    1153               0 :   if (mSink) {
    1154               0 :     mSink->WillParse();
    1155                 :   }
    1156               0 :   result = ResumeParse(true, isFinalChunk); // Ref. bug 57999
    1157               0 :   mProcessingNetworkData = false;
    1158                 : 
    1159               0 :   if (result != NS_OK) {
    1160               0 :     result=mInternalState;
    1161                 :   }
    1162                 : 
    1163               0 :   return result;
    1164                 : }
    1165                 : 
    1166                 : /**
    1167                 :  *  Stops parsing temporarily. That's it will prevent the
    1168                 :  *  parser from building up content model.
    1169                 :  */
    1170                 : NS_IMETHODIMP_(void)
    1171               0 : nsParser::BlockParser()
    1172                 : {
    1173               0 :   mFlags &= ~NS_PARSER_FLAG_PARSER_ENABLED;
    1174               0 : }
    1175                 : 
    1176                 : /**
    1177                 :  *  Open up the parser for tokenization, building up content
    1178                 :  *  model..etc. However, this method does not resume parsing
    1179                 :  *  automatically. It's the callers' responsibility to restart
    1180                 :  *  the parsing engine.
    1181                 :  */
    1182                 : NS_IMETHODIMP_(void)
    1183               0 : nsParser::UnblockParser()
    1184                 : {
    1185               0 :   if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) {
    1186               0 :     mFlags |= NS_PARSER_FLAG_PARSER_ENABLED;
    1187                 :   } else {
    1188               0 :     NS_WARNING("Trying to unblock an unblocked parser.");
    1189                 :   }
    1190               0 : }
    1191                 : 
    1192                 : NS_IMETHODIMP_(void)
    1193               0 : nsParser::ContinueInterruptedParsingAsync()
    1194                 : {
    1195               0 :   mSink->ContinueInterruptedParsingAsync();
    1196               0 : }
    1197                 : 
    1198                 : /**
    1199                 :  * Call this to query whether the parser is enabled or not.
    1200                 :  */
    1201                 : NS_IMETHODIMP_(bool)
    1202               0 : nsParser::IsParserEnabled()
    1203                 : {
    1204               0 :   return (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) != 0;
    1205                 : }
    1206                 : 
    1207                 : /**
    1208                 :  * Call this to query whether the parser thinks it's done with parsing.
    1209                 :  */
    1210                 : NS_IMETHODIMP_(bool)
    1211            3342 : nsParser::IsComplete()
    1212                 : {
    1213            3342 :   return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT);
    1214                 : }
    1215                 : 
    1216                 : 
    1217               0 : void nsParser::HandleParserContinueEvent(nsParserContinueEvent *ev)
    1218                 : {
    1219                 :   // Ignore any revoked continue events...
    1220               0 :   if (mContinueEvent != ev)
    1221               0 :     return;
    1222                 : 
    1223               0 :   mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
    1224               0 :   mContinueEvent = nsnull;
    1225                 : 
    1226               0 :   NS_ASSERTION(IsOkToProcessNetworkData(),
    1227                 :                "Interrupted in the middle of a script?");
    1228               0 :   ContinueInterruptedParsing();
    1229                 : }
    1230                 : 
    1231                 : bool
    1232               0 : nsParser::IsInsertionPointDefined()
    1233                 : {
    1234               0 :   return true;
    1235                 : }
    1236                 : 
    1237                 : void
    1238               0 : nsParser::BeginEvaluatingParserInsertedScript()
    1239                 : {
    1240               0 : }
    1241                 : 
    1242                 : void
    1243               0 : nsParser::EndEvaluatingParserInsertedScript()
    1244                 : {
    1245               0 : }
    1246                 : 
    1247                 : void
    1248               0 : nsParser::MarkAsNotScriptCreated(const char* aCommand)
    1249                 : {
    1250               0 : }
    1251                 : 
    1252                 : bool
    1253               0 : nsParser::IsScriptCreated()
    1254                 : {
    1255               0 :   return false;
    1256                 : }
    1257                 : 
    1258                 : /**
    1259                 :  *  This is the main controlling routine in the parsing process.
    1260                 :  *  Note that it may get called multiple times for the same scanner,
    1261                 :  *  since this is a pushed based system, and all the tokens may
    1262                 :  *  not have been consumed by the scanner during a given invocation
    1263                 :  *  of this method.
    1264                 :  */
    1265                 : NS_IMETHODIMP
    1266            3343 : nsParser::Parse(nsIURI* aURL,
    1267                 :                 nsIRequestObserver* aListener,
    1268                 :                 void* aKey,
    1269                 :                 nsDTDMode aMode)
    1270                 : {
    1271                 : 
    1272            3343 :   NS_PRECONDITION(aURL, "Error: Null URL given");
    1273                 : 
    1274            3343 :   nsresult result=kBadURL;
    1275            3343 :   mObserver = aListener;
    1276                 : 
    1277            3343 :   if (aURL) {
    1278            6686 :     nsCAutoString spec;
    1279            3343 :     nsresult rv = aURL->GetSpec(spec);
    1280            3343 :     if (rv != NS_OK) {
    1281               0 :       return rv;
    1282                 :     }
    1283           10029 :     NS_ConvertUTF8toUTF16 theName(spec);
    1284                 : 
    1285                 :     nsScanner* theScanner = new nsScanner(theName, false, mCharset,
    1286            3343 :                                           mCharsetSource);
    1287                 :     CParserContext* pc = new CParserContext(mParserContext, theScanner, aKey,
    1288            3343 :                                             mCommand, aListener);
    1289            3343 :     if (pc && theScanner) {
    1290            3343 :       pc->mMultipart = true;
    1291            3343 :       pc->mContextType = CParserContext::eCTURL;
    1292            3343 :       pc->mDTDMode = aMode;
    1293            3343 :       PushContext(*pc);
    1294                 : 
    1295            3343 :       result = NS_OK;
    1296                 :     } else {
    1297               0 :       result = mInternalState = NS_ERROR_HTMLPARSER_BADCONTEXT;
    1298                 :     }
    1299                 :   }
    1300            3343 :   return result;
    1301                 : }
    1302                 : 
    1303                 : /**
    1304                 :  * Call this method if all you want to do is parse 1 string full of HTML text.
    1305                 :  * In particular, this method should be called by the DOM when it has an HTML
    1306                 :  * string to feed to the parser in real-time.
    1307                 :  *
    1308                 :  * @param   aSourceBuffer contains a string-full of real content
    1309                 :  * @param   aMimeType tells us what type of content to expect in the given string
    1310                 :  */
    1311                 : NS_IMETHODIMP
    1312               3 : nsParser::Parse(const nsAString& aSourceBuffer,
    1313                 :                 void* aKey,
    1314                 :                 const nsACString& aMimeType,
    1315                 :                 bool aLastCall,
    1316                 :                 nsDTDMode aMode)
    1317                 : {
    1318               3 :   nsresult result = NS_OK;
    1319                 : 
    1320                 :   // Don't bother if we're never going to parse this.
    1321               3 :   if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
    1322               0 :     return result;
    1323                 :   }
    1324                 : 
    1325               3 :   if (!aLastCall && aSourceBuffer.IsEmpty()) {
    1326                 :     // Nothing is being passed to the parser so return
    1327                 :     // immediately. mUnusedInput will get processed when
    1328                 :     // some data is actually passed in.
    1329                 :     // But if this is the last call, make sure to finish up
    1330                 :     // stuff correctly.
    1331               0 :     return result;
    1332                 :   }
    1333                 : 
    1334                 :   // Hack to pass on to the dtd the caller's desire to
    1335                 :   // parse a fragment without worrying about containment rules
    1336               3 :   if (aMode == eDTDMode_fragment)
    1337               0 :     mCommand = eViewFragment;
    1338                 : 
    1339                 :   // Maintain a reference to ourselves so we don't go away
    1340                 :   // till we're completely done.
    1341               6 :   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
    1342                 : 
    1343               3 :   if (aLastCall || !aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) {
    1344                 :     // Note: The following code will always find the parser context associated
    1345                 :     // with the given key, even if that context has been suspended (e.g., for
    1346                 :     // another document.write call). This doesn't appear to be exactly what IE
    1347                 :     // does in the case where this happens, but this makes more sense.
    1348               3 :     CParserContext* pc = mParserContext;
    1349               6 :     while (pc && pc->mKey != aKey) {
    1350               0 :       pc = pc->mPrevContext;
    1351                 :     }
    1352                 : 
    1353               3 :     if (!pc) {
    1354                 :       // Only make a new context if we don't have one, OR if we do, but has a
    1355                 :       // different context key.
    1356               1 :       nsScanner* theScanner = new nsScanner(mUnusedInput, mCharset, mCharsetSource);
    1357               1 :       NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY);
    1358                 : 
    1359               1 :       eAutoDetectResult theStatus = eUnknownDetect;
    1360                 : 
    1361               1 :       if (mParserContext && mParserContext->mMimeType == aMimeType) {
    1362                 :         // Ref. Bug 90379
    1363               0 :         NS_ASSERTION(mDTD, "How come the DTD is null?");
    1364                 : 
    1365               0 :         if (mParserContext) {
    1366               0 :           theStatus = mParserContext->mAutoDetectStatus;
    1367                 :           // Added this to fix bug 32022.
    1368                 :         }
    1369                 :       }
    1370                 : 
    1371                 :       pc = new CParserContext(mParserContext, theScanner, aKey, mCommand,
    1372               1 :                               0, theStatus, aLastCall);
    1373               1 :       NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY);
    1374                 : 
    1375               1 :       PushContext(*pc);
    1376                 : 
    1377               1 :       pc->mMultipart = !aLastCall; // By default
    1378               1 :       if (pc->mPrevContext) {
    1379               0 :         pc->mMultipart |= pc->mPrevContext->mMultipart;
    1380                 :       }
    1381                 : 
    1382                 :       // Start fix bug 40143
    1383               1 :       if (pc->mMultipart) {
    1384               1 :         pc->mStreamListenerState = eOnDataAvail;
    1385               1 :         if (pc->mScanner) {
    1386               1 :           pc->mScanner->SetIncremental(true);
    1387                 :         }
    1388                 :       } else {
    1389               0 :         pc->mStreamListenerState = eOnStop;
    1390               0 :         if (pc->mScanner) {
    1391               0 :           pc->mScanner->SetIncremental(false);
    1392                 :         }
    1393                 :       }
    1394                 :       // end fix for 40143
    1395                 : 
    1396               1 :       pc->mContextType=CParserContext::eCTString;
    1397               1 :       pc->SetMimeType(aMimeType);
    1398               1 :       if (pc->mPrevContext && aMode == eDTDMode_autodetect) {
    1399                 :         // Preserve the DTD mode from the last context, bug 265814.
    1400               0 :         pc->mDTDMode = pc->mPrevContext->mDTDMode;
    1401                 :       } else {
    1402               1 :         pc->mDTDMode = aMode;
    1403                 :       }
    1404                 : 
    1405               1 :       mUnusedInput.Truncate();
    1406                 : 
    1407               1 :       pc->mScanner->Append(aSourceBuffer);
    1408                 :       // Do not interrupt document.write() - bug 95487
    1409               1 :       result = ResumeParse(false, false, false);
    1410                 :     } else {
    1411               2 :       pc->mScanner->Append(aSourceBuffer);
    1412               2 :       if (!pc->mPrevContext) {
    1413                 :         // Set stream listener state to eOnStop, on the final context - Fix 68160,
    1414                 :         // to guarantee DidBuildModel() call - Fix 36148
    1415               2 :         if (aLastCall) {
    1416               1 :           pc->mStreamListenerState = eOnStop;
    1417               1 :           pc->mScanner->SetIncremental(false);
    1418                 :         }
    1419                 : 
    1420               2 :         if (pc == mParserContext) {
    1421                 :           // If pc is not mParserContext, then this call to ResumeParse would
    1422                 :           // do the wrong thing and try to continue parsing using
    1423                 :           // mParserContext. We need to wait to actually resume parsing on pc.
    1424               2 :           ResumeParse(false, false, false);
    1425                 :         }
    1426                 :       }
    1427                 :     }
    1428                 :   }
    1429                 : 
    1430               3 :   return result;
    1431                 : }
    1432                 : 
    1433                 : NS_IMETHODIMP
    1434               1 : nsParser::ParseFragment(const nsAString& aSourceBuffer,
    1435                 :                         nsTArray<nsString>& aTagStack)
    1436                 : {
    1437               1 :   nsresult result = NS_OK;
    1438               2 :   nsAutoString  theContext;
    1439               1 :   PRUint32 theCount = aTagStack.Length();
    1440               1 :   PRUint32 theIndex = 0;
    1441                 : 
    1442                 :   // Disable observers for fragments
    1443               1 :   mFlags &= ~NS_PARSER_FLAG_OBSERVERS_ENABLED;
    1444                 : 
    1445               2 :   for (theIndex = 0; theIndex < theCount; theIndex++) {
    1446               1 :     theContext.AppendLiteral("<");
    1447               1 :     theContext.Append(aTagStack[theCount - theIndex - 1]);
    1448               1 :     theContext.AppendLiteral(">");
    1449                 :   }
    1450                 : 
    1451               1 :   if (theCount == 0) {
    1452                 :     // Ensure that the buffer is not empty. Because none of the DTDs care
    1453                 :     // about leading whitespace, this doesn't change the result.
    1454               0 :     theContext.AssignLiteral(" ");
    1455                 :   }
    1456                 : 
    1457                 :   // First, parse the context to build up the DTD's tag stack. Note that we
    1458                 :   // pass false for the aLastCall parameter.
    1459                 :   result = Parse(theContext,
    1460                 :                  (void*)&theContext,
    1461               1 :                  NS_LITERAL_CSTRING("application/xml"),
    1462                 :                  false,
    1463               1 :                  eDTDMode_full_standards);
    1464               1 :   if (NS_FAILED(result)) {
    1465               0 :     mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
    1466               0 :     return result;
    1467                 :   }
    1468                 : 
    1469               1 :   if (!mSink) {
    1470                 :     // Parse must have failed in the XML case and so the sink was killed.
    1471               0 :     return NS_ERROR_HTMLPARSER_STOPPARSING;
    1472                 :   }
    1473                 : 
    1474               2 :   nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink);
    1475               1 :   NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink");
    1476                 : 
    1477               1 :   fragSink->WillBuildContent();
    1478                 :   // Now, parse the actual content. Note that this is the last call
    1479                 :   // for HTML content, but for XML, we will want to build and parse
    1480                 :   // the end tags.  However, if tagStack is empty, it's the last call
    1481                 :   // for XML as well.
    1482               1 :   if (theCount == 0) {
    1483                 :     result = Parse(aSourceBuffer,
    1484                 :                    &theContext,
    1485               0 :                    NS_LITERAL_CSTRING("application/xml"),
    1486                 :                    true,
    1487               0 :                    eDTDMode_full_standards);
    1488               0 :     fragSink->DidBuildContent();
    1489                 :   } else {
    1490                 :     // Add an end tag chunk, so expat will read the whole source buffer,
    1491                 :     // and not worry about ']]' etc.
    1492               2 :     result = Parse(aSourceBuffer + NS_LITERAL_STRING("</"),
    1493                 :                    &theContext,
    1494               1 :                    NS_LITERAL_CSTRING("application/xml"),
    1495                 :                    false,
    1496               2 :                    eDTDMode_full_standards);
    1497               1 :     fragSink->DidBuildContent();
    1498                 : 
    1499               1 :     if (NS_SUCCEEDED(result)) {
    1500               2 :       nsAutoString endContext;
    1501               2 :       for (theIndex = 0; theIndex < theCount; theIndex++) {
    1502                 :          // we already added an end tag chunk above
    1503               1 :         if (theIndex > 0) {
    1504               0 :           endContext.AppendLiteral("</");
    1505                 :         }
    1506                 : 
    1507               1 :         nsString& thisTag = aTagStack[theIndex];
    1508                 :         // was there an xmlns=?
    1509               1 :         PRInt32 endOfTag = thisTag.FindChar(PRUnichar(' '));
    1510               1 :         if (endOfTag == -1) {
    1511               0 :           endContext.Append(thisTag);
    1512                 :         } else {
    1513               1 :           endContext.Append(Substring(thisTag,0,endOfTag));
    1514                 :         }
    1515                 : 
    1516               1 :         endContext.AppendLiteral(">");
    1517                 :       }
    1518                 : 
    1519                 :       result = Parse(endContext,
    1520                 :                      &theContext,
    1521               1 :                      NS_LITERAL_CSTRING("application/xml"),
    1522                 :                      true,
    1523               1 :                      eDTDMode_full_standards);
    1524                 :     }
    1525                 :   }
    1526                 : 
    1527               1 :   mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
    1528                 : 
    1529               1 :   return result;
    1530                 : }
    1531                 : 
    1532                 : /**
    1533                 :  *  This routine is called to cause the parser to continue parsing its
    1534                 :  *  underlying stream.  This call allows the parse process to happen in
    1535                 :  *  chunks, such as when the content is push based, and we need to parse in
    1536                 :  *  pieces.
    1537                 :  *
    1538                 :  *  An interesting change in how the parser gets used has led us to add extra
    1539                 :  *  processing to this method.  The case occurs when the parser is blocked in
    1540                 :  *  one context, and gets a parse(string) call in another context.  In this
    1541                 :  *  case, the parserContexts are linked. No problem.
    1542                 :  *
    1543                 :  *  The problem is that Parse(string) assumes that it can proceed unabated,
    1544                 :  *  but if the parser is already blocked that assumption is false. So we
    1545                 :  *  needed to add a mechanism here to allow the parser to continue to process
    1546                 :  *  (the pop and free) contexts until 1) it gets blocked again; 2) it runs
    1547                 :  *  out of contexts.
    1548                 :  *
    1549                 :  *  @param   allowIteration : set to true if non-script resumption is requested
    1550                 :  *  @param   aIsFinalChunk : tells us when the last chunk of data is provided.
    1551                 :  *  @param   aCanInterrupt : accepted but not referenced in this function body.
    1552                 :  *  @return  error code -- 0 if ok, non-zero if error.
    1553                 :  */
    1554                 : nsresult
    1555            6768 : nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
    1556                 :                       bool aCanInterrupt)
    1557                 : {
    1558            6768 :   nsresult result = NS_OK;
    1559                 : 
    1560            6768 :   if ((mFlags & NS_PARSER_FLAG_PARSER_ENABLED) &&
    1561                 :       mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {  // otherwise fall to the final return with NS_OK
    1562                 : 
    1563            6757 :     result = WillBuildModel(mParserContext->mScanner->GetFilename());  // NOTE(review): mScanner is dereferenced unchecked here but null-tested in the loop below -- confirm it cannot be null
    1564            6757 :     if (NS_FAILED(result)) {
    1565               0 :       mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;  // clears the flag that gates Tokenize() below
    1566               0 :       return result;
    1567                 :     }
    1568                 : 
    1569            6757 :     if (mDTD) {
    1570            6757 :       mSink->WillResume();
    1571            6757 :       bool theIterationIsOk = true;
    1572                 : 
    1573           16929 :       while (result == NS_OK && theIterationIsOk) {  // one pass = tokenize + build model
    1574            6757 :         if (!mUnusedInput.IsEmpty() && mParserContext->mScanner) {
    1575                 :           // -- Ref: Bug# 22485 --
    1576                 :           // Insert the unused input into the source buffer
    1577                 :           // as if it was read from the input stream.
    1578                 :           // Adding UngetReadable() per vidur!!
    1579               0 :           mParserContext->mScanner->UngetReadable(mUnusedInput);
    1580               0 :           mUnusedInput.Truncate(0);
    1581                 :         }
    1582                 : 
    1583                 :         // Only allow parsing to be interrupted in the subsequent call to
    1584                 :         // build model.
    1585                 :         nsresult theTokenizerResult = (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE)
    1586            6757 :                                       ? Tokenize(aIsFinalChunk)
    1587           13514 :                                       : NS_OK;  // tokenizing is skipped when the flag is clear
    1588            6757 :         result = BuildModel();
    1589                 : 
    1590            6757 :         if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
    1591               0 :           PostContinueEvent();  // presumably schedules a later resume -- confirm
    1592                 :         }
    1593                 : 
    1594                 :         theIterationIsOk = theTokenizerResult != kEOF &&
    1595            6757 :                            result != NS_ERROR_HTMLPARSER_INTERRUPTED;  // stop looping at end of buffer or on interruption
    1596                 : 
    1597                 :         // Make sure not to stop parsing too early. Therefore, before shutting
    1598                 :         // down the parser, it's important to check whether the input buffer
    1599                 :         // has been scanned to completion (theTokenizerResult should be kEOF).
    1600                 :         // kEOF -> End of buffer.
    1601                 : 
    1602                 :         // If we're told to block the parser, we disable all further parsing
    1603                 :         // (and cache any data coming in) until the parser is re-enabled.
    1604            6757 :         if (NS_ERROR_HTMLPARSER_BLOCK == result) {
    1605               0 :           mSink->WillInterrupt();
    1606               0 :           if (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) {
    1607                 :             // If we were blocked by a recursive invocation, don't re-block.
    1608               0 :             BlockParser();
    1609                 :           }
    1610               0 :           return NS_OK;  // blocking is not surfaced to the caller as an error
    1611                 :         }
    1612            6757 :         if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
    1613                 :           // Note: Parser Terminate() calls DidBuildModel.
    1614              65 :           if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {  // only finish the model here if the stop is not already recorded
    1615               0 :             DidBuildModel(mStreamStatus);
    1616               0 :             mInternalState = result;
    1617                 :           }
    1618                 : 
    1619              65 :           return NS_OK;  // a stop request is likewise reported as NS_OK
    1620                 :         }
    1621            6692 :         if ((NS_OK == result && theTokenizerResult == kEOF) ||
    1622                 :              result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
    1623                 :           bool theContextIsStringBased =
    1624            6692 :             CParserContext::eCTString == mParserContext->mContextType;
    1625                 : 
    1626           10107 :           if (mParserContext->mStreamListenerState == eOnStop ||
    1627            3415 :               !mParserContext->mMultipart || theContextIsStringBased) {
    1628            3279 :             if (!mParserContext->mPrevContext) {  // no previous context, so nothing to pop
    1629            3279 :               if (mParserContext->mStreamListenerState == eOnStop) {  // finish only once the stream has stopped; otherwise fall through
    1630            3277 :                 DidBuildModel(mStreamStatus);
    1631            3277 :                 return NS_OK;
    1632                 :               }
    1633                 :             } else {
    1634               0 :               CParserContext* theContext = PopContext();
    1635               0 :               if (theContext) {
    1636               0 :                 theIterationIsOk = allowIteration && theContextIsStringBased;  // keep looping only for string contexts when the caller allows it
    1637               0 :                 if (theContext->mCopyUnused) {
    1638               0 :                   theContext->mScanner->CopyUnusedData(mUnusedInput);
    1639                 :                 }
    1640                 : 
    1641               0 :                 delete theContext;
    1642                 :               }
    1643                 : 
    1644               0 :               result = mInternalState;  // continue with the state stored on the parser
    1645                 :               aIsFinalChunk = mParserContext &&
    1646               0 :                               mParserContext->mStreamListenerState == eOnStop;
    1647                 :               // ...then intentionally fall through to mSink->WillInterrupt()...
    1648                 :             }
    1649                 :           }
    1650                 :         }
    1651                 : 
    1652            3415 :         if (theTokenizerResult == kEOF ||
    1653                 :             result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
    1654            3415 :           result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;  // INTERRUPTED is normalized to NS_OK
    1655            3415 :           mSink->WillInterrupt();
    1656                 :         }
    1657                 :       }
    1658                 :     } else {
    1659               0 :       mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;  // no DTD: store and return the error
    1660                 :     }
    1661                 :   }
    1662                 : 
    1663            3426 :   return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;  // INTERRUPTED is never returned to callers
    1664                 : }
    1665                 : 
    1666                 : /**
    1667                 :  *  This is where we loop over the tokens created in the
    1668                 :  *  tokenization phase, and try to make sense out of them.
    1669                 :  */
    1670                 : nsresult
    1671            6757 : nsParser::BuildModel()
    1672                 : {
    1673            6757 :   nsITokenizer* theTokenizer = nsnull;
    1674                 : 
    1675            6757 :   nsresult result = NS_OK;
    1676            6757 :   if (mParserContext) {
    1677            6757 :     result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);  // skipped (result stays NS_OK, tokenizer stays null) when there is no context
    1678                 :   }
    1679                 : 
    1680            6757 :   if (NS_SUCCEEDED(result)) {
    1681            6757 :     if (mDTD) {  // with no DTD, result is returned unchanged
    1682            6757 :       bool inDocWrite = !!mParserContext->mPrevContext;  // NOTE(review): mParserContext is dereferenced here without the null test used above -- confirm it is never null when mDTD is set
    1683            6757 :       result = mDTD->BuildModel(theTokenizer,
    1684                 :                                 // ignore interruptions in document.write
    1685            6757 :                                 !inDocWrite, // don't count lines in document.write
    1686           13514 :                                 &mCharset);
    1687                 :     }
    1688                 :   } else {
    1689               0 :     mInternalState = result = NS_ERROR_HTMLPARSER_BADTOKENIZER;  // any GetTokenizer failure is reported as BADTOKENIZER and stored in mInternalState
    1690                 :   }
    1691            6757 :   return result;
    1692                 : }
    1693                 : 
    1694                 : /*******************************************************************
    1695                 :   These methods are used to talk to the netlib system...
    1696                 :  *******************************************************************/
    1697                 : 
    1698                 : nsresult
    1699            3341 : nsParser::OnStartRequest(nsIRequest *request, nsISupports* aContext)
    1700                 : {  // Resets per-request state on the current parser context; always returns NS_OK
    1701            3341 :   NS_PRECONDITION(eNone == mParserContext->mStreamListenerState,
    1702                 :                   "Parser's nsIStreamListener API was not setup "
    1703                 :                   "correctly in constructor.");
    1704            3341 :   if (mObserver) {
    1705               0 :     mObserver->OnStartRequest(request, aContext);  // forwarded first; the observer's return value is ignored
    1706                 :   }
    1707            3341 :   mParserContext->mStreamListenerState = eOnStart;
    1708            3341 :   mParserContext->mAutoDetectStatus = eUnknownDetect;
    1709            3341 :   mParserContext->mRequest = request;
    1710                 : 
    1711            3341 :   NS_ASSERTION(!mParserContext->mPrevContext,
    1712                 :                "Clobbering DTD for non-root parser context!");
    1713            3341 :   mDTD = nsnull;  // any DTD from a previous request is dropped
    1714                 : 
    1715                 :   nsresult rv;
    1716            6682 :   nsCAutoString contentType;
    1717            6682 :   nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
    1718            3341 :   if (channel) {
    1719            3341 :     rv = channel->GetContentType(contentType);
    1720            3341 :     if (NS_SUCCEEDED(rv)) {  // on failure the context's MIME type is left as it was
    1721            3341 :       mParserContext->SetMimeType(contentType);
    1722                 :     }
    1723                 :   }
    1724                 : 
    1725            3341 :   rv = NS_OK;  // overwrites any GetContentType error, so NS_OK is always returned
    1726                 : 
    1727            3341 :   return rv;
    1728                 : }
    1729                 : 
    1730                 : 
    1731                 : #define UTF16_BOM "UTF-16"  // label assigned when a UTF-16 byte order mark (FE FF or FF FE) is found
    1732                 : #define UTF16_BE "UTF-16BE"  // label for the BOM-less pattern 00 3C 00 xx
    1733                 : #define UTF16_LE "UTF-16LE"  // label for the BOM-less pattern 3C 00 xx 00
    1734                 : #define UTF8 "UTF-8"  // label for the EF BB BF signature
    1735                 : 
    1736               0 : static inline bool IsSecondMarker(unsigned char aChar)
    1737                 : {  // True for '!', '?', 'h' or 'H'; DetectByteOrderMark applies it to the character following a BOM-less UTF-16 '<' (presumably "<!", "<?", "<h" -- confirm)
    1738               0 :   switch (aChar) {
    1739                 :     case '!':
    1740                 :     case '?':
    1741                 :     case 'h':
    1742                 :     case 'H':
    1743               0 :       return true;
    1744                 :     default:
    1745               0 :       return false;
    1746                 :   }
    1747                 : }
    1748                 : 
    1749                 : static bool
    1750            2446 : DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen,
    1751                 :                     nsCString& oCharset, PRInt32& oCharsetSource)
    1752                 : {  // Guesses the charset from the leading bytes; returns true iff oCharset ended up non-empty
    1753            2446 :  oCharsetSource= kCharsetFromAutoDetection;  // default source; overwritten below whenever a charset is recognised
    1754            2446 :  oCharset.Truncate();
    1755                 :  // See http://www.w3.org/TR/2000/REC-xml-20001006#sec-guessing
    1756                 :  // for details
    1757                 :  // Also, MS Win2K notepad now generate 3 bytes BOM in UTF8 as UTF8 signature
    1758                 :  // We need to check that
    1759                 :  // UCS2 BOM FEFF = UTF8 EF BB BF
    1760            2446 :  switch(aBytes[0])  // NOTE(review): aBytes[0..3] are read in this switch without consulting aLen -- confirm callers always supply at least 4 bytes
    1761                 :          {
    1762                 :    case 0x00:
    1763               0 :      if((0x3C==aBytes[1]) && (0x00==aBytes[2])) {
    1764                 :         // 00 3C 00
    1765               0 :         if(IsSecondMarker(aBytes[3])) {
    1766                 :            // 00 3C 00 SM UTF-16,  big-endian, no Byte Order Mark 
    1767               0 :            oCharset.Assign(UTF16_BE); 
    1768               0 :            oCharsetSource = kCharsetFromByteOrderMark;
    1769                 :         } 
    1770                 :      }
    1771               0 :    break;
    1772                 :    case 0x3C:
    1773            2446 :      if(0x00==aBytes[1] && (0x00==aBytes[3])) {
    1774                 :         // 3C 00 XX 00
    1775               0 :         if(IsSecondMarker(aBytes[2])) {
    1776                 :            // 3C 00 SM 00 UTF-16,  little-endian, no Byte Order Mark 
    1777               0 :            oCharset.Assign(UTF16_LE); 
    1778               0 :            oCharsetSource = kCharsetFromByteOrderMark;
    1779                 :         } 
    1780                 :      // For html, meta tag detector is invoked before this so that we have 
    1781                 :      // to deal only with XML here.
    1782            9634 :      } else if(                     (0x3F==aBytes[1]) &&
    1783            4792 :                (0x78==aBytes[2]) && (0x6D==aBytes[3]) &&
    1784            2396 :                (0 == PL_strncmp("<?xml", (char*)aBytes, 5 ))) {  // NOTE(review): compares 5 bytes, again with no aLen check
    1785                 :        // 3C 3F 78 6D
    1786                 :        // ASCII characters are in their normal positions, so we can safely
    1787                 :        // deal with the XML declaration in the old C way
    1788                 :        // The shortest string so far (strlen==5):
    1789                 :        // <?xml
    1790                 :        PRInt32 i;
    1791            2396 :        bool versionFound = false, encodingFound = false;
    1792           24283 :        for (i=6; i < aLen && !encodingFound; ++i) {  // starts after "<?xml" plus one following character
    1793                 :          // end of XML declaration?
    1794           25545 :          if ((((char*)aBytes)[i] == '?') && 
    1795                 :            ((i+1) < aLen) &&
    1796            1829 :            (((char*)aBytes)[i+1] == '>')) {
    1797            1829 :            break;
    1798                 :          }
    1799                 :          // Version is required.
    1800           21887 :          if (!versionFound) {
    1801                 :            // Want to avoid string comparisons, hence looking for 'n'
    1802                 :            // and only if found check the string leading to it. Not
    1803                 :            // foolproof, but fast.
    1804                 :            // The shortest string allowed before this is  (strlen==13):
    1805                 :            // <?xml version
    1806           19168 :            if ((((char*)aBytes)[i] == 'n') &&
    1807                 :              (i >= 12) && 
    1808            2396 :              (0 == PL_strncmp("versio", (char*)(aBytes+i-6), 6 ))) {
    1809                 :              // Fast forward through version
    1810            2396 :              char q = 0;
    1811           14376 :              for (++i; i < aLen; ++i) {  // runs to the closing quote of the version value
    1812           14376 :                char qi = ((char*)aBytes)[i];
    1813           14376 :                if (qi == '\'' || qi == '"') {
    1814            4792 :                  if (q && q == qi) {
    1815                 :                    //  ending quote
    1816            2396 :                    versionFound = true;
    1817            2396 :                    break;
    1818                 :                  } else {
    1819                 :                    // Starting quote
    1820            2396 :                    q = qi;
    1821                 :                  }
    1822                 :                }
    1823                 :              }
    1824                 :            }
    1825                 :          } else {
    1826                 :            // encoding must follow version
    1827                 :            // Want to avoid string comparisons, hence looking for 'g'
    1828                 :            // and only if found check the string leading to it. Not
    1829                 :            // foolproof, but fast.
    1830                 :            // The shortest allowed string before this (strlen==26):
    1831                 :            // <?xml version="1" encoding
    1832            5681 :            if ((((char*)aBytes)[i] == 'g') &&
    1833                 :              (i >= 25) && 
    1834             566 :              (0 == PL_strncmp("encodin", (char*)(aBytes+i-7), 7 ))) {
    1835             566 :              PRInt32 encStart = 0;
    1836             566 :              char q = 0;
    1837            5298 :              for (++i; i < aLen; ++i) {
    1838            5298 :                char qi = ((char*)aBytes)[i];
    1839            5298 :                if (qi == '\'' || qi == '"') {
    1840            1132 :                  if (q && q == qi) {
    1841             566 :                    PRInt32 count = i - encStart;  // length of the text between the two quotes
    1842                 :                    // encoding value is invalid if it is UTF-16
    1843            1132 :                    if (count > 0 && 
    1844             566 :                      (0 != PL_strcmp("UTF-16", (char*)(aBytes+encStart)))) {  // NOTE(review): PL_strcmp looks like a NUL-terminated compare, but the value here is followed by its closing quote, so "UTF-16" is probably never rejected -- confirm
    1845             566 :                      oCharset.Assign((char*)(aBytes+encStart),count);
    1846             566 :                      oCharsetSource = kCharsetFromMetaTag;  // an XML-declaration encoding is reported with the meta-tag source
    1847                 :                    }
    1848             566 :                    encodingFound = true;
    1849             566 :                    break;
    1850                 :                  } else {
    1851             566 :                    encStart = i+1;
    1852             566 :                    q = qi;
    1853                 :                  }
    1854                 :                }
    1855                 :              }
    1856                 :            }
    1857                 :          } // if (!versionFound)
    1858                 :        } // for
    1859                 :      }
    1860            2446 :    break;
    1861                 :    case 0xEF:  
    1862               0 :      if((0xBB==aBytes[1]) && (0xBF==aBytes[2])) {
    1863                 :         // EF BB BF
    1864                 :         // Win2K UTF-8 BOM
    1865               0 :         oCharset.Assign(UTF8); 
    1866               0 :         oCharsetSource= kCharsetFromByteOrderMark;
    1867                 :      }
    1868               0 :    break;
    1869                 :    case 0xFE:
    1870               0 :      if(0xFF==aBytes[1]) {
    1871                 :         // FE FF UTF-16, big-endian 
    1872               0 :         oCharset.Assign(UTF16_BOM); 
    1873               0 :         oCharsetSource= kCharsetFromByteOrderMark;
    1874                 :      }
    1875               0 :    break;
    1876                 :    case 0xFF:
    1877               0 :      if(0xFE==aBytes[1]) {
    1878                 :        // FF FE
    1879                 :        // UTF-16, little-endian 
    1880               0 :        oCharset.Assign(UTF16_BOM);  // same "UTF-16" label as the FE FF case
    1881               0 :        oCharsetSource= kCharsetFromByteOrderMark;
    1882                 :      }
    1883               0 :    break;
    1884                 :    // case 0x4C: if((0x6F==aBytes[1]) && ((0xA7==aBytes[2] && (0x94==aBytes[3])) {
    1885                 :    //   We do not care about EBCDIC here....
    1886                 :    // }
    1887                 :    // break;
    1888                 :  }  // switch
    1889            2446 :  return !oCharset.IsEmpty();
    1890                 : }
    1891                 : 
    1892                 : inline const char
    1893            2675 : GetNextChar(nsACString::const_iterator& aStart,
    1894                 :             nsACString::const_iterator& aEnd)
    1895                 : {  // Advances aStart by one and returns the character now under it, or '\0' if that reached aEnd
    1896            2675 :   NS_ASSERTION(aStart != aEnd, "end of buffer");
    1897            2675 :   return (++aStart != aEnd) ? *aStart : '\0';  // pre-increment: the incoming position itself is never returned
    1898                 : }
    1899                 : 
    1900                 : bool
    1901            2471 : nsParser::DetectMetaTag(const char* aBytes,
    1902                 :                         PRInt32 aLen,
    1903                 :                         nsCString& aCharset,
    1904                 :                         PRInt32& aCharsetSource)
    1905                 : {  // Scans the start of an HTML buffer for <meta ... charset=...>; returns true iff a non-empty value was copied into aCharset
    1906            2471 :   aCharsetSource= kCharsetFromMetaTag;  // assigned unconditionally, even on the paths that return false
    1907            2471 :   aCharset.SetLength(0);
    1908                 : 
    1909                 :   // XXX Only look inside HTML documents for now. For XML
    1910                 :   // documents we should be looking inside the XMLDecl.
    1911            2471 :   if (!mParserContext->mMimeType.EqualsLiteral(TEXT_HTML)) {
    1912            2446 :     return false;
    1913                 :   }
    1914                 : 
    1915                 :   // Fast and loose parsing to determine if we have a complete
    1916                 :   // META tag in this block, looking upto 2k into it.
    1917                 :   const nsASingleFragmentCString& str =
    1918              50 :       Substring(aBytes, aBytes + NS_MIN(aLen, 2048));
    1919                 :   // XXXldb Should be const_char_iterator when FindInReadable supports it.
    1920              25 :   nsACString::const_iterator begin, end;
    1921                 : 
    1922              25 :   str.BeginReading(begin);
    1923              25 :   str.EndReading(end);
    1924              25 :   nsACString::const_iterator currPos(begin);
    1925              25 :   nsACString::const_iterator tokEnd;
    1926              25 :   nsACString::const_iterator tagEnd(begin);
    1927                 : 
    1928             100 :   while (currPos != end) {  // each pass handles one '<' construct
    1929              75 :     if (!FindCharInReadable('<', currPos, end))
    1930               0 :       break; // no tag found in this buffer
    1931                 : 
    1932              75 :     if (GetNextChar(currPos, end) == '!') {  // GetNextChar moves currPos forward before reading
    1933              75 :       if (GetNextChar(currPos, end) != '-' ||
    1934              25 :           GetNextChar(currPos, end) != '-') {
    1935                 :         // If we only see a <! not followed by --, just skip to the next >.
    1936              25 :         if (!FindCharInReadable('>', currPos, end)) {
    1937               0 :           return false; // No more tags to follow.
    1938                 :         }
    1939                 : 
    1940                 :         // Continue searching for a meta tag following this "comment".
    1941              25 :         ++currPos;
    1942              25 :         continue;
    1943                 :       }
    1944                 : 
    1945                 :       // Found MDO ( <!-- ). Now search for MDC ( --[*s]> )
    1946              25 :       bool foundMDC = false;
    1947              25 :       bool foundMatch = false;
    1948            2550 :       while (!foundMDC) {
    1949            2525 :         if (GetNextChar(currPos, end) == '-' &&
    1950              25 :             GetNextChar(currPos, end) == '-') {
    1951              25 :           foundMatch = !foundMatch; // toggle until we've matching "--"
    1952            2475 :         } else if (currPos == end) {
    1953               0 :           return false; // Couldn't find --[*s]> in this buffer
    1954            2475 :         } else if (foundMatch && *currPos == '>') {
    1955              25 :           foundMDC = true; // found comment end delimiter.
    1956              25 :           ++currPos;
    1957                 :         }
    1958                 :       }
    1959              25 :       continue; // continue searching for META tag.
    1960                 :     }
    1961                 : 
    1962                 :     // Find the end of the tag, break if incomplete
    1963              25 :     tagEnd = currPos;
    1964              25 :     if (!FindCharInReadable('>', tagEnd, end))
    1965               0 :       break;
    1966                 : 
    1967                 :     // If this is not a META tag, continue to next loop
    1968             200 :     if ( (*currPos != 'm' && *currPos != 'M') ||
    1969              50 :          (*(++currPos) != 'e' && *currPos != 'E') ||
    1970              50 :          (*(++currPos) != 't' && *currPos != 'T') ||
    1971              50 :          (*(++currPos) != 'a' && *currPos != 'A') ||
    1972              25 :          !nsCRT::IsAsciiSpace(*(++currPos))) {  // "meta" must be followed by ASCII whitespace
    1973               0 :       currPos = tagEnd;
    1974               0 :       continue;
    1975                 :     }
    1976                 : 
    1977                 :     // If could not find "charset" in this tag, skip this tag and try next
    1978              25 :     tokEnd = tagEnd;
    1979              25 :     if (!CaseInsensitiveFindInReadable(NS_LITERAL_CSTRING("CHARSET"),
    1980              25 :                                        currPos, tokEnd)) {
    1981               0 :       currPos = tagEnd;
    1982               0 :       continue;
    1983                 :     }
    1984              25 :     currPos = tokEnd;  // presumably now just past the matched "charset" text -- confirm against CaseInsensitiveFindInReadable
    1985                 : 
    1986                 :     // skip spaces before '='
    1987             100 :     while (*currPos == kSpace || *currPos == kNewLine ||
    1988              50 :            *currPos == kCR || *currPos == kTab) {
    1989               0 :       ++currPos;
    1990                 :     }
    1991                 :     // skip '='
    1992              25 :     if (*currPos != '=') {
    1993               0 :       currPos = tagEnd;
    1994               0 :       continue;
    1995                 :     }
    1996              25 :     ++currPos;
    1997                 :     // skip spaces after '='
    1998             100 :     while (*currPos == kSpace || *currPos == kNewLine ||
    1999              50 :            *currPos == kCR || *currPos == kTab) {
    2000               0 :       ++currPos;
    2001                 :     }
    2002                 : 
    2003                 :     // skip open quote
    2004              25 :     if (*currPos == '\'' || *currPos == '\"')
    2005               0 :       ++currPos;
    2006                 : 
    2007                 :     // find the end of charset string
    2008              25 :     tokEnd = currPos;
    2009             175 :     while (*tokEnd != '\'' && *tokEnd != '\"' && tokEnd != tagEnd)  // NOTE(review): only a quote or the tag end stops the value; whitespace does not, so an unquoted value keeps any trailing text up to '>'
    2010             125 :       ++tokEnd;
    2011                 : 
    2012                 :     // return true if we successfully got something for charset
    2013              25 :     if (currPos != tokEnd) {
    2014              25 :       aCharset.Assign(currPos.get(), tokEnd.get() - currPos.get());
    2015              25 :       return true;
    2016                 :     }
    2017                 : 
    2018                 :     // Nothing specified as charset, continue next loop
    2019               0 :     currPos = tagEnd;
    2020                 :   }
    2021                 : 
    2022               0 :   return false;
    2023                 : }
    2024                 : 
    2025                 : static NS_METHOD
    2026               0 : NoOpParserWriteFunc(nsIInputStream* in,
    2027                 :                 void* closure,
    2028                 :                 const char* fromRawSegment,
    2029                 :                 PRUint32 toOffset,
    2030                 :                 PRUint32 count,
    2031                 :                 PRUint32 *writeCount)
    2032                 : {
                             // Segment writer that discards its input: it never reads
                             // fromRawSegment, reports the whole segment (count bytes) as
                             // consumed through *writeCount, and always returns NS_OK.
                             // nsParser::OnDataAvailable passes it to ReadSegments() on the
                             // about:blank path so the incoming data is drained and ignored.
    2033               0 :   *writeCount = count;
    2034               0 :   return NS_OK;
    2035                 : }
    2036                 : 
    2037                 : typedef struct {
                             // Closure handed to ParserWriteFunc through ReadSegments();
                             // nsParser::OnDataAvailable fills in every field before the call.

                             // True while ParserWriteFunc should still try to detect a charset
                             // (meta tag or byte order mark). ParserWriteFunc clears it on the
                             // first segment it examines.
    2038                 :   bool mNeedCharsetCheck;
                             // Parser that receives the detected charset.
    2039                 :   nsParser* mParser;
                             // Scanner that every segment is appended to.
    2040                 :   nsScanner* mScanner;
                             // Request passed along to the scanner Append() call and queried
                             // for a caching channel when a charset is detected.
    2041                 :   nsIRequest* mRequest;
    2042                 : } ParserWriteStruct;
    2043                 : 
    2044                 : /*
    2045                 :  * This function is invoked as a result of a call to a stream's
    2046                 :  * ReadSegments() method. It is called for each contiguous buffer
    2047                 :  * of data in the underlying stream or pipe. Using ReadSegments
    2048                 :  * allows us to avoid copying data to read out of the stream.
                         :  *
                         :  * closure must point to a ParserWriteStruct; a null closure makes
                         :  * the function return NS_ERROR_FAILURE.
                         :  *
                         :  * When the closure asks for a charset check, the segment is first
                         :  * inspected for a meta tag charset and, if none is found and at
                         :  * least 4 bytes are available, for a byte order mark. A recognized
                         :  * charset is applied to the parser and the sink and, when the
                         :  * request is a caching channel that exposes a cache entry
                         :  * descriptor, stored there as the "charset" metadata element.
                         :  * A guess of UTF-16, UTF-16BE or UTF-16LE is ignored unless it
                         :  * came from a byte order mark.
                         :  *
                         :  * The segment is then appended to the scanner. *writeCount is set
                         :  * to count only when that append succeeds; the append result is
                         :  * returned.
    2049                 :  */
    2050                 : static NS_METHOD
    2051            3491 : ParserWriteFunc(nsIInputStream* in,
    2052                 :                 void* closure,
    2053                 :                 const char* fromRawSegment,
    2054                 :                 PRUint32 toOffset,
    2055                 :                 PRUint32 count,
    2056                 :                 PRUint32 *writeCount)
    2057                 : {
    2058                 :   nsresult result;
    2059            3491 :   ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure);
    2060            3491 :   const char* buf = fromRawSegment;
    2061            3491 :   PRUint32 theNumRead = count;
    2062                 : 
    2063            3491 :   if (!pws) {
    2064               0 :     return NS_ERROR_FAILURE;
    2065                 :   }
    2066                 : 
    2067            3491 :   if (pws->mNeedCharsetCheck) {
    2068                 :     PRInt32 guessSource;
    2069            4942 :     nsCAutoString guess;
    2070            4942 :     nsCAutoString preferred;
    2071                 : 
                               // Clear the flag up front so that later segments delivered with
                               // the same closure are not checked again.
    2072            2471 :     pws->mNeedCharsetCheck = false;
    2073            4917 :     if (pws->mParser->DetectMetaTag(buf, theNumRead, guess, guessSource) ||
    2074                 :         ((count >= 4) &&
    2075                 :          DetectByteOrderMark((const unsigned char*)buf,
    2076            2446 :                              theNumRead, guess, guessSource))) {
    2077             591 :       result = nsCharsetAlias::GetPreferred(guess, preferred);
    2078                 :       // Only continue if it's a recognized charset and not
    2079                 :       // one of a designated set that we ignore.
    2080            2364 :       if (NS_SUCCEEDED(result) &&
    2081                 :           ((kCharsetFromByteOrderMark == guessSource) ||
    2082             591 :            (!preferred.EqualsLiteral("UTF-16") &&
    2083             591 :             !preferred.EqualsLiteral("UTF-16BE") &&
    2084             591 :             !preferred.EqualsLiteral("UTF-16LE")))) {
    2085             591 :         guess = preferred;
    2086             591 :         pws->mParser->SetDocumentCharset(guess, guessSource);
    2087             591 :         pws->mParser->SetSinkCharset(preferred);
                                   // Record the charset on the cache entry, if the request has one.
    2088            1182 :         nsCOMPtr<nsICachingChannel> channel(do_QueryInterface(pws->mRequest));
    2089             591 :         if (channel) {
    2090             722 :           nsCOMPtr<nsISupports> cacheToken;
    2091             361 :           channel->GetCacheToken(getter_AddRefs(cacheToken));
    2092             361 :           if (cacheToken) {
    2093             722 :             nsCOMPtr<nsICacheEntryDescriptor> cacheDescriptor(do_QueryInterface(cacheToken));
    2094             361 :             if (cacheDescriptor) {
    2095                 : #ifdef DEBUG
    2096                 :               nsresult rv =
    2097                 : #endif
    2098             361 :                 cacheDescriptor->SetMetaDataElement("charset",
    2099             361 :                                                     guess.get());
    2100             361 :               NS_ASSERTION(NS_SUCCEEDED(rv),"cannot SetMetaDataElement");
    2101                 :             }
    2102                 :           }
    2103                 :         }
    2104                 :       }
    2105                 :     }
    2106                 :   }
    2107                 : 
    2108            3491 :   result = pws->mScanner->Append(buf, theNumRead, pws->mRequest);
    2109            3491 :   if (NS_SUCCEEDED(result)) {
    2110            3491 :     *writeCount = count;
    2111                 :   }
    2112                 : 
    2113            3491 :   return result;
    2114                 : }
    2115                 : 
    2116                 : nsresult
    2117            3425 : nsParser::OnDataAvailable(nsIRequest *request, nsISupports* aContext,
    2118                 :                           nsIInputStream *pIStream, PRUint32 sourceOffset,
    2119                 :                           PRUint32 aLength)
    2120                 : {
                             // Reads up to aLength bytes from pIStream into the scanner of the
                             // parser context whose mRequest matches request, then resumes
                             // parsing once some non-whitespace data has been buffered.
                             //
                             // Returns NS_ERROR_UNEXPECTED when no context in the mPrevContext
                             // chain matches request, a failing ReadSegments() result as is, and
                             // otherwise NS_OK or the result of ResumeParse().
    2121            3425 :   NS_PRECONDITION((eOnStart == mParserContext->mStreamListenerState ||
    2122                 :                    eOnDataAvail == mParserContext->mStreamListenerState),
    2123                 :             "Error: OnStartRequest() must be called before OnDataAvailable()");
    2124            3425 :   NS_PRECONDITION(NS_InputStreamIsBuffered(pIStream),
    2125                 :                   "Must have a buffered input stream");
    2126                 : 
    2127            3425 :   nsresult rv = NS_OK;
    2128                 : 
    2129            3425 :   if (mIsAboutBlank) {
    2130               0 :     MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank");
    2131                 :     // ... but if an extension tries to feed us data for about:blank in a
    2132                 :     // release build, silently ignore the data.
    2133                 :     PRUint32 totalRead;
    2134                 :     rv = pIStream->ReadSegments(NoOpParserWriteFunc,
    2135                 :                                 nsnull,
    2136                 :                                 aLength,
    2137               0 :                                 &totalRead);
    2138               0 :     return rv;
    2139                 :   }
    2140                 : 
    2141            3425 :   CParserContext *theContext = mParserContext;
    2142                 : 
                             // Walk the context chain to find the context created for request.
    2143            6850 :   while (theContext && theContext->mRequest != request) {
    2144               0 :     theContext = theContext->mPrevContext;
    2145                 :   }
    2146                 : 
    2147            3425 :   if (theContext) {
    2148            3425 :     theContext->mStreamListenerState = eOnDataAvail;
    2149                 : 
                               // NOTE(review): when autodetection is marked invalid this sets
                               // the scanner position to the EndReading() iterator, which looks
                               // like it skips the data buffered so far; confirm against
                               // nsScanner::SetPosition.
    2150            3425 :     if (eInvalidDetect == theContext->mAutoDetectStatus) {
    2151               0 :       if (theContext->mScanner) {
    2152               0 :         nsScannerIterator iter;
    2153               0 :         theContext->mScanner->EndReading(iter);
    2154               0 :         theContext->mScanner->SetPosition(iter, true);
    2155                 :       }
    2156                 :     }
    2157                 : 
                               // A charset check is requested only for data at stream offset 0
                               // and only while mCharsetSource is below kCharsetFromMetaTag.
    2158                 :     PRUint32 totalRead;
    2159                 :     ParserWriteStruct pws;
    2160                 :     pws.mNeedCharsetCheck =
    2161            3425 :       (0 == sourceOffset) && (mCharsetSource < kCharsetFromMetaTag);
    2162            3425 :     pws.mParser = this;
    2163            3425 :     pws.mScanner = theContext->mScanner;
    2164            3425 :     pws.mRequest = request;
    2165                 : 
    2166            3425 :     rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead);
    2167            3425 :     if (NS_FAILED(rv)) {
    2168               0 :       return rv;
    2169                 :     }
    2170                 : 
    2171                 :     // Don't bother to start parsing until we've seen some
    2172                 :     // non-whitespace data
    2173            6850 :     if (IsOkToProcessNetworkData() &&
    2174            3425 :         theContext->mScanner->FirstNonWhitespacePosition() >= 0) {
                                 // mProcessingNetworkData is set only for the duration of the
                                 // WillParse()/ResumeParse() calls below.
    2175            3424 :       mProcessingNetworkData = true;
    2176            3424 :       if (mSink) {
    2177            3424 :         mSink->WillParse();
    2178                 :       }
    2179            3424 :       rv = ResumeParse();
    2180            3424 :       mProcessingNetworkData = false;
    2181                 :     }
    2182                 :   } else {
    2183               0 :     rv = NS_ERROR_UNEXPECTED;
    2184                 :   }
    2185                 : 
    2186            3425 :   return rv;
    2187                 : }
    2188                 : 
    2189                 : /**
    2190                 :  *  This is called by the networking library once the last block of data
    2191                 :  *  has been collected from the net.
                         :  *
                         :  *  The parser context whose mRequest matches request (if any) is
                         :  *  switched to the eOnStop state and its scanner is made
                         :  *  non-incremental. status is stored in mStreamStatus, a final
                         :  *  ResumeParse(true, true) runs when network data may be processed,
                         :  *  and the notification is forwarded to mObserver when one is set.
                         :  *
                         :  *  @return NS_OK, or the result of ResumeParse() when it was called.
    2192                 :  */
    2193                 : nsresult
    2194            3341 : nsParser::OnStopRequest(nsIRequest *request, nsISupports* aContext,
    2195                 :                         nsresult status)
    2196                 : {
    2197            3341 :   nsresult rv = NS_OK;
    2198                 : 
    2199            3341 :   CParserContext *pc = mParserContext;
    2200            6693 :   while (pc) {
    2201            3341 :     if (pc->mRequest == request) {
    2202            3330 :       pc->mStreamListenerState = eOnStop;
                                 // NOTE(review): mScanner is dereferenced without a null check
                                 // here, while OnDataAvailable guards one of its uses; confirm
                                 // that a context with a request always has a scanner.
    2203            3330 :       pc->mScanner->SetIncremental(false);
    2204            3330 :       break;
    2205                 :     }
    2206                 : 
    2207              11 :     pc = pc->mPrevContext;
    2208                 :   }
    2209                 : 
    2210            3341 :   mStreamStatus = status;
    2211                 : 
                             // NOTE(review): rv is still the initial NS_OK at this point, so the
                             // NS_SUCCEEDED(rv) half of the condition is always true.
    2212            3341 :   if (IsOkToProcessNetworkData() && NS_SUCCEEDED(rv)) {
    2213            3341 :     mProcessingNetworkData = true;
    2214            3341 :     if (mSink) {
    2215            3330 :       mSink->WillParse();
    2216                 :     }
    2217            3341 :     rv = ResumeParse(true, true);
    2218            3341 :     mProcessingNetworkData = false;
    2219                 :   }
    2220                 : 
    2221                 :   // If the parser isn't enabled, we don't finish parsing till
    2222                 :   // it is reenabled.
    2223                 : 
    2224                 : 
    2225                 :   // XXX Should we wait to notify our observers as well if the
    2226                 :   // parser isn't yet enabled?
    2227            3341 :   if (mObserver) {
    2228               0 :     mObserver->OnStopRequest(request, aContext, status);
    2229                 :   }
    2230                 : 
    2231            3341 :   return rv;
    2232                 : }
    2233                 : 
    2234                 : 
    2235                 : /*******************************************************************
    2236                 :   Here come the tokenization methods...
    2237                 :  *******************************************************************/
    2238                 : 
    2239                 : 
    2240                 : /**
    2241                 :  *  Part of the code sandwich, this gets called right before
    2242                 :  *  the tokenization process begins. The main reason for
    2243                 :  *  this call is to allow the delegate to do initialization.
                         :  *
                         :  *  @param aIsFinalChunk forwarded unchanged to the tokenizer.
                         :  *  @return true when there is no parser context or when the
                         :  *          tokenizer WillTokenize() call succeeds; false when no
                         :  *          tokenizer can be obtained or that call fails.
    2244                 :  */
    2245                 : bool
    2246            6757 : nsParser::WillTokenize(bool aIsFinalChunk)
    2247                 : {
    2248            6757 :   if (!mParserContext) {
    2249               0 :     return true;
    2250                 :   }
    2251                 : 
    2252                 :   nsITokenizer* theTokenizer;
    2253            6757 :   nsresult result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
    2254            6757 :   NS_ENSURE_SUCCESS(result, false);
    2255            6757 :   return NS_SUCCEEDED(theTokenizer->WillTokenize(aIsFinalChunk,
    2256                 :                                                  &mTokenAllocator));
    2257                 : }
    2258                 : 
    2259                 : 
    2260                 : /**
    2261                 :  * This is the primary control routine to consume tokens.
    2262                 :  * It iteratively consumes tokens until an error occurs or
    2263                 :  * you run out of data.
                         :  *
                         :  * @param aIsFinalChunk forwarded to WillTokenize() and DidTokenize().
                         :  * @return the failing ConsumeToken() result that ended the loop
                         :  *         (kEOF when the data ran out), the result of Terminate()
                         :  *         after NS_ERROR_HTMLPARSER_STOPPARSING, the successful
                         :  *         result when the loop stops to flush tokens or when
                         :  *         earlier tokens are still pending, or
                         :  *         NS_ERROR_HTMLPARSER_BADTOKENIZER (also stored in
                         :  *         mInternalState) when no tokenizer is available.
    2264                 :  */
    2265            6757 : nsresult nsParser::Tokenize(bool aIsFinalChunk)
    2266                 : {
    2267                 :   nsITokenizer* theTokenizer;
    2268                 : 
    2269            6757 :   nsresult result = NS_ERROR_NOT_AVAILABLE;
    2270            6757 :   if (mParserContext) {
    2271            6757 :     result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
    2272                 :   }
    2273                 : 
    2274            6757 :   if (NS_SUCCEEDED(result)) {
    2275            6757 :     if (mFlags & NS_PARSER_FLAG_FLUSH_TOKENS) {
    2276                 :       // For some reason tokens didn't get flushed (probably
    2277                 :       // the parser got blocked before all the tokens in the
    2278                 :       // stack got handled). Flush 'em now. Ref. bug 104856
    2279               0 :       if (theTokenizer->GetCount() != 0) {
    2280               0 :         return result;
    2281                 :       }
    2282                 : 
    2283                 :       // Reset since the tokens have been flushed.
    2284               0 :       mFlags &= ~NS_PARSER_FLAG_FLUSH_TOKENS;
    2285                 :     }
    2286                 : 
    2287            6757 :     bool flushTokens = false;
    2288                 : 
    2289            6757 :     mParserContext->mNumConsumed = 0;
    2290                 : 
    2291            6757 :     bool killSink = false;
    2292                 : 
                               // NOTE(review): the return value of WillTokenize() is ignored.
    2293            6757 :     WillTokenize(aIsFinalChunk);
    2294            6757 :     while (NS_SUCCEEDED(result)) {
                                 // Mark the scanner before each token so a failed
                                 // ConsumeToken() can be undone with RewindToMark(); the value
                                 // returned by Mark() is accumulated in mNumConsumed.
    2295            9012 :       mParserContext->mNumConsumed += mParserContext->mScanner->Mark();
    2296            9012 :       result = theTokenizer->ConsumeToken(*mParserContext->mScanner,
    2297           18024 :                                           flushTokens);
    2298            9012 :       if (NS_FAILED(result)) {
    2299            6757 :         mParserContext->mScanner->RewindToMark();
    2300            6757 :         if (kEOF == result){
    2301            6692 :           break;
    2302                 :         }
    2303              65 :         if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
    2304              65 :           killSink = true;
    2305              65 :           result = Terminate();
    2306              65 :           break;
    2307                 :         }
    2308            2255 :       } else if (flushTokens && (mFlags & NS_PARSER_FLAG_OBSERVERS_ENABLED)) {
    2309                 :         // I added the extra test of NS_PARSER_FLAG_OBSERVERS_ENABLED to fix Bug# 23931.
    2310                 :         // Flush tokens on seeing </SCRIPT> -- Ref: Bug# 22485 --
    2311                 :         // Also remember to update the marked position.
    2312               0 :         mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS;
    2313               0 :         mParserContext->mNumConsumed += mParserContext->mScanner->Mark();
    2314               0 :         break;
    2315                 :       }
    2316                 :     }
    2317            6757 :     DidTokenize(aIsFinalChunk);
    2318                 : 
                               // The sink is released only after DidTokenize() has run, since
                               // DidTokenize() still passes mSink to GetTokenizer().
    2319            6757 :     if (killSink) {
    2320              65 :       mSink = nsnull;
    2321                 :     }
    2322                 :   } else {
    2323               0 :     result = mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER;
    2324                 :   }
    2325                 : 
    2326            6757 :   return result;
    2327                 : }
    2328                 : 
    2329                 : /**
    2330                 :  *  This is the tail-end of the code sandwich for the
    2331                 :  *  tokenization process. It gets called once tokenization
    2332                 :  *  has completed for each phase.
                         :  *
                         :  *  @param aIsFinalChunk forwarded unchanged to the tokenizer.
                         :  *  @return true when there is no parser context or when the
                         :  *          tokenizer DidTokenize() call succeeds; false when no
                         :  *          tokenizer can be obtained or that call fails.
    2333                 :  */
    2334                 : bool
    2335            6757 : nsParser::DidTokenize(bool aIsFinalChunk)
    2336                 : {
    2337            6757 :   if (!mParserContext) {
    2338               0 :     return true;
    2339                 :   }
    2340                 : 
    2341                 :   nsITokenizer* theTokenizer;
    2342            6757 :   nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
    2343            6757 :   NS_ENSURE_SUCCESS(rv, false);
    2344                 : 
    2345            6757 :   rv = theTokenizer->DidTokenize(aIsFinalChunk);
    2346            6757 :   return NS_SUCCEEDED(rv);
    2347                 : }
    2348                 : 
    2349                 : /**
    2350                 :  * Get the channel associated with this parser
    2351                 :  *
    2352                 :  * @param aChannel out param that will contain the result; it is only
    2353                 :  *                 written when the current parser context has a request
                         :  * @return NS_ERROR_NOT_AVAILABLE when there is no parser context or
                         :  *         no request; otherwise the result of querying the request
                         :  *         of the current parser context for the channel interface
    2354                 :  */
    2355                 : NS_IMETHODIMP
    2356               0 : nsParser::GetChannel(nsIChannel** aChannel)
    2357                 : {
    2358               0 :   nsresult result = NS_ERROR_NOT_AVAILABLE;
    2359               0 :   if (mParserContext && mParserContext->mRequest) {
    2360               0 :     result = CallQueryInterface(mParserContext->mRequest, aChannel);
    2361                 :   }
    2362               0 :   return result;
    2363                 : }
    2364                 : 
    2365                 : /**
    2366                 :  * Get the DTD associated with this parser
                         :  *
                         :  * @param aDTD out param; when a parser context exists it receives
                         :  *             mDTD, with a reference added if mDTD is non-null
                         :  * @return always NS_OK
                         :  *
                         :  * NOTE(review): when there is no parser context *aDTD is left
                         :  * untouched although NS_OK is returned, so callers apparently have
                         :  * to initialize it themselves; confirm against the callers.
    2367                 :  */
    2368                 : NS_IMETHODIMP
    2369               0 : nsParser::GetDTD(nsIDTD** aDTD)
    2370                 : {
    2371               0 :   if (mParserContext) {
    2372               0 :     NS_IF_ADDREF(*aDTD = mDTD);
    2373                 :   }
    2374                 : 
    2375               0 :   return NS_OK;
    2376                 : }
    2377                 : 
    2378                 : /**
    2379                 :  * Get this as nsIStreamListener
                         :  *
                         :  * @return this parser object itself; no reference is added by
                         :  *         this function
    2380                 :  */
    2381                 : nsIStreamListener*
    2382               0 : nsParser::GetStreamListener()
    2383                 : {
    2384               0 :   return this;
    2385            4392 : }

Generated by: LCOV version 1.7