LCOV - code coverage report
Current view: directory - parser/html - nsHtml5StreamParser.cpp (source / functions) Found Hit Coverage
Test: app.info Lines: 777 6 0.8 %
Date: 2012-06-02 Functions: 67 3 4.5 %

       1                 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2                 : /* vim: set sw=2 ts=2 et tw=79: */
       3                 : /* ***** BEGIN LICENSE BLOCK *****
       4                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       5                 :  *
       6                 :  * The contents of this file are subject to the Mozilla Public License Version
       7                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       8                 :  * the License. You may obtain a copy of the License at
       9                 :  * http://www.mozilla.org/MPL/
      10                 :  *
      11                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      12                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      13                 :  * for the specific language governing rights and limitations under the
      14                 :  * License.
      15                 :  *
      16                 :  * The Original Code is mozilla.org code.
      17                 :  *
      18                 :  * The Initial Developer of the Original Code is
      19                 :  * Netscape Communications Corporation.
      20                 :  * Portions created by the Initial Developer are Copyright (C) 1998
      21                 :  * the Initial Developer. All Rights Reserved.
      22                 :  *
      23                 :  * Contributor(s):
      24                 :  *   Pierre Phaneuf <pp@ludusdesign.com>
      25                 :  *   Henri Sivonen <hsivonen@iki.fi>
      26                 :  *
      27                 :  * Alternatively, the contents of this file may be used under the terms of
      28                 :  * either of the GNU General Public License Version 2 or later (the "GPL"),
      29                 :  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      30                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      31                 :  * of those above. If you wish to allow use of your version of this file only
      32                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      33                 :  * use your version of this file under the terms of the MPL, indicate your
      34                 :  * decision by deleting the provisions above and replace them with the notice
      35                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      36                 :  * the provisions above, a recipient may use your version of this file under
      37                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      38                 :  *
      39                 :  * ***** END LICENSE BLOCK ***** */
      40                 : 
      41                 : #include "nsHtml5StreamParser.h"
      42                 : #include "nsICharsetConverterManager.h"
      43                 : #include "nsCharsetAlias.h"
      44                 : #include "nsServiceManagerUtils.h"
      45                 : #include "nsEncoderDecoderUtils.h"
      46                 : #include "nsContentUtils.h"
      47                 : #include "nsHtml5Tokenizer.h"
      48                 : #include "nsIHttpChannel.h"
      49                 : #include "nsHtml5Parser.h"
      50                 : #include "nsHtml5TreeBuilder.h"
      51                 : #include "nsHtml5AtomTable.h"
      52                 : #include "nsHtml5Module.h"
      53                 : #include "nsHtml5RefPtr.h"
      54                 : #include "nsIScriptError.h"
      55                 : #include "mozilla/Preferences.h"
      56                 : #include "nsHtml5Highlighter.h"
      57                 : #include "expat_config.h"
      58                 : #include "expat.h"
      59                 : #include "nsINestedURI.h"
      60                 : 
      61                 : using namespace mozilla;
      62                 : 
      63                 : 
      64                 : PRInt32 nsHtml5StreamParser::sTimerInitialDelay = 120;
      65                 : PRInt32 nsHtml5StreamParser::sTimerSubsequentDelay = 120;
      66                 : 
      67                 : // static
      68                 : void
      69            1404 : nsHtml5StreamParser::InitializeStatics()
      70                 : {
      71                 :   Preferences::AddIntVarCache(&sTimerInitialDelay,
      72            1404 :                               "html5.flushtimer.initialdelay");
      73                 :   Preferences::AddIntVarCache(&sTimerSubsequentDelay,
      74            1404 :                               "html5.flushtimer.subsequentdelay");
      75            1404 : }
      76                 : 
      77                 : /*
      78                 :  * Note that nsHtml5StreamParser implements cycle collecting AddRef and
      79                 :  * Release. Therefore, nsHtml5StreamParser must never be refcounted from
      80                 :  * the parser thread!
      81                 :  *
      82                 :  * To work around this limitation, runnables posted by the main thread to the
      83                 :  * parser thread hold their reference to the stream parser in an
      84                 :  * nsHtml5RefPtr. Upon creation, nsHtml5RefPtr addrefs the object it holds
      85                 :  * just like a regular nsRefPtr. This is OK, since the creation of the
      86                 :  * runnable and the nsHtml5RefPtr happens on the main thread.
      87                 :  *
      88                 :  * When the runnable is done on the parser thread, the destructor of
      89                 :  * nsHtml5RefPtr runs there. It doesn't call Release on the held object
      90                 :  * directly. Instead, it posts another runnable back to the main thread where
      91                 :  * that runnable calls Release on the wrapped object.
      92                 :  *
      93                 :  * When posting runnables in the other direction, the runnables have to be
      94                 :  * created on the main thread when nsHtml5StreamParser is instantiated and
      95                 :  * held for the lifetime of the nsHtml5StreamParser. This works, because the
      96                 :  * same runnable can be dispatched multiple times and currently runnables
      97                 :  * posted from the parser thread to main thread don't need to wrap any
      98                 :  * runnable-specific data. (In the other direction, the runnables most notably
      99                 :  * wrap the byte data of the stream.)
     100                 :  */
     101               0 : NS_IMPL_CYCLE_COLLECTING_ADDREF(nsHtml5StreamParser)
     102               0 : NS_IMPL_CYCLE_COLLECTING_RELEASE(nsHtml5StreamParser)
     103                 : 
     104               0 : NS_INTERFACE_TABLE_HEAD(nsHtml5StreamParser)
     105               0 :   NS_INTERFACE_TABLE2(nsHtml5StreamParser, 
     106                 :                       nsIStreamListener, 
     107                 :                       nsICharsetDetectionObserver)
     108               0 :   NS_INTERFACE_TABLE_TO_MAP_SEGUE_CYCLE_COLLECTION(nsHtml5StreamParser)
     109               0 : NS_INTERFACE_MAP_END
     110                 : 
     111            1464 : NS_IMPL_CYCLE_COLLECTION_CLASS(nsHtml5StreamParser)
     112                 : 
     113               0 : NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsHtml5StreamParser)
     114               0 :   tmp->DropTimer();
     115               0 :   NS_IMPL_CYCLE_COLLECTION_UNLINK_NSCOMPTR(mObserver)
     116               0 :   NS_IMPL_CYCLE_COLLECTION_UNLINK_NSCOMPTR(mRequest)
     117               0 :   tmp->mOwner = nsnull;
     118               0 :   tmp->mExecutorFlusher = nsnull;
     119               0 :   tmp->mLoadFlusher = nsnull;
     120               0 :   tmp->mExecutor = nsnull;
     121               0 :   NS_IMPL_CYCLE_COLLECTION_UNLINK_NSCOMPTR(mChardet)
     122               0 : NS_IMPL_CYCLE_COLLECTION_UNLINK_END
     123                 : 
     124               0 : NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsHtml5StreamParser)
     125               0 :   NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mObserver)
     126               0 :   NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mRequest)
     127               0 :   if (tmp->mOwner) {
     128               0 :     NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mOwner");
     129               0 :     cb.NoteXPCOMChild(static_cast<nsIParser*> (tmp->mOwner));
     130                 :   }
     131                 :   // hack: count the strongly owned edge wrapped in the runnable
     132               0 :   if (tmp->mExecutorFlusher) {
     133               0 :     NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mExecutorFlusher->mExecutor");
     134               0 :     cb.NoteXPCOMChild(static_cast<nsIContentSink*> (tmp->mExecutor));
     135                 :   }
     136                 :   // hack: count the strongly owned edge wrapped in the runnable
     137               0 :   if (tmp->mLoadFlusher) {
     138               0 :     NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mLoadFlusher->mExecutor");
     139               0 :     cb.NoteXPCOMChild(static_cast<nsIContentSink*> (tmp->mExecutor));
     140                 :   }
     141                 :   // hack: count self if held by mChardet
     142               0 :   if (tmp->mChardet) {
     143               0 :     NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, 
     144                 :       "mChardet->mObserver");
     145               0 :     cb.NoteXPCOMChild(static_cast<nsIStreamListener*>(tmp));
     146                 :   }
     147               0 : NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
     148                 : 
     149                 : class nsHtml5ExecutorFlusher : public nsRunnable
     150               0 : {
     151                 :   private:
     152                 :     nsRefPtr<nsHtml5TreeOpExecutor> mExecutor;
     153                 :   public:
     154               0 :     nsHtml5ExecutorFlusher(nsHtml5TreeOpExecutor* aExecutor)
     155               0 :       : mExecutor(aExecutor)
     156               0 :     {}
     157               0 :     NS_IMETHODIMP Run()
     158                 :     {
     159               0 :       mExecutor->RunFlushLoop();
     160               0 :       return NS_OK;
     161                 :     }
     162                 : };
     163                 : 
     164                 : class nsHtml5LoadFlusher : public nsRunnable
     165               0 : {
     166                 :   private:
     167                 :     nsRefPtr<nsHtml5TreeOpExecutor> mExecutor;
     168                 :   public:
     169               0 :     nsHtml5LoadFlusher(nsHtml5TreeOpExecutor* aExecutor)
     170               0 :       : mExecutor(aExecutor)
     171               0 :     {}
     172               0 :     NS_IMETHODIMP Run()
     173                 :     {
     174               0 :       mExecutor->FlushSpeculativeLoads();
     175               0 :       return NS_OK;
     176                 :     }
     177                 : };
     178                 : 
     179               0 : nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
     180                 :                                          nsHtml5Parser* aOwner,
     181                 :                                          eParserMode aMode)
     182                 :   : mFirstBuffer(nsnull) // Will be filled when starting
     183                 :   , mLastBuffer(nsnull) // Will be filled when starting
     184                 :   , mExecutor(aExecutor)
     185                 :   , mTreeBuilder(new nsHtml5TreeBuilder((aMode == VIEW_SOURCE_HTML ||
     186                 :                                          aMode == VIEW_SOURCE_XML) ?
     187               0 :                                              nsnull : mExecutor->GetStage(),
     188                 :                                          aMode == NORMAL ?
     189               0 :                                              mExecutor->GetStage() : nsnull))
     190               0 :   , mTokenizer(new nsHtml5Tokenizer(mTreeBuilder, aMode == VIEW_SOURCE_XML))
     191                 :   , mTokenizerMutex("nsHtml5StreamParser mTokenizerMutex")
     192                 :   , mOwner(aOwner)
     193                 :   , mSpeculationMutex("nsHtml5StreamParser mSpeculationMutex")
     194                 :   , mTerminatedMutex("nsHtml5StreamParser mTerminatedMutex")
     195                 :   , mThread(nsHtml5Module::GetStreamParserThread())
     196               0 :   , mExecutorFlusher(new nsHtml5ExecutorFlusher(aExecutor))
     197               0 :   , mLoadFlusher(new nsHtml5LoadFlusher(aExecutor))
     198                 :   , mFlushTimer(do_CreateInstance("@mozilla.org/timer;1"))
     199               0 :   , mMode(aMode)
     200                 : {
     201               0 :   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
     202               0 :   mFlushTimer->SetTarget(mThread);
     203               0 :   mAtomTable.Init(); // we aren't checking for OOM anyway...
     204                 : #ifdef DEBUG
     205               0 :   mAtomTable.SetPermittedLookupThread(mThread);
     206                 : #endif
     207               0 :   mTokenizer->setInterner(&mAtomTable);
     208               0 :   mTokenizer->setEncodingDeclarationHandler(this);
     209                 : 
     210               0 :   if (aMode == VIEW_SOURCE_HTML || aMode == VIEW_SOURCE_XML) {
     211                 :     nsHtml5Highlighter* highlighter =
     212               0 :       new nsHtml5Highlighter(mExecutor->GetStage());
     213               0 :     mTokenizer->EnableViewSource(highlighter); // takes ownership
     214               0 :     mTreeBuilder->EnableViewSource(highlighter); // doesn't own
     215                 :   }
     216                 : 
     217                 :   // Chardet instantiation adapted from nsDOMFile.
     218                 :   // Chardet is initialized here even if it turns out to be useless
     219                 :   // to make the chardet refcount its observer (nsHtml5StreamParser)
     220                 :   // on the main thread.
     221                 :   const nsAdoptingCString& detectorName =
     222               0 :     Preferences::GetLocalizedCString("intl.charset.detector");
     223               0 :   if (!detectorName.IsEmpty()) {
     224               0 :     nsCAutoString detectorContractID;
     225               0 :     detectorContractID.AssignLiteral(NS_CHARSET_DETECTOR_CONTRACTID_BASE);
     226               0 :     detectorContractID += detectorName;
     227               0 :     if ((mChardet = do_CreateInstance(detectorContractID.get()))) {
     228               0 :       (void) mChardet->Init(this);
     229               0 :       mFeedChardet = true;
     230                 :     }
     231                 :   }
     232                 : 
     233                 :   // There's a zeroing operator new for everything else
     234               0 : }
     235                 : 
     236               0 : nsHtml5StreamParser::~nsHtml5StreamParser()
     237                 : {
     238               0 :   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
     239               0 :   mTokenizer->end();
     240               0 :   NS_ASSERTION(!mFlushTimer, "Flush timer was not dropped before dtor!");
     241                 : #ifdef DEBUG
     242               0 :   mRequest = nsnull;
     243               0 :   mObserver = nsnull;
     244               0 :   mUnicodeDecoder = nsnull;
     245               0 :   mSniffingBuffer = nsnull;
     246               0 :   mMetaScanner = nsnull;
     247               0 :   mFirstBuffer = nsnull;
     248               0 :   mExecutor = nsnull;
     249               0 :   mTreeBuilder = nsnull;
     250               0 :   mTokenizer = nsnull;
     251               0 :   mOwner = nsnull;
     252                 : #endif
     253               0 : }
     254                 : 
     255                 : nsresult
     256               0 : nsHtml5StreamParser::GetChannel(nsIChannel** aChannel)
     257                 : {
     258               0 :   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
     259               0 :   return mRequest ? CallQueryInterface(mRequest, aChannel) :
     260               0 :                     NS_ERROR_NOT_AVAILABLE;
     261                 : }
     262                 : 
     263                 : NS_IMETHODIMP
     264               0 : nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf)
     265                 : {
     266               0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
     267               0 :   if (aConf == eBestAnswer || aConf == eSureAnswer) {
     268               0 :     mFeedChardet = false; // just in case
     269               0 :     if (HasDecoder()) {
     270               0 :       if (mCharset.Equals(aCharset)) {
     271               0 :         NS_ASSERTION(mCharsetSource < kCharsetFromAutoDetection,
     272                 :             "Why are we running chardet at all?");
     273               0 :         mCharsetSource = kCharsetFromAutoDetection;
     274               0 :         mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
     275                 :       } else {
     276                 :         // We've already committed to a decoder. Request a reload from the
     277                 :         // docshell.
     278               0 :         nsCAutoString charset(aCharset);
     279               0 :         mTreeBuilder->NeedsCharsetSwitchTo(charset, kCharsetFromAutoDetection);
     280               0 :         FlushTreeOpsAndDisarmTimer();
     281               0 :         Interrupt();
     282                 :       }
     283                 :     } else {
     284                 :       // Got a confident answer from the sniffing buffer. That code will
     285                 :       // take care of setting up the decoder.
     286               0 :       mCharset.Assign(aCharset);
     287               0 :       mCharsetSource = kCharsetFromAutoDetection;
     288               0 :       mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
     289                 :     }
     290                 :   }
     291               0 :   return NS_OK;
     292                 : }
     293                 : 
     294                 : void
     295               0 : nsHtml5StreamParser::SetViewSourceTitle(nsIURI* aURL)
     296                 : {
     297               0 :   if (aURL) {
     298               0 :     nsCOMPtr<nsIURI> temp;
     299                 :     bool isViewSource;
     300               0 :     aURL->SchemeIs("view-source", &isViewSource);
     301               0 :     if (isViewSource) {
     302               0 :       nsCOMPtr<nsINestedURI> nested = do_QueryInterface(aURL);
     303               0 :       nested->GetInnerURI(getter_AddRefs(temp));
     304                 :     } else {
     305               0 :       temp = aURL;
     306                 :     }
     307                 :     bool isData;
     308               0 :     temp->SchemeIs("data", &isData);
     309               0 :     if (isData) {
     310                 :       // Avoid showing potentially huge data: URLs. The three last bytes are
     311                 :       // UTF-8 for an ellipsis.
     312               0 :       mViewSourceTitle.AssignLiteral("data:\xE2\x80\xA6");
     313                 :     } else {
     314               0 :       temp->GetSpec(mViewSourceTitle);
     315                 :     }
     316                 :   }
     317               0 : }
     318                 : 
     319                 : nsresult
     320               0 : nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const PRUint8* aFromSegment, // can be null
     321                 :                                                                           PRUint32 aCount,
     322                 :                                                                           PRUint32* aWriteCount)
     323                 : {
     324               0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
     325               0 :   nsresult rv = NS_OK;
     326               0 :   nsCOMPtr<nsICharsetConverterManager> convManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
     327               0 :   NS_ENSURE_SUCCESS(rv, rv);
     328               0 :   rv = convManager->GetUnicodeDecoder(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
     329               0 :   if (rv == NS_ERROR_UCONV_NOCONV) {
     330               0 :     mCharset.AssignLiteral("windows-1252"); // lower case is the raw form
     331               0 :     mCharsetSource = kCharsetFromWeakDocTypeDefault;
     332               0 :     rv = convManager->GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
     333               0 :     mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
     334                 :   }
     335               0 :   NS_ENSURE_SUCCESS(rv, rv);
     336               0 :   mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
     337               0 :   return WriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
     338                 : }
     339                 : 
     340                 : nsresult
     341               0 : nsHtml5StreamParser::WriteSniffingBufferAndCurrentSegment(const PRUint8* aFromSegment, // can be null
     342                 :                                                           PRUint32 aCount,
     343                 :                                                           PRUint32* aWriteCount)
     344                 : {
     345               0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
     346               0 :   nsresult rv = NS_OK;
     347               0 :   if (mSniffingBuffer) {
     348                 :     PRUint32 writeCount;
     349               0 :     rv = WriteStreamBytes(mSniffingBuffer, mSniffingLength, &writeCount);
     350               0 :     NS_ENSURE_SUCCESS(rv, rv);
     351               0 :     mSniffingBuffer = nsnull;
     352                 :   }
     353               0 :   mMetaScanner = nsnull;
     354               0 :   if (aFromSegment) {
     355               0 :     rv = WriteStreamBytes(aFromSegment, aCount, aWriteCount);
     356                 :   }
     357               0 :   return rv;
     358                 : }
     359                 : 
     360                 : nsresult
     361               0 : nsHtml5StreamParser::SetupDecodingFromBom(const char* aCharsetName, const char* aDecoderCharsetName)
     362                 : {
     363               0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
     364               0 :   nsresult rv = NS_OK;
     365               0 :   nsCOMPtr<nsICharsetConverterManager> convManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
     366               0 :   NS_ENSURE_SUCCESS(rv, rv);
     367               0 :   rv = convManager->GetUnicodeDecoderRaw(aDecoderCharsetName, getter_AddRefs(mUnicodeDecoder));
     368               0 :   NS_ENSURE_SUCCESS(rv, rv);
     369               0 :   mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
     370               0 :   mCharset.Assign(aCharsetName);
     371               0 :   mCharsetSource = kCharsetFromByteOrderMark;
     372               0 :   mFeedChardet = false;
     373               0 :   mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
     374               0 :   mSniffingBuffer = nsnull;
     375               0 :   mMetaScanner = nsnull;
     376               0 :   mBomState = BOM_SNIFFING_OVER;
     377               0 :   return rv;
     378                 : }
     379                 : 
     380                 : void
     381               0 : nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(const PRUint8* aFromSegment,
     382                 :                                                  PRUint32 aCountToSniffingLimit)
     383                 : {
     384                 :   // Avoid underspecified heuristic craziness for XHR
     385               0 :   if (mMode == LOAD_AS_DATA) {
     386               0 :     return;
     387                 :   }
     388                 :   // Make sure there's enough data. Require room for "<title></title>"
     389               0 :   if (mSniffingLength + aCountToSniffingLimit < 30) {
     390               0 :     return;
     391                 :   }
     392                 :   // even-numbered bytes tracked at 0, odd-numbered bytes tracked at 1
     393               0 :   bool byteZero[2] = { false, false };
     394               0 :   bool byteNonZero[2] = { false, false };
     395               0 :   PRUint32 i = 0;
     396               0 :   if (mSniffingBuffer) {
     397               0 :     for (; i < mSniffingLength; ++i) {
     398               0 :       if (mSniffingBuffer[i]) {
     399               0 :         if (byteNonZero[1 - (i % 2)]) {
     400               0 :           return;
     401                 :         }
     402               0 :         byteNonZero[i % 2] = true;
     403                 :       } else {
     404               0 :         if (byteZero[1 - (i % 2)]) {
     405               0 :           return;
     406                 :         }
     407               0 :         byteZero[i % 2] = true;
     408                 :       }
     409                 :     }
     410                 :   }
     411               0 :   if (aFromSegment) {
     412               0 :     for (PRUint32 j = 0; j < aCountToSniffingLimit; ++j) {
     413               0 :       if (aFromSegment[j]) {
     414               0 :         if (byteNonZero[1 - ((i + j) % 2)]) {
     415               0 :           return;
     416                 :         }
     417               0 :         byteNonZero[(i + j) % 2] = true;
     418                 :       } else {
     419               0 :         if (byteZero[1 - ((i + j) % 2)]) {
     420               0 :           return;
     421                 :         }
     422               0 :         byteZero[(i + j) % 2] = true;
     423                 :       }
     424                 :     }
     425                 :   }
     426                 : 
     427               0 :   if (byteNonZero[0]) {
     428               0 :     mCharset.Assign("UTF-16LE");
     429                 :   } else {
     430               0 :     mCharset.Assign("UTF-16BE");
     431                 :   }
     432               0 :   mCharsetSource = kCharsetFromIrreversibleAutoDetection;
     433               0 :   mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
     434               0 :   mFeedChardet = false;
     435                 : }
     436                 : 
     437                 : void
     438               0 : nsHtml5StreamParser::SetEncodingFromExpat(const PRUnichar* aEncoding)
     439                 : {
     440               0 :   if (aEncoding) {
     441               0 :     nsDependentString utf16(aEncoding);
     442               0 :     nsCAutoString utf8;
     443               0 :     CopyUTF16toUTF8(utf16, utf8);
     444               0 :     if (PreferredForInternalEncodingDecl(utf8)) {
     445               0 :       mCharset.Assign(utf8);
     446               0 :       mCharsetSource = kCharsetFromMetaTag; // closest for XML
     447                 :       return;
     448                 :     }
     449                 :     // else the page declared an encoding Gecko doesn't support and we'd
     450                 :     // end up defaulting to UTF-8 anyway. Might as well fall through here
     451                 :     // right away and let the encoding be set to UTF-8 which we'd default to
     452                 :     // anyway.
     453                 :   }
     454               0 :   mCharset.AssignLiteral("UTF-8"); // XML defaults to UTF-8 without a BOM
     455               0 :   mCharsetSource = kCharsetFromMetaTag; // means confident
     456                 : }
     457                 : 
     458                 : // A separate user data struct is used instead of passing the
     459                 : // nsHtml5StreamParser instance as user data in order to avoid including
     460                 : // expat.h in nsHtml5StreamParser.h. Doing that would cause naming conflicts.
     461                 : // Using a separate user data struct also avoids bloating nsHtml5StreamParser
     462                 : // by one pointer.
     463                 : struct UserData {
     464                 :   XML_Parser mExpat;
     465                 :   nsHtml5StreamParser* mStreamParser;
     466                 : };
     467                 : 
     468                 : // Using no-namespace handler callbacks to avoid including expat.h in
     469                 : // nsHtml5StreamParser.h, since doing so would cause naming conflicts.
     470                 : static void
     471               0 : HandleXMLDeclaration(void* aUserData,
     472                 :                      const XML_Char* aVersion,
     473                 :                      const XML_Char* aEncoding,
     474                 :                      int aStandalone)
     475                 : {
     476               0 :   UserData* ud = static_cast<UserData*>(aUserData);
     477                 :   ud->mStreamParser->SetEncodingFromExpat(
     478               0 :       reinterpret_cast<const PRUnichar*>(aEncoding));
     479               0 :   XML_StopParser(ud->mExpat, false);
     480               0 : }
     481                 : 
     482                 : static void
     483               0 : HandleStartElement(void* aUserData,
     484                 :                    const XML_Char* aName,
     485                 :                    const XML_Char **aAtts)
     486                 : {
     487               0 :   UserData* ud = static_cast<UserData*>(aUserData);
     488               0 :   XML_StopParser(ud->mExpat, false);
     489               0 : }
     490                 : 
     491                 : static void
     492               0 : HandleEndElement(void* aUserData,
     493                 :                  const XML_Char* aName)
     494                 : {
     495               0 :   UserData* ud = static_cast<UserData*>(aUserData);
     496               0 :   XML_StopParser(ud->mExpat, false);
     497               0 : }
     498                 : 
     499                 : static void
     500               0 : HandleComment(void* aUserData,
     501                 :               const XML_Char* aName)
     502                 : {
     503               0 :   UserData* ud = static_cast<UserData*>(aUserData);
     504               0 :   XML_StopParser(ud->mExpat, false);
     505               0 : }
     506                 : 
     507                 : static void
     508               0 : HandleProcessingInstruction(void* aUserData,
     509                 :                             const XML_Char* aTarget,
     510                 :                             const XML_Char* aData)
     511                 : {
                             // expat processing-instruction callback (registered in
                             // FinalizeSniffing()). aUserData is the UserData installed with
                             // XML_SetUserData(); aTarget and aData are ignored. Stops the sniffing
                             // parse (resumable = false).
     512               0 :   UserData* ud = static_cast<UserData*>(aUserData);
     513               0 :   XML_StopParser(ud->mExpat, false);
     514               0 : }
     515                 : 
     516                 : nsresult
     517               0 : nsHtml5StreamParser::FinalizeSniffing(const PRUint8* aFromSegment, // can be null
     518                 :                                       PRUint32 aCount,
     519                 :                                       PRUint32* aWriteCount,
     520                 :                                       PRUint32 aCountToSniffingLimit)
     521                 : {
                             // Ends encoding sniffing: settles on a charset using the bytes held in
                             // mSniffingBuffer plus the current segment, then hands both to
                             // SetupDecodingAndWriteSniffingBufferAndCurrentSegment().
                             //
                             // aFromSegment          bytes from the current network event; null when
                             //                       there is no current segment (the end-of-stream
                             //                       caller passes nsnull with zero counts).
                             // aCount                number of bytes in aFromSegment.
                             // aWriteCount           out-param forwarded to the setup/write helper.
                             // aCountToSniffingLimit number of leading bytes of aFromSegment that
                             //                       still belong to the sniffed prefix.
                             //
                             // Returns the helper's result, or a failure code from the charset
                             // detector (mChardet) if feeding it fails.
     522               0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
     523               0 :   NS_ASSERTION(mCharsetSource < kCharsetFromMetaTag,
     524                 :       "Should not finalize sniffing when already confident.");
     525               0 :   if (mMode == VIEW_SOURCE_XML) {
                               // Have expat allocate through the moz_x* allocator functions.
     526                 :     static const XML_Memory_Handling_Suite memsuite =
     527                 :       {
     528                 :         (void *(*)(size_t))moz_xmalloc,
     529                 :         (void *(*)(void *, size_t))moz_xrealloc,
     530                 :         moz_free
     531                 :       };
     532                 : 
                               // Passed as the separator argument of XML_ParserCreate_MM() below.
     533                 :     static const PRUnichar kExpatSeparator[] = { 0xFFFF, '\0' };
     534                 : 
     535                 :     static const PRUnichar kISO88591[] =
     536                 :         { 'I', 'S', 'O', '-', '8', '8', '5', '9', '-', '1', '\0' };
     537                 : 
                               // Context handed to the expat callbacks through XML_SetUserData().
     538                 :     UserData ud;
     539               0 :     ud.mStreamParser = this;
     540                 : 
     541                 :     // If we got this far, the stream didn't have a BOM. UTF-16-encoded XML
     542                 :     // documents MUST begin with a BOM. We don't support EBCDIC and such.
     543                 :     // Thus, at this point, what we have is garbage or something encoded using
     544                 :     // a rough ASCII superset. ISO-8859-1 allows us to decode ASCII bytes
     545                 :     // without throwing errors when bytes have the most significant bit set
     546                 :     // and without triggering expat's unknown encoding code paths. This is
     547                 :     // enough to be able to use expat to parse the XML declaration in order
     548                 :     // to extract the encoding name from it.
     549               0 :     ud.mExpat = XML_ParserCreate_MM(kISO88591, &memsuite, kExpatSeparator);
                               // Every callback stops the parser; only the XML declaration handler
                               // additionally reports the declared encoding to the stream parser.
     550               0 :     XML_SetXmlDeclHandler(ud.mExpat, HandleXMLDeclaration);
     551               0 :     XML_SetElementHandler(ud.mExpat, HandleStartElement, HandleEndElement);
     552               0 :     XML_SetCommentHandler(ud.mExpat, HandleComment);
     553               0 :     XML_SetProcessingInstructionHandler(ud.mExpat, HandleProcessingInstruction);
     554               0 :     XML_SetUserData(ud.mExpat, static_cast<void*>(&ud));
     555                 : 
     556               0 :     XML_Status status = XML_STATUS_OK;
     557                 : 
     558                 :     // aFromSegment points to the data obtained from the current network
     559                 :     // event. mSniffingBuffer (if it exists) contains the data obtained before
     560                 :     // the current event. Thus, mSniffingLength bytes of mSniffingBuffer
     561                 :     // followed by aCountToSniffingLimit bytes from aFromSegment are the
     562                 :     // first 1024 bytes of the file (or the file as a whole if the file is
     563                 :     // 1024 bytes long or shorter). Thus, we parse both buffers, but if the
     564                 :     // first call succeeds already, we skip parsing the second buffer.
     565               0 :     if (mSniffingBuffer) {
     566                 :       status = XML_Parse(ud.mExpat,
     567               0 :                          reinterpret_cast<const char*>(mSniffingBuffer.get()),
     568                 :                          mSniffingLength,
     569               0 :                          false);
     570                 :     }
                               // Parse the current segment only if the first parse neither failed
                               // nor left mCharsetSource at a confident level.
                               // NOTE(review): presumably SetEncodingFromExpat() is what raises
                               // mCharsetSource; that function is not visible here.
     571               0 :     if (status == XML_STATUS_OK &&
     572                 :         mCharsetSource < kCharsetFromMetaTag &&
     573                 :         aFromSegment) {
     574                 :       status = XML_Parse(ud.mExpat,
     575                 :                          reinterpret_cast<const char*>(aFromSegment),
     576                 :                          aCountToSniffingLimit,
     577               0 :                          false);
     578                 :     }
     579               0 :     XML_ParserFree(ud.mExpat);
     580                 : 
     581               0 :     if (mCharsetSource < kCharsetFromMetaTag) {
     582                 :       // Failed to get an encoding from the XML declaration. XML defaults
     583                 :       // confidently to UTF-8 in this case.
     584                 :       // It is also possible that the document has an XML declaration that is
     585                 :       // longer than 1024 bytes, but that case is not worth worrying about.
     586               0 :       mCharset.AssignLiteral("UTF-8");
     587               0 :       mCharsetSource = kCharsetFromMetaTag; // means confident
     588                 :     }
     589                 : 
     590                 :     return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
     591                 :                                                                 aCount,
     592               0 :                                                                 aWriteCount);
     593                 :   }
     594                 : 
     595                 :   // meta scan failed.
                             // A charset source at or above kCharsetFromHintPrevDoc is used as-is and
                             // the charset detector is switched off.
     596               0 :   if (mCharsetSource >= kCharsetFromHintPrevDoc) {
     597               0 :     mFeedChardet = false;
     598               0 :     return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
     599                 :   }
     600                 :   // Check for BOMless UTF-16 with Basic
     601                 :   // Latin content for compat with IE. See bug 631751.
     602               0 :   SniffBOMlessUTF16BasicLatin(aFromSegment, aCountToSniffingLimit);
     603                 :   // the charset may have been set now
     604                 :   // maybe try chardet now;
     605               0 :   if (mFeedChardet) {
                               // dontFeed is an out-param of DoIt(); when it comes back true the
                               // detector is not fed any further. mFeedChardet is updated before the
                               // result check, so it is recorded even on early return.
     606                 :     bool dontFeed;
     607                 :     nsresult rv;
     608               0 :     if (mSniffingBuffer) {
     609               0 :       rv = mChardet->DoIt((const char*)mSniffingBuffer.get(), mSniffingLength, &dontFeed);
     610               0 :       mFeedChardet = !dontFeed;
     611               0 :       NS_ENSURE_SUCCESS(rv, rv);
     612                 :     }
     613               0 :     if (mFeedChardet && aFromSegment) {
     614               0 :       rv = mChardet->DoIt((const char*)aFromSegment,
     615                 :                           // Avoid buffer boundary-dependent behavior when
     616                 :                           // reparsing is forbidden. If reparse is forbidden,
     617                 :                           // act as if we only saw the first 1024 bytes.
     618                 :                           // When reparsing isn't forbidden, buffer boundaries
     619                 :                           // can have an effect on whether the page is loaded
     620                 :                           // once or twice. :-(
     621                 :                           mReparseForbidden ? aCountToSniffingLimit : aCount,
     622               0 :                           &dontFeed);
     623               0 :       mFeedChardet = !dontFeed;
     624               0 :       NS_ENSURE_SUCCESS(rv, rv);
     625                 :     }
     626               0 :     if (mFeedChardet && (!aFromSegment || mReparseForbidden)) {
     627                 :       // mReparseForbidden is checked so that we get to use the sniffing
     628                 :       // buffer with the best guess so far if we aren't allowed to guess
     629                 :       // better later.
     630               0 :       mFeedChardet = false;
     631               0 :       rv = mChardet->Done();
     632               0 :       NS_ENSURE_SUCCESS(rv, rv);
     633                 :     }
     634                 :     // fall thru; callback may have changed charset
     635                 :   }
     636               0 :   if (mCharsetSource == kCharsetUninitialized) {
     637                 :     // Hopefully this case is never needed, but dealing with it anyway
     638               0 :     mCharset.AssignLiteral("windows-1252");
     639               0 :     mCharsetSource = kCharsetFromWeakDocTypeDefault;
     640               0 :     mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
     641               0 :   } else if (mMode == LOAD_AS_DATA &&
     642                 :              mCharsetSource == kCharsetFromWeakDocTypeDefault) {
     643               0 :     NS_ASSERTION(mReparseForbidden, "Reparse should be forbidden for XHR");
     644               0 :     NS_ASSERTION(!mFeedChardet, "Should not feed chardet for XHR");
     645               0 :     NS_ASSERTION(mCharset.EqualsLiteral("UTF-8"),
     646                 :                  "XHR should default to UTF-8");
     647                 :     // Now mark charset source as non-weak to signal that we have a decision
     648               0 :     mCharsetSource = kCharsetFromDocTypeDefault;
     649               0 :     mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
     650                 :   }
     651               0 :   return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
     652                 : }
     653                 : 
     654                 : nsresult
     655               0 : nsHtml5StreamParser::SniffStreamBytes(const PRUint8* aFromSegment,
     656                 :                                       PRUint32 aCount,
     657                 :                                       PRUint32* aWriteCount)
     658                 : {
                             // Handles incoming bytes while the encoding is still undecided.
                             //
                             // 1. Advances the BOM state machine (mBomState) over the segment. A
                             //    complete UTF-16LE/UTF-16BE/UTF-8 BOM sets up the decoder and the
                             //    rest of the segment goes straight to WriteStreamBytes().
                             // 2. Otherwise, in NORMAL, VIEW_SOURCE_HTML and LOAD_AS_DATA modes, runs
                             //    the meta prescan on the bytes; a hit fixes the charset and flushes
                             //    the buffered and current bytes.
                             // 3. If this segment reaches NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE
                             //    total sniffed bytes, sniffing is finalized; otherwise the bytes are
                             //    appended to mSniffingBuffer for a later call.
                             //
                             // aFromSegment/aCount describe the current segment. In the paths handled
                             // directly here, *aWriteCount is set to the number of segment bytes
                             // accounted for; other paths forward aWriteCount to the helper they call.
                             // Returns NS_ERROR_OUT_OF_MEMORY if the sniffing buffer cannot be
                             // allocated, otherwise the result of the helper that was invoked.
     659               0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
     660               0 :   nsresult rv = NS_OK;
     661                 :   PRUint32 writeCount;
                             // mBomState persists across calls, so a BOM split over several segments
                             // is still matched byte by byte.
     662               0 :   for (PRUint32 i = 0; i < aCount && mBomState != BOM_SNIFFING_OVER; i++) {
     663               0 :     switch (mBomState) {
     664                 :       case BOM_SNIFFING_NOT_STARTED:
     665               0 :         NS_ASSERTION(i == 0, "Bad BOM sniffing state.");
                                   // i is asserted to be 0 here, so *aFromSegment is the byte at i.
     666               0 :         switch (*aFromSegment) {
     667                 :           case 0xEF:
     668               0 :             mBomState = SEEN_UTF_8_FIRST_BYTE;
     669               0 :             break;
     670                 :           case 0xFF:
     671               0 :             mBomState = SEEN_UTF_16_LE_FIRST_BYTE;
     672               0 :             break;
     673                 :           case 0xFE:
     674               0 :             mBomState = SEEN_UTF_16_BE_FIRST_BYTE;
     675               0 :             break;
     676                 :           default:
     677               0 :             mBomState = BOM_SNIFFING_OVER;
     678               0 :             break;
     679                 :         }
     680               0 :         break;
     681                 :       case SEEN_UTF_16_LE_FIRST_BYTE:
     682               0 :         if (aFromSegment[i] == 0xFE) {
     683               0 :           rv = SetupDecodingFromBom("UTF-16", "UTF-16LE"); // upper case is the raw form
     684               0 :           NS_ENSURE_SUCCESS(rv, rv);
                                     // Decode what follows the BOM; the i + 1 bytes of this segment
                                     // up to and including the BOM's last byte count as consumed.
     685               0 :           PRUint32 count = aCount - (i + 1);
     686               0 :           rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
     687               0 :           NS_ENSURE_SUCCESS(rv, rv);
     688               0 :           *aWriteCount = writeCount + (i + 1);
     689               0 :           return rv;
     690                 :         }
     691               0 :         mBomState = BOM_SNIFFING_OVER;
     692               0 :         break;
     693                 :       case SEEN_UTF_16_BE_FIRST_BYTE:
     694               0 :         if (aFromSegment[i] == 0xFF) {
     695               0 :           rv = SetupDecodingFromBom("UTF-16", "UTF-16BE"); // upper case is the raw form
     696               0 :           NS_ENSURE_SUCCESS(rv, rv);
                                     // Same accounting as the UTF-16LE case above.
     697               0 :           PRUint32 count = aCount - (i + 1);
     698               0 :           rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
     699               0 :           NS_ENSURE_SUCCESS(rv, rv);
     700               0 :           *aWriteCount = writeCount + (i + 1);
     701               0 :           return rv;
     702                 :         }
     703               0 :         mBomState = BOM_SNIFFING_OVER;
     704               0 :         break;
     705                 :       case SEEN_UTF_8_FIRST_BYTE:
     706               0 :         if (aFromSegment[i] == 0xBB) {
     707               0 :           mBomState = SEEN_UTF_8_SECOND_BYTE;
     708                 :         } else {
     709               0 :           mBomState = BOM_SNIFFING_OVER;
     710                 :         }
     711               0 :         break;
     712                 :       case SEEN_UTF_8_SECOND_BYTE:
     713               0 :         if (aFromSegment[i] == 0xBF) {
     714               0 :           rv = SetupDecodingFromBom("UTF-8", "UTF-8"); // upper case is the raw form
     715               0 :           NS_ENSURE_SUCCESS(rv, rv);
                                     // Same accounting as the UTF-16 cases above.
     716               0 :           PRUint32 count = aCount - (i + 1);
     717               0 :           rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
     718               0 :           NS_ENSURE_SUCCESS(rv, rv);
     719               0 :           *aWriteCount = writeCount + (i + 1);
     720               0 :           return rv;
     721                 :         }
     722               0 :         mBomState = BOM_SNIFFING_OVER;
     723               0 :         break;
     724                 :       default:
     725               0 :         mBomState = BOM_SNIFFING_OVER;
     726               0 :         break;
     727                 :     }
     728                 :   }
     729                 :   // if we get here, there either was no BOM or the BOM sniffing isn't complete yet
     730                 :   
                             // Create the meta scanner on first use, only in the modes that run the
                             // meta prescan below.
     731               0 :   if (!mMetaScanner && (mMode == NORMAL ||
     732                 :                         mMode == VIEW_SOURCE_HTML ||
     733                 :                         mMode == LOAD_AS_DATA)) {
     734               0 :     mMetaScanner = new nsHtml5MetaScanner();
     735                 :   }
     736                 :   
     737               0 :   if (mSniffingLength + aCount >= NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE) {
     738                 :     // this is the last buffer
                               // Only the part of this segment that completes the sniffing buffer
                               // size is prescanned.
     739                 :     PRUint32 countToSniffingLimit =
     740               0 :         NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE - mSniffingLength;
     741               0 :     if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) {
     742                 :       nsHtml5ByteReadable readable(aFromSegment, aFromSegment +
     743               0 :           countToSniffingLimit);
     744               0 :       mMetaScanner->sniff(&readable, getter_AddRefs(mUnicodeDecoder), mCharset);
                                 // A non-null mUnicodeDecoder after sniff() is treated as success.
     745               0 :       if (mUnicodeDecoder) {
     746               0 :         mUnicodeDecoder->SetInputErrorBehavior(
     747               0 :             nsIUnicodeDecoder::kOnError_Recover);
     748                 :         // meta scan successful
     749               0 :         mCharsetSource = kCharsetFromMetaPrescan;
     750               0 :         mFeedChardet = false;
     751               0 :         mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
     752               0 :         mMetaScanner = nsnull;
     753                 :         return WriteSniffingBufferAndCurrentSegment(aFromSegment, aCount,
     754               0 :             aWriteCount);
     755                 :       }
     756                 :     }
     757                 :     return FinalizeSniffing(aFromSegment, aCount, aWriteCount,
     758               0 :         countToSniffingLimit);
     759                 :   }
     760                 : 
     761                 :   // not the last buffer
     762               0 :   if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) {
     763               0 :     nsHtml5ByteReadable readable(aFromSegment, aFromSegment + aCount);
     764               0 :     mMetaScanner->sniff(&readable, getter_AddRefs(mUnicodeDecoder), mCharset);
     765               0 :     if (mUnicodeDecoder) {
     766                 :       // meta scan successful
     767               0 :       mUnicodeDecoder->SetInputErrorBehavior(
     768               0 :           nsIUnicodeDecoder::kOnError_Recover);
     769               0 :       mCharsetSource = kCharsetFromMetaPrescan;
     770               0 :       mFeedChardet = false;
     771               0 :       mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
     772               0 :       mMetaScanner = nsnull;
     773                 :       return WriteSniffingBufferAndCurrentSegment(aFromSegment,
     774                 :                                                   aCount,
     775               0 :                                                   aWriteCount);
     776                 :     }
     777                 :   }
     778                 : 
                             // Still undecided: stash the segment. The sniffing buffer is allocated
                             // lazily and fallibly.
     779               0 :   if (!mSniffingBuffer) {
     780               0 :     const mozilla::fallible_t fallible = mozilla::fallible_t();
     781                 :     mSniffingBuffer = new (fallible)
     782               0 :       PRUint8[NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE];
     783               0 :     if (!mSniffingBuffer) {
     784               0 :       return NS_ERROR_OUT_OF_MEMORY;
     785                 :     }
     786                 :   }
                             // Fits: the case mSniffingLength + aCount >= buffer size returned above.
     787               0 :   memcpy(mSniffingBuffer + mSniffingLength, aFromSegment, aCount);
     788               0 :   mSniffingLength += aCount;
     789               0 :   *aWriteCount = aCount;
     790               0 :   return NS_OK;
     791                 : }
     792                 : 
     793                 : nsresult
     794               0 : nsHtml5StreamParser::WriteStreamBytes(const PRUint8* aFromSegment,
     795                 :                                       PRUint32 aCount,
     796                 :                                       PRUint32* aWriteCount)
     797                 : {
                             // Decodes aCount bytes starting at aFromSegment with mUnicodeDecoder
                             // into the chain of UTF-16 buffers that ends at mLastBuffer, linking a
                             // new buffer whenever the current one is full. Input the decoder
                             // rejects is replaced by U+FFFD and the decoder is reset.
                             //
                             // On success returns NS_OK with *aWriteCount set to the number of input
                             // bytes consumed. Returns NS_ERROR_OUT_OF_MEMORY (leaving *aWriteCount
                             // untouched) if a new buffer cannot be allocated.
     798               0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
     799                 :   // mLastBuffer always points to a buffer of the size
     800                 :   // NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE.
                             // Start with a buffer that has room: if the last one is exactly full,
                             // append an empty one to the chain first.
     801               0 :   if (mLastBuffer->getEnd() == NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE) {
     802                 :     nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf =
     803                 :       nsHtml5OwningUTF16Buffer::FalliblyCreate(
     804               0 :         NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
     805               0 :     if (!newBuf) {
     806               0 :       return NS_ERROR_OUT_OF_MEMORY;
     807                 :     }
     808               0 :     mLastBuffer = (mLastBuffer->next = newBuf.forget());
     809                 :   }
                             // Running total of input bytes consumed so far; aFromSegment is advanced
                             // in step with it.
     810               0 :   PRInt32 totalByteCount = 0;
     811               0 :   for (;;) {
     812               0 :     PRInt32 end = mLastBuffer->getEnd();
                               // byteCount and utf16Count are in/out for Convert(): remaining input
                               // and free output space going in; after the call they are added to the
                               // consumed-input total and to the buffer's end position respectively.
     813               0 :     PRInt32 byteCount = aCount - totalByteCount;
     814               0 :     PRInt32 utf16Count = NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE - end;
     815                 : 
     816               0 :     NS_ASSERTION(utf16Count, "Trying to convert into a buffer with no free space!");
     817                 :     // byteCount may be zero to force the decoder to output a pending surrogate
     818                 :     // pair.
     819                 : 
     820               0 :     nsresult convResult = mUnicodeDecoder->Convert((const char*)aFromSegment, &byteCount, mLastBuffer->getBuffer() + end, &utf16Count);
     821                 : 
     822               0 :     end += utf16Count;
     823               0 :     mLastBuffer->setEnd(end);
     824               0 :     totalByteCount += byteCount;
     825               0 :     aFromSegment += byteCount;
     826                 : 
     827               0 :     NS_ASSERTION(end <= NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE,
     828                 :         "The Unicode decoder wrote too much data.");
     829               0 :     NS_ASSERTION(byteCount >= -1, "The decoder consumed fewer than -1 bytes.");
     830                 : 
     831               0 :     if (NS_FAILED(convResult)) {
     832                 :       // Using the more generic NS_FAILED test above in case there are still
     833                 :       // decoders around that don't use NS_ERROR_ILLEGAL_INPUT properly.
     834               0 :       NS_ASSERTION(convResult == NS_ERROR_ILLEGAL_INPUT,
     835                 :           "The decoder signaled an error other than NS_ERROR_ILLEGAL_INPUT.");
     836                 : 
     837                 :       // There's an illegal byte in the input. It's now the responsibility
     838                 :       // of this calling code to output a U+FFFD REPLACEMENT CHARACTER and
     839                 :       // reset the decoder.
     840                 : 
     841               0 :       if (totalByteCount < (PRInt32)aCount) {
     842                 :         // advance over the bad byte
     843               0 :         ++totalByteCount;
     844               0 :         ++aFromSegment;
     845                 :       } else {
     846               0 :         NS_NOTREACHED("The decoder signaled an error but consumed all input.");
     847                 :         // Recovering from this situation in case there are still broken
     848                 :         // decoders, since nsScanner had recovery code, too.
     849               0 :         totalByteCount = (PRInt32)aCount;
     850                 :       }
     851                 : 
     852                 :       // Emit the REPLACEMENT CHARACTER
                                 // If the decoder filled the buffer before failing, U+FFFD goes into
                                 // a new buffer.
     853               0 :       if (end >= NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE) {
     854                 :         nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf =
     855                 :           nsHtml5OwningUTF16Buffer::FalliblyCreate(
     856               0 :             NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
     857               0 :         if (!newBuf) {
     858               0 :           return NS_ERROR_OUT_OF_MEMORY;
     859                 :         }
     860               0 :         mLastBuffer = (mLastBuffer->next = newBuf.forget());
     861               0 :         end = 0;
     862                 :       }
     863               0 :       mLastBuffer->getBuffer()[end] = 0xFFFD;
     864               0 :       ++end;
     865               0 :       mLastBuffer->setEnd(end);
                                 // If U+FFFD took the last free slot, chain a new buffer so the next
                                 // Convert() call has output space.
     866               0 :       if (end >= NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE) {
     867                 :         nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf =
     868                 :           nsHtml5OwningUTF16Buffer::FalliblyCreate(
     869               0 :             NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
     870               0 :         if (!newBuf) {
     871               0 :           return NS_ERROR_OUT_OF_MEMORY;
     872                 :         }
     873               0 :         mLastBuffer = (mLastBuffer->next = newBuf.forget());
     874                 :       }
     875                 : 
     876               0 :       mUnicodeDecoder->Reset();
                                 // Done if the bad byte was the last input byte; otherwise loop to
                                 // decode the remainder.
     877               0 :       if (totalByteCount == (PRInt32)aCount) {
     878               0 :         *aWriteCount = (PRUint32)totalByteCount;
     879               0 :         return NS_OK;
     880                 :       }
     881               0 :     } else if (convResult == NS_PARTIAL_MORE_OUTPUT) {
                                 // Output space ran out: chain a new buffer and continue.
     882                 :       nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf =
     883                 :         nsHtml5OwningUTF16Buffer::FalliblyCreate(
     884               0 :           NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
     885               0 :       if (!newBuf) {
     886               0 :         return NS_ERROR_OUT_OF_MEMORY;
     887                 :       }
     888               0 :       mLastBuffer = (mLastBuffer->next = newBuf.forget());
     889                 :       // All input may have been consumed if there is a pending surrogate pair
     890                 :       // that doesn't fit in the output buffer. Loop back to push a zero-length
     891                 :       // input to the decoder in that case.
     892                 :     } else {
                                 // Any other result ends the loop; all input is expected to have
                                 // been consumed (asserted below).
     893               0 :       NS_ASSERTION(totalByteCount == (PRInt32)aCount,
     894                 :           "The Unicode decoder consumed the wrong number of bytes.");
     895               0 :       *aWriteCount = (PRUint32)totalByteCount;
     896               0 :       return NS_OK;
     897                 :     }
     898                 :   }
     899                 : }
     900                 : 
     901                 : // nsIRequestObserver methods:
     902                 : nsresult
     903               0 : nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext)
     904                 : {
                             // Start-of-stream notification; asserted to run on the main thread.
                             // Forwards the notification to mObserver (if set), records the request,
                             // starts the tokenizer/tree builder/executor in the configuration that
                             // matches mMode, allocates the first UTF-16 read buffer, decides whether
                             // reparsing and charset detection are allowed, and instantiates a
                             // decoder when the charset source is already above
                             // kCharsetFromMetaPrescan.
                             //
                             // Returns NS_ERROR_OUT_OF_MEMORY if the first buffer cannot be
                             // allocated, a failure code if the converter manager service is
                             // unavailable, and NS_OK otherwise (including when no decoder could be
                             // obtained for mCharset).
     905               0 :   NS_PRECONDITION(STREAM_NOT_STARTED == mStreamState,
     906                 :                   "Got OnStartRequest when the stream had already started.");
     907               0 :   NS_PRECONDITION(!mExecutor->HasStarted(),
     908                 :                   "Got OnStartRequest at the wrong stage in the executor life cycle.");
     909               0 :   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
     910               0 :   if (mObserver) {
     911               0 :     mObserver->OnStartRequest(aRequest, aContext);
     912                 :   }
     913               0 :   mRequest = aRequest;
     914                 : 
     915               0 :   mStreamState = STREAM_BEING_READ;
     916                 : 
     917               0 :   if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
     918               0 :     mTokenizer->StartViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle));
     919                 :   }
     920                 : 
     921                 :   // For View Source, the parser should run with scripts "enabled" if a normal
     922                 :   // load would have scripts enabled.
                             // LOAD_AS_DATA always runs with scripting disabled.
     923                 :   bool scriptingEnabled = mMode == LOAD_AS_DATA ?
     924               0 :                                    false : mExecutor->IsScriptEnabled();
     925               0 :   mOwner->StartTokenizer(scriptingEnabled);
     926               0 :   mTreeBuilder->setScriptingEnabled(scriptingEnabled);
     927               0 :   mTokenizer->start();
     928               0 :   mExecutor->Start();
     929               0 :   mExecutor->StartReadingFromStage();
     930                 : 
     931               0 :   if (mMode == PLAIN_TEXT) {
     932               0 :     mTreeBuilder->StartPlainText();
     933               0 :     mTokenizer->StartPlainText();
     934               0 :   } else if (mMode == VIEW_SOURCE_PLAIN) {
     935               0 :     mTreeBuilder->StartPlainTextViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle));
     936               0 :     mTokenizer->StartPlainText();
     937                 :   }
     938                 : 
     939                 :   /*
     940                 :    * If you move the following line, be very careful not to cause
     941                 :    * WillBuildModel to be called before the document has had its
     942                 :    * script global object set.
     943                 :    */
     944               0 :   mExecutor->WillBuildModel(eDTDMode_unknown);
     945                 :   
                             // Allocate the first read buffer; mFirstBuffer and mLastBuffer both
                             // start out pointing at it.
     946                 :   nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf =
     947                 :     nsHtml5OwningUTF16Buffer::FalliblyCreate(
     948               0 :       NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
     949               0 :   if (!newBuf) {
     950               0 :     mExecutor->MarkAsBroken(); // marks this stream parser as terminated,
     951                 :                                // which prevents entry to code paths that
     952                 :                                // would use mFirstBuffer or mLastBuffer.
     953               0 :     return NS_ERROR_OUT_OF_MEMORY;
     954                 :   }
     955               0 :   NS_ASSERTION(!mFirstBuffer, "How come we have the first buffer set?");
     956               0 :   NS_ASSERTION(!mLastBuffer, "How come we have the last buffer set?");
     957               0 :   mFirstBuffer = mLastBuffer = newBuf;
     958                 : 
     959               0 :   nsresult rv = NS_OK;
     960                 : 
     961                 :   // The line below means that the encoding can end up being wrong if
     962                 :   // a view-source URL is loaded without having the encoding hint from a
     963                 :   // previous normal load in the history.
     964               0 :   mReparseForbidden = !(mMode == NORMAL || mMode == PLAIN_TEXT);
     965                 : 
                             // A failed QueryInterface is not treated as an error: rv is either
                             // overwritten below or ignored by the explicit NS_OK returns.
                             // NOTE(review): presumably that case means a non-HTTP request.
     966               0 :   nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(mRequest, &rv));
     967               0 :   if (NS_SUCCEEDED(rv)) {
     968               0 :     nsCAutoString method;
     969               0 :     httpChannel->GetRequestMethod(method);
     970                 :     // XXX does Necko have a way to renavigate POST, etc. without hitting
     971                 :     // the network?
     972               0 :     if (!method.EqualsLiteral("GET")) {
     973                 :       // This is the old Gecko behavior but the HTML5 spec disagrees.
     974                 :       // Don't reparse on POST.
     975               0 :       mReparseForbidden = true;
     976               0 :       mFeedChardet = false; // can't restart anyway
     977                 :     }
     978                 :   }
     979                 : 
                             // No charset detection when the charset source is already at or above
                             // kCharsetFromAutoDetection.
     980               0 :   if (mCharsetSource >= kCharsetFromAutoDetection) {
     981               0 :     mFeedChardet = false;
     982                 :   }
     983                 :   
     984               0 :   if (mCharsetSource <= kCharsetFromMetaPrescan) {
     985                 :     // we aren't ready to commit to an encoding yet
     986                 :     // leave converter uninstantiated for now
     987               0 :     return NS_OK;
     988                 :   }
     989                 :   
     990               0 :   nsCOMPtr<nsICharsetConverterManager> convManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
     991               0 :   NS_ENSURE_SUCCESS(rv, rv);
     992               0 :   rv = convManager->GetUnicodeDecoder(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
     993                 :   // if we failed to get a decoder, there will be fallback, so don't propagate
     994                 :   //  the error.
     995               0 :   if (NS_SUCCEEDED(rv)) {
     996               0 :     mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
     997                 :   } else {
                               // No decoder for mCharset: demote the charset source to
                               // kCharsetFromWeakDocTypeDefault.
     998               0 :     mCharsetSource = kCharsetFromWeakDocTypeDefault;
     999                 :   }
    1000               0 :   return NS_OK;
    1001                 : }
    1002                 : 
    1003                 : void
    1004               0 : nsHtml5StreamParser::DoStopRequest()
    1005                 : {
                             // End-of-stream handler. The assertions below require the parser
                             // thread, a stream state of STREAM_BEING_READ and ownership of
                             // mTokenizerMutex. Marks the stream as ended, finishes encoding
                             // sniffing or chardet, and then parses whatever is still buffered.
    1006               0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
    1007               0 :   NS_PRECONDITION(STREAM_BEING_READ == mStreamState,
    1008                 :                   "Stream ended without being open.");
    1009               0 :   mTokenizerMutex.AssertCurrentThreadOwns();
    1010                 : 
                             // A terminated parser leaves mStreamState untouched.
    1011               0 :   if (IsTerminated()) {
    1012               0 :     return;
    1013                 :   }
    1014                 : 
    1015               0 :   mStreamState = STREAM_ENDED;
    1016                 : 
    1017               0 :   if (!mUnicodeDecoder) {
                               // No decoder has been instantiated yet: finalize sniffing with no
                               // additional bytes. A failure marks the parser as broken.
    1018                 :     PRUint32 writeCount;
    1019               0 :     if (NS_FAILED(FinalizeSniffing(nsnull, 0, &writeCount, 0))) {
    1020               0 :       MarkAsBroken();
    1021               0 :       return;
    1022                 :     }
    1023               0 :   } else if (mFeedChardet) {
                               // A decoder exists and chardet is still being fed: signal that
                               // there is no more input.
    1024               0 :     mChardet->Done();
    1025                 :   }
    1026                 : 
                             // Skip the final parse if the parser is terminated or interrupted at
                             // this point.
    1027               0 :   if (IsTerminatedOrInterrupted()) {
    1028               0 :     return;
    1029                 :   }
    1030                 : 
    1031               0 :   ParseAvailableData(); 
    1032                 : }
    1033                 : 
    1034                 : class nsHtml5RequestStopper : public nsRunnable
    1035               0 : {
                           // Runnable that calls DoStopRequest() on the stream parser while
                           // holding the parser's mTokenizerMutex. OnStopRequest() creates one and
                           // dispatches it to mThread.
    1036                 :   private:
    1037                 :     nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser;
    1038                 :   public:
    1039               0 :     nsHtml5RequestStopper(nsHtml5StreamParser* aStreamParser)
    1040               0 :       : mStreamParser(aStreamParser)
    1041               0 :     {}
    1042               0 :     NS_IMETHODIMP Run()
    1043                 :     {
                                 // DoStopRequest() asserts that the current thread owns this
                                 // mutex, so take it for the duration of the call.
    1044               0 :       mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex);
    1045               0 :       mStreamParser->DoStopRequest();
    1046               0 :       return NS_OK;
    1047                 :     }
    1048                 : };
    1049                 : 
    1050                 : nsresult
    1051               0 : nsHtml5StreamParser::OnStopRequest(nsIRequest* aRequest,
    1052                 :                              nsISupports* aContext,
    1053                 :                              nsresult status)
    1054                 : {
                             // Stop notification for the stream; asserted to arrive on the main
                             // thread for the request stored in mRequest. Forwards the
                             // notification to mObserver when one is set, then hands the actual
                             // end-of-stream work to mThread via nsHtml5RequestStopper.
                             // Always returns NS_OK.
    1055               0 :   NS_ASSERTION(mRequest == aRequest, "Got Stop on wrong stream.");
    1056               0 :   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
    1057               0 :   if (mObserver) {
    1058               0 :     mObserver->OnStopRequest(aRequest, aContext, status);
    1059                 :   }
    1060               0 :   nsCOMPtr<nsIRunnable> stopper = new nsHtml5RequestStopper(this);
                             // A failed dispatch is only reported as a warning; it does not change
                             // the return value.
    1061               0 :   if (NS_FAILED(mThread->Dispatch(stopper, nsIThread::DISPATCH_NORMAL))) {
    1062               0 :     NS_WARNING("Dispatching StopRequest event failed.");
    1063                 :   }
    1064               0 :   return NS_OK;
    1065                 : }
    1066                 : 
    1067                 : void
    1068               0 : nsHtml5StreamParser::DoDataAvailable(PRUint8* aBuffer, PRUint32 aLength)
    1069                 : {
                             // Consumes aLength bytes starting at aBuffer. The assertions below
                             // require the parser thread, a stream state of STREAM_BEING_READ and
                             // ownership of mTokenizerMutex. The bytes are either written through
                             // the decoder or sniffed, the available data is parsed, and the flush
                             // timer is armed unless it is already armed or the parser is
                             // speculating.
    1070               0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
    1071               0 :   NS_PRECONDITION(STREAM_BEING_READ == mStreamState,
    1072                 :                   "DoDataAvailable called when stream not open.");
    1073               0 :   mTokenizerMutex.AssertCurrentThreadOwns();
    1074                 : 
    1075               0 :   if (IsTerminated()) {
    1076               0 :     return;
    1077                 :   }
    1078                 : 
    1079                 :   PRUint32 writeCount;
    1080                 :   nsresult rv;
    1081               0 :   if (HasDecoder()) {
    1082               0 :     if (mFeedChardet) {
                                 // dontFeed is an out-parameter of DoIt(); when it comes back
                                 // true, chardet is not fed again.
    1083                 :       bool dontFeed;
    1084               0 :       mChardet->DoIt((const char*)aBuffer, aLength, &dontFeed);
    1085               0 :       mFeedChardet = !dontFeed;
    1086                 :     }
    1087               0 :     rv = WriteStreamBytes(aBuffer, aLength, &writeCount);
    1088                 :   } else {
                               // No decoder yet: run the bytes through sniffing instead.
    1089               0 :     rv = SniffStreamBytes(aBuffer, aLength, &writeCount);
    1090                 :   }
    1091               0 :   if (NS_FAILED(rv)) {
    1092               0 :     MarkAsBroken();
    1093               0 :     return;
    1094                 :   }
    1095               0 :   NS_ASSERTION(writeCount == aLength, "Wrong number of stream bytes written/sniffed.");
    1096                 : 
    1097               0 :   if (IsTerminatedOrInterrupted()) {
    1098               0 :     return;
    1099                 :   }
    1100                 : 
    1101               0 :   ParseAvailableData();
    1102                 : 
                             // Nothing to arm if the flush timer is already armed or the parser is
                             // speculating.
    1103               0 :   if (mFlushTimerArmed || mSpeculating) {
    1104               0 :     return;
    1105                 :   }
    1106                 : 
                             // NOTE(review): the ternary below selects sTimerInitialDelay once
                             // mFlushTimerEverFired is true and sTimerSubsequentDelay before that,
                             // which reads as inverted relative to the constant names -- confirm
                             // the intent before relying on these delays.
    1107               0 :   mFlushTimer->InitWithFuncCallback(nsHtml5StreamParser::TimerCallback,
    1108                 :                                     static_cast<void*> (this),
    1109                 :                                     mFlushTimerEverFired ?
    1110                 :                                         sTimerInitialDelay :
    1111                 :                                         sTimerSubsequentDelay,
    1112               0 :                                     nsITimer::TYPE_ONE_SHOT);
    1113               0 :   mFlushTimerArmed = true;
    1114                 : }
    1115                 : 
    1116                 : class nsHtml5DataAvailable : public nsRunnable
    1117               0 : {
                           // Runnable that carries a byte buffer and its length to
                           // DoDataAvailable(). OnDataAvailable() creates one per call and
                           // dispatches it to mThread.
    1118                 :   private:
    1119                 :     nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser;
                               // Bytes read from the input stream. Presumably released by
                               // nsAutoArrayPtr when this runnable is destroyed -- TODO confirm.
    1120                 :     nsAutoArrayPtr<PRUint8>            mData;
                               // Number of bytes in mData handed to DoDataAvailable().
    1121                 :     PRUint32                           mLength;
    1122                 :   public:
    1123               0 :     nsHtml5DataAvailable(nsHtml5StreamParser* aStreamParser,
    1124                 :                          PRUint8*             aData,
    1125                 :                          PRUint32             aLength)
    1126                 :       : mStreamParser(aStreamParser)
    1127                 :       , mData(aData)
    1128               0 :       , mLength(aLength)
    1129               0 :     {}
    1130               0 :     NS_IMETHODIMP Run()
    1131                 :     {
                                 // DoDataAvailable() asserts that the current thread owns this
                                 // mutex, so take it for the duration of the call.
    1132               0 :       mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex);
    1133               0 :       mStreamParser->DoDataAvailable(mData, mLength);
    1134               0 :       return NS_OK;
    1135                 :     }
    1136                 : };
    1137                 : 
    1138                 : // nsIStreamListener method:
                           // Reads up to aLength bytes from aInStream into a newly allocated buffer
                           // and dispatches an nsHtml5DataAvailable runnable carrying that buffer
                           // to mThread. aContext and aSourceOffset are not used here.
                           // Returns NS_ERROR_OUT_OF_MEMORY when the executor is already broken or
                           // the buffer cannot be allocated, the failure code of Read() when the
                           // read fails, and otherwise the (successful) result of Read().
    1139                 : nsresult
    1140               0 : nsHtml5StreamParser::OnDataAvailable(nsIRequest* aRequest,
    1141                 :                                nsISupports* aContext,
    1142                 :                                nsIInputStream* aInStream,
    1143                 :                                PRUint32 aSourceOffset,
    1144                 :                                PRUint32 aLength)
    1145                 : {
    1146               0 :   if (mExecutor->IsBroken()) {
    1147               0 :     return NS_ERROR_OUT_OF_MEMORY;
    1148                 :   }
    1149                 : 
    1150               0 :   NS_ASSERTION(mRequest == aRequest, "Got data on wrong stream.");
    1151                 :   PRUint32 totalRead;
                             // Allocate with the fallible form of new; the result is checked for
                             // null right below and a failure marks the executor as broken.
    1152               0 :   const mozilla::fallible_t fallible = mozilla::fallible_t();
    1153               0 :   nsAutoArrayPtr<PRUint8> data(new (fallible) PRUint8[aLength]);
    1154               0 :   if (!data) {
    1155               0 :     mExecutor->MarkAsBroken();
    1156               0 :     return NS_ERROR_OUT_OF_MEMORY;
    1157                 :   }
    1158               0 :   nsresult rv = aInStream->Read(reinterpret_cast<char*>(data.get()),
    1159               0 :   aLength, &totalRead);
    1160               0 :   NS_ENSURE_SUCCESS(rv, rv);
    1161               0 :   NS_ASSERTION(totalRead <= aLength, "Read more bytes than were available?");
                             // The runnable receives the buffer via data.forget() together with
                             // the number of bytes actually read (totalRead, not aLength).
    1162                 :   nsCOMPtr<nsIRunnable> dataAvailable = new nsHtml5DataAvailable(this,
    1163                 :                                                                  data.forget(),
    1164               0 :                                                                 totalRead);
                             // A failed dispatch is only reported as a warning; rv still holds the
                             // result of Read().
    1165               0 :   if (NS_FAILED(mThread->Dispatch(dataAvailable, nsIThread::DISPATCH_NORMAL))) {
    1166               0 :     NS_WARNING("Dispatching DataAvailable event failed.");
    1167                 :   }
    1168               0 :   return rv;
    1169                 : }
    1170                 : 
    1171                 : bool
    1172               0 : nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding)
    1173                 : {
                             // Decides whether an encoding label should trigger a switch away
                             // from mCharset.
                             // aEncoding: in/out. On input, the declared label; when this returns
                             //   true it has been overwritten with the preferred name from
                             //   nsCharsetAlias::GetPreferred(). It is left untouched otherwise.
                             // Returns false when the label names the same encoding as mCharset
                             // (in which case mCharsetSource is raised to kCharsetFromMetaTag and
                             // chardet feeding stops), when the alias lookups fail, or when the
                             // preferred name is on the exclusion list below.
    1174               0 :   nsCAutoString newEncoding(aEncoding);
    1175               0 :   newEncoding.Trim(" \t\r\n\f");
                             // A declared UTF-16 variant is treated as if UTF-8 had been declared.
    1176               0 :   if (newEncoding.LowerCaseEqualsLiteral("utf-16") ||
    1177               0 :       newEncoding.LowerCaseEqualsLiteral("utf-16be") ||
    1178               0 :       newEncoding.LowerCaseEqualsLiteral("utf-16le")) {
    1179               0 :     newEncoding.Assign("UTF-8");
    1180                 :   }
    1181                 : 
    1182               0 :   nsresult rv = NS_OK;
    1183                 :   bool eq;
    1184               0 :   rv = nsCharsetAlias::Equals(newEncoding, mCharset, &eq);
    1185               0 :   if (NS_FAILED(rv)) {
    1186               0 :     NS_NOTREACHED("Charset name equality check failed.");
    1187               0 :     return false;
    1188                 :   }
    1189               0 :   if (eq) {
    1190               0 :     mCharsetSource = kCharsetFromMetaTag; // become confident
    1191               0 :     mFeedChardet = false; // don't feed chardet when confident
    1192               0 :     return false;
    1193                 :   }
    1194                 :   
    1195                 :   // XXX check HTML5 non-IANA aliases here
    1196                 :   
    1197               0 :   nsCAutoString preferred;
    1198                 :   
    1199               0 :   rv = nsCharsetAlias::GetPreferred(newEncoding, preferred);
    1200               0 :   if (NS_FAILED(rv)) {
    1201                 :     // the encoding name is bogus
    1202               0 :     return false;
    1203                 :   }
    1204                 :   
                             // Exclusion list: preferred names that are never switched to.
    1205               0 :   if (preferred.LowerCaseEqualsLiteral("utf-16") ||
    1206               0 :       preferred.LowerCaseEqualsLiteral("utf-16be") ||
    1207               0 :       preferred.LowerCaseEqualsLiteral("utf-16le") ||
    1208               0 :       preferred.LowerCaseEqualsLiteral("utf-7") ||
    1209               0 :       preferred.LowerCaseEqualsLiteral("jis_x0212-1990") ||
    1210               0 :       preferred.LowerCaseEqualsLiteral("x-jis0208") ||
    1211               0 :       preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7") ||
    1212               0 :       preferred.LowerCaseEqualsLiteral("x-user-defined")) {
    1213                 :     // Not a rough ASCII superset
    1214               0 :     return false;
    1215                 :   }
    1216               0 :   aEncoding.Assign(preferred);
    1217               0 :   return true;
    1218                 : }
    1219                 : 
    1220                 : bool
    1221               0 : nsHtml5StreamParser::internalEncodingDeclaration(nsString* aEncoding)
    1222                 : {
                             // Handles an encoding declaration found in the document itself.
                             // aEncoding: the declared encoding label as UTF-16 text.
                             // Returns true when a charset switch has been requested from the tree
                             // builder and the parser has been interrupted; returns false when the
                             // current encoding is already confident, reparsing is forbidden, or
                             // PreferredForInternalEncodingDecl() rejects the label.
    1223                 :   // This code needs to stay in sync with
    1224                 :   // nsHtml5MetaScanner::tryCharset. Unfortunately, the
    1225                 :   // trickery with member fields there leads to some copy-paste reuse. :-(
    1226               0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
    1227               0 :   if (mCharsetSource >= kCharsetFromMetaTag) { // this threshold corresponds to "confident" in the HTML5 spec
    1228               0 :     return false;
    1229                 :   }
    1230                 : 
    1231               0 :   if (mReparseForbidden) {
    1232               0 :     return false; // not reparsing even if we wanted to
    1233                 :   }
    1234                 : 
                             // PreferredForInternalEncodingDecl() works on an 8-bit string and may
                             // rewrite it to the preferred encoding name.
    1235               0 :   nsCAutoString newEncoding;
    1236               0 :   CopyUTF16toUTF8(*aEncoding, newEncoding);
    1237                 : 
    1238               0 :   if (!PreferredForInternalEncodingDecl(newEncoding)) {
    1239               0 :     return false;
    1240                 :   }
    1241                 : 
    1242                 :   // Avoid having the chardet ask for another restart after this restart
    1243                 :   // request.
    1244               0 :   mFeedChardet = false;
    1245               0 :   mTreeBuilder->NeedsCharsetSwitchTo(newEncoding, kCharsetFromMetaTag);
    1246               0 :   FlushTreeOpsAndDisarmTimer();
    1247               0 :   Interrupt();
    1248                 :   // the tree op executor will cause the stream parser to terminate
    1249                 :   // if the charset switch request is accepted or it'll uninterrupt 
    1250                 :   // if the request failed. Note that if the restart request fails,
    1251                 :   // we don't bother trying to make chardet resume. Might as well
    1252                 :   // assume that chardet-requested restarts would fail, too.
    1253               0 :   return true;
    1254                 : }
    1255                 : 
    1256                 : void
    1257               0 : nsHtml5StreamParser::FlushTreeOpsAndDisarmTimer()
    1258                 : {
                             // Asserted to run on the parser thread. Cancels the flush timer if it
                             // is armed, flushes view-source output in the view-source modes,
                             // flushes the tree builder, and dispatches mExecutorFlusher to the
                             // main thread.
    1259               0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
    1260               0 :   if (mFlushTimerArmed) {
    1261                 :     // avoid calling Cancel if the flush timer isn't armed to avoid acquiring
    1262                 :     // a mutex
    1263               0 :     mFlushTimer->Cancel();
    1264               0 :     mFlushTimerArmed = false;
    1265                 :   }
    1266               0 :   if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
    1267               0 :     mTokenizer->FlushViewSource();
    1268                 :   }
    1269               0 :   mTreeBuilder->Flush();
                             // A failed dispatch is only reported as a warning.
    1270               0 :   if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) {
    1271               0 :     NS_WARNING("failed to dispatch executor flush event");
    1272                 :   }
    1273               0 : }
    1274                 : 
    1275                 : void
    1276               0 : nsHtml5StreamParser::ParseAvailableData()
    1277                 : {
                             // Tokenizes the buffered data starting at mFirstBuffer. The
                             // assertions below require the parser thread and ownership of
                             // mTokenizerMutex. The loop returns when the last buffer is exhausted
                             // (handling end of stream if the stream has ended) or when the parser
                             // becomes terminated or interrupted.
    1278               0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
    1279               0 :   mTokenizerMutex.AssertCurrentThreadOwns();
    1280                 : 
    1281               0 :   if (IsTerminatedOrInterrupted()) {
    1282               0 :     return;
    1283                 :   }
    1284                 :   
    1285               0 :   for (;;) {
    1286               0 :     if (!mFirstBuffer->hasMore()) {
                                 // The current buffer is exhausted. If it is also the last
                                 // buffer, what happens next depends on the stream state.
    1287               0 :       if (mFirstBuffer == mLastBuffer) {
    1288               0 :         switch (mStreamState) {
    1289                 :           case STREAM_BEING_READ:
    1290                 :             // never release the last buffer.
    1291               0 :             if (!mSpeculating) {
    1292                 :               // reuse buffer space if not speculating
    1293               0 :               mFirstBuffer->setStart(0);
    1294               0 :               mFirstBuffer->setEnd(0);
    1295                 :             }
    1296               0 :             mTreeBuilder->FlushLoads();
    1297                 :             // Dispatch this runnable unconditionally, because the loads
    1298                 :             // that need flushing may have been flushed earlier even if the
    1299                 :             // flush right above here did nothing.
    1300               0 :             if (NS_FAILED(NS_DispatchToMainThread(mLoadFlusher))) {
    1301               0 :               NS_WARNING("failed to dispatch load flush event");
    1302                 :             }
    1303               0 :             return; // no more data for now but expecting more
    1304                 :           case STREAM_ENDED:
                                       // mAtEOF makes the end-of-file handling below run only
                                       // once per end of stream.
    1305               0 :             if (mAtEOF) {
    1306               0 :               return;
    1307                 :             }
    1308               0 :             mAtEOF = true;
    1309               0 :             mTokenizer->eof();
    1310               0 :             mTreeBuilder->StreamEnded();
    1311               0 :             if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
    1312               0 :               mTokenizer->EndViewSource();
    1313                 :             }
    1314               0 :             FlushTreeOpsAndDisarmTimer();
    1315               0 :             return; // no more data and not expecting more
    1316                 :           default:
    1317               0 :             NS_NOTREACHED("It should be impossible to reach this.");
    1318               0 :             return;
    1319                 :         }
    1320                 :       }
                                 // Exhausted, but not the last buffer: advance to the next one.
    1321               0 :       mFirstBuffer = mFirstBuffer->next;
    1322               0 :       continue;
    1323                 :     }
    1324                 : 
    1325                 :     // now we have a non-empty buffer
    1326               0 :     mFirstBuffer->adjust(mLastWasCR);
    1327               0 :     mLastWasCR = false;
                               // adjust() may have consumed the remaining content, so re-check
                               // before tokenizing.
    1328               0 :     if (mFirstBuffer->hasMore()) {
    1329               0 :       mLastWasCR = mTokenizer->tokenizeBuffer(mFirstBuffer);
    1330                 :       // At this point, internalEncodingDeclaration() may have called 
    1331                 :       // Terminate, but that never happens together with script.
    1332                 :       // Can't assert that here, though, because it's possible that the main
    1333                 :       // thread has called Terminate() while this thread was parsing.
    1334               0 :       if (mMode == NORMAL && mTreeBuilder->HasScript()) {
                                   // Start a new speculation: record the current buffer position,
                                   // line number and a tree builder snapshot, flush pending ops,
                                   // and make the speculation the tree builder's op sink.
    1335               0 :         mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex);
    1336                 :         nsHtml5Speculation* speculation = 
    1337                 :           new nsHtml5Speculation(mFirstBuffer,
    1338               0 :                                  mFirstBuffer->getStart(),
    1339               0 :                                  mTokenizer->getLineNumber(),
    1340               0 :                                  mTreeBuilder->newSnapshot());
    1341                 :         mTreeBuilder->AddSnapshotToScript(speculation->GetSnapshot(), 
    1342               0 :                                           speculation->GetStartLineNumber());
    1343               0 :         FlushTreeOpsAndDisarmTimer();
    1344               0 :         mTreeBuilder->SetOpSink(speculation);
    1345               0 :         mSpeculations.AppendElement(speculation); // adopts the pointer
    1346               0 :         mSpeculating = true;
    1347                 :       }
    1348               0 :       if (IsTerminatedOrInterrupted()) {
    1349               0 :         return;
    1350                 :       }
    1351                 :     }
    1352               0 :     continue;
    1353                 :   }
    1354                 : }
    1355                 : 
    1356                 : class nsHtml5StreamParserContinuation : public nsRunnable
    1357               0 : {
                           // Runnable that un-interrupts the stream parser and resumes parsing of
                           // the available data while holding the parser's mTokenizerMutex.
                           // ContinueAfterScripts() and ContinueAfterFailedCharsetSwitch() dispatch
                           // it to mThread.
    1358                 : private:
    1359                 :   nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser;
    1360                 : public:
    1361               0 :   nsHtml5StreamParserContinuation(nsHtml5StreamParser* aStreamParser)
    1362               0 :     : mStreamParser(aStreamParser)
    1363               0 :   {}
    1364               0 :   NS_IMETHODIMP Run()
    1365                 :   {
                               // ParseAvailableData() asserts that the current thread owns this
                               // mutex. Uninterrupt() must come first because ParseAvailableData()
                               // returns immediately while the parser is interrupted.
    1366               0 :     mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex);
    1367               0 :     mStreamParser->Uninterrupt();
    1368               0 :     mStreamParser->ParseAvailableData();
    1369               0 :     return NS_OK;
    1370                 :   }
    1371                 : };
    1372                 : 
    1373                 : void
    1374               0 : nsHtml5StreamParser::ContinueAfterScripts(nsHtml5Tokenizer* aTokenizer, 
    1375                 :                                           nsHtml5TreeBuilder* aTreeBuilder,
    1376                 :                                           bool aLastWasCR)
    1377                 : {
                             // Resolves the oldest speculation; asserted to run on the main thread
                             // and never in a view-source mode.
                             // aTokenizer, aTreeBuilder, aLastWasCR: the state the oldest
                             //   speculation is checked against. The speculation fails if
                             //   aLastWasCR is set, aTokenizer is not in the data state, or
                             //   aTreeBuilder does not match the speculation's snapshot.
                             // On failure the stream is rewound to the speculation's start, all
                             // speculations are discarded and the supplied state is loaded into
                             // mTokenizer and mTreeBuilder. On success the speculation's ops are
                             // flushed to mExecutor. Unless the function returned early, a
                             // continuation runnable is dispatched to mThread at the end.
    1378               0 :   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
    1379               0 :   NS_ASSERTION(!(mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML),
    1380                 :       "ContinueAfterScripts called in view source mode!");
    1381               0 :   if (mExecutor->IsBroken()) {
    1382               0 :     return;
    1383                 :   }
    1384                 :   #ifdef DEBUG
    1385               0 :     mExecutor->AssertStageEmpty();
    1386                 :   #endif
    1387               0 :   bool speculationFailed = false;
                             // First scope: holds only mSpeculationMutex while deciding whether
                             // the oldest speculation succeeded.
    1388                 :   {
    1389               0 :     mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex);
    1390               0 :     if (mSpeculations.IsEmpty()) {
    1391               0 :       NS_NOTREACHED("ContinueAfterScripts called without speculations.");
    1392                 :       return;
    1393                 :     }
    1394               0 :     nsHtml5Speculation* speculation = mSpeculations.ElementAt(0);
    1395               0 :     if (aLastWasCR || 
    1396               0 :         !aTokenizer->isInDataState() || 
    1397               0 :         !aTreeBuilder->snapshotMatches(speculation->GetSnapshot())) {
    1398               0 :       speculationFailed = true;
    1399                 :       // We've got a failed speculation :-(
    1400               0 :       Interrupt(); // Make the parser thread release the tokenizer mutex sooner
    1401                 :       // now fall out of the speculationAutoLock into the tokenizerAutoLock block
    1402                 :     } else {
    1403                 :       // We've got a successful speculation!
    1404               0 :       if (mSpeculations.Length() > 1) {
    1405                 :         // the first speculation isn't the current speculation, so there's 
    1406                 :         // no need to bother the parser thread.
    1407               0 :         speculation->FlushToSink(mExecutor);
    1408               0 :         NS_ASSERTION(!mExecutor->IsScriptExecuting(),
    1409                 :           "ParseUntilBlocked() was supposed to ensure we don't come "
    1410                 :           "here when scripts are executing.");
    1411               0 :         NS_ASSERTION(mExecutor->IsInFlushLoop(), "How are we here if "
    1412                 :           "RunFlushLoop() didn't call ParseUntilBlocked() which is the "
    1413                 :           "only caller of this method?");
    1414               0 :         mSpeculations.RemoveElementAt(0);
    1415                 :         return;
    1416                 :       }
    1417                 :       // else
    1418               0 :       Interrupt(); // Make the parser thread release the tokenizer mutex sooner
    1419                 :       
    1420                 :       // now fall through
    1421                 :       // the first speculation is the current speculation. Need to 
    1422                 :       // release the speculation mutex and acquire the tokenizer 
    1423                 :       // mutex. (Just acquiring the other mutex here would deadlock)
    1424                 :     }
    1425                 :   }
                             // Second scope: holds mTokenizerMutex while the parser state is
                             // rewound (failure) or the speculation is flushed (success).
    1426                 :   {
    1427               0 :     mozilla::MutexAutoLock tokenizerAutoLock(mTokenizerMutex);
    1428                 :     #ifdef DEBUG
    1429                 :     {
                                 // Debug builds: permit atom table lookups from the main thread
                                 // for the duration of this scope; mThread is restored at the end.
    1430               0 :       nsCOMPtr<nsIThread> mainThread;
    1431               0 :       NS_GetMainThread(getter_AddRefs(mainThread));
    1432               0 :       mAtomTable.SetPermittedLookupThread(mainThread);
    1433                 :     }
    1434                 :     #endif
    1435                 :     // In principle, the speculation mutex should be acquired here,
    1436                 :     // but there's no point, because the parser thread only acquires it
    1437                 :     // when it has also acquired the tokenizer mutex and we are already
    1438                 :     // holding the tokenizer mutex.
    1439               0 :     if (speculationFailed) {
    1440                 :       // Rewind the stream
    1441               0 :       mAtEOF = false;
    1442               0 :       nsHtml5Speculation* speculation = mSpeculations.ElementAt(0);
    1443               0 :       mFirstBuffer = speculation->GetBuffer();
    1444               0 :       mFirstBuffer->setStart(speculation->GetStart());
    1445               0 :       mTokenizer->setLineNumber(speculation->GetStartLineNumber());
    1446                 : 
                                 // Report the failed speculation to the console as a warning,
                                 // tagged with the line number where the speculation started.
    1447                 :       nsContentUtils::ReportToConsole(nsIScriptError::warningFlag,
    1448                 :                                       "DOM Events",
    1449                 :                                       mExecutor->GetDocument(),
    1450                 :                                       nsContentUtils::eDOM_PROPERTIES,
    1451                 :                                       "SpeculationFailed",
    1452                 :                                       nsnull, 0,
    1453                 :                                       nsnull,
    1454               0 :                                       EmptyString(),
    1455               0 :                                       speculation->GetStartLineNumber());
    1456                 : 
                                 // Every buffer after the rewind point is reset to its beginning
                                 // so that it is tokenized again in full.
    1457               0 :       nsHtml5OwningUTF16Buffer* buffer = mFirstBuffer->next;
    1458               0 :       while (buffer) {
    1459               0 :         buffer->setStart(0);
    1460               0 :         buffer = buffer->next;
    1461                 :       }
    1462                 :       
    1463               0 :       mSpeculations.Clear(); // potentially a huge number of destructors 
    1464                 :                              // run here synchronously on the main thread...
    1465                 : 
    1466               0 :       mTreeBuilder->flushCharacters(); // empty the pending buffer
    1467               0 :       mTreeBuilder->ClearOps(); // now get rid of the failed ops
    1468                 : 
    1469               0 :       mTreeBuilder->SetOpSink(mExecutor->GetStage());
    1470               0 :       mExecutor->StartReadingFromStage();
    1471               0 :       mSpeculating = false;
    1472                 : 
    1473                 :       // Copy state over
    1474               0 :       mLastWasCR = aLastWasCR;
    1475               0 :       mTokenizer->loadState(aTokenizer);
    1476               0 :       mTreeBuilder->loadState(aTreeBuilder, &mAtomTable);
    1477                 :     } else {    
    1478                 :       // We've got a successful speculation and at least a moment ago it was
    1479                 :       // the current speculation
    1480               0 :       mSpeculations.ElementAt(0)->FlushToSink(mExecutor);
    1481               0 :       NS_ASSERTION(!mExecutor->IsScriptExecuting(),
    1482                 :         "ParseUntilBlocked() was supposed to ensure we don't come "
    1483                 :         "here when scripts are executing.");
    1484               0 :       NS_ASSERTION(mExecutor->IsInFlushLoop(), "How are we here if "
    1485                 :         "RunFlushLoop() didn't call ParseUntilBlocked() which is the "
    1486                 :         "only caller of this method?");
    1487               0 :       mSpeculations.RemoveElementAt(0);
    1488               0 :       if (mSpeculations.IsEmpty()) {
    1489                 :         // yes, it was still the only speculation. Now stop speculating
    1490                 :         // However, before telling the executor to read from stage, flush
    1491                 :         // any pending ops straight to the executor, because otherwise
    1492                 :         // they remain unflushed until we get more data from the network.
    1493               0 :         mTreeBuilder->SetOpSink(mExecutor);
    1494               0 :         mTreeBuilder->Flush(true);
    1495               0 :         mTreeBuilder->SetOpSink(mExecutor->GetStage());
    1496               0 :         mExecutor->StartReadingFromStage();
    1497               0 :         mSpeculating = false;
    1498                 :       }
    1499                 :     }
                               // Resume the parser thread: the continuation un-interrupts the
                               // parser and parses the available data.
    1500               0 :     nsCOMPtr<nsIRunnable> event = new nsHtml5StreamParserContinuation(this);
    1501               0 :     if (NS_FAILED(mThread->Dispatch(event, nsIThread::DISPATCH_NORMAL))) {
    1502               0 :       NS_WARNING("Failed to dispatch nsHtml5StreamParserContinuation");
    1503                 :     }
    1504                 :     // A stream event might run before this event runs, but that's harmless.
    1505                 :     #ifdef DEBUG
    1506               0 :       mAtomTable.SetPermittedLookupThread(mThread);
    1507                 :     #endif
    1508                 :   }
    1509                 : }
    1510                 : 
    1511                 : void
    1512               0 : nsHtml5StreamParser::ContinueAfterFailedCharsetSwitch()
    1513                 : {
    1514               0 :   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
    1515               0 :   nsCOMPtr<nsIRunnable> event = new nsHtml5StreamParserContinuation(this);
    1516               0 :   if (NS_FAILED(mThread->Dispatch(event, nsIThread::DISPATCH_NORMAL))) {
    1517               0 :     NS_WARNING("Failed to dispatch nsHtml5StreamParserContinuation");
    1518                 :   }
    1519               0 : }
    1520                 : 
    1521                 : class nsHtml5TimerKungFu : public nsRunnable
    1522               0 : {
    1523                 : private:
    1524                 :   nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser;
    1525                 : public:
    1526               0 :   nsHtml5TimerKungFu(nsHtml5StreamParser* aStreamParser)
    1527               0 :     : mStreamParser(aStreamParser)
    1528               0 :   {}
    1529               0 :   NS_IMETHODIMP Run()
    1530                 :   {
    1531               0 :     if (mStreamParser->mFlushTimer) {
    1532               0 :       mStreamParser->mFlushTimer->Cancel();
    1533               0 :       mStreamParser->mFlushTimer = nsnull;
    1534                 :     }
    1535               0 :     return NS_OK;
    1536                 :   }
    1537                 : };
    1538                 : 
    1539                 : void
    1540               0 : nsHtml5StreamParser::DropTimer()
    1541                 : {
    1542               0 :   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
    1543                 :   /*
    1544                 :    * Simply nulling out the timer wouldn't work, because if the timer is
    1545                 :    * armed, it needs to be canceled first. Simply canceling it first wouldn't
    1546                 :    * work, because nsTimerImpl::Cancel is not safe for calling from outside
    1547                 :    * the thread where nsTimerImpl::Fire would run. It's not safe to
    1548                 :    * dispatch a runnable to cancel the timer from the destructor of this
    1549                 :    * class, because the timer has a weak (void*) pointer back to this instance
    1550                 :    * of the stream parser and having the timer fire before the runnable
    1551                 :    * cancels it would make the timer access a deleted object.
    1552                 :    *
    1553                 :    * This DropTimer method addresses these issues. This method must be called
    1554                 :    * on the main thread before the destructor of this class is reached.
    1555                 :    * The nsHtml5TimerKungFu object has an nsHtml5RefPtr that addrefs this
    1556                 :    * stream parser object to keep it alive until the runnable is done.
    1557                 :    * The runnable cancels the timer on the parser thread, drops the timer
    1558                 :    * and lets nsHtml5RefPtr send a runnable back to the main thread to
    1559                 :    * release the stream parser.
    1560                 :    */
    1561               0 :   if (mFlushTimer) {
    1562               0 :     nsCOMPtr<nsIRunnable> event = new nsHtml5TimerKungFu(this);
    1563               0 :     if (NS_FAILED(mThread->Dispatch(event, nsIThread::DISPATCH_NORMAL))) {
    1564               0 :       NS_WARNING("Failed to dispatch TimerKungFu event");
    1565                 :     }
    1566                 :   }
    1567               0 : }
    1568                 : 
    1569                 : // Using a static, because the method name Notify is taken by the chardet 
    1570                 : // callback.
    1571                 : void
    1572               0 : nsHtml5StreamParser::TimerCallback(nsITimer* aTimer, void* aClosure)
    1573                 : {
    1574               0 :   (static_cast<nsHtml5StreamParser*> (aClosure))->TimerFlush();
    1575               0 : }
    1576                 : 
    1577                 : void
    1578               0 : nsHtml5StreamParser::TimerFlush()
    1579                 : {
    1580               0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
    1581               0 :   mozilla::MutexAutoLock autoLock(mTokenizerMutex);
    1582                 : 
    1583               0 :   NS_ASSERTION(!mSpeculating, "Flush timer fired while speculating.");
    1584                 : 
    1585                 :   // The timer fired if we got here. No need to cancel it. Mark it as
    1586                 :   // not armed, though.
    1587               0 :   mFlushTimerArmed = false;
    1588                 : 
    1589               0 :   mFlushTimerEverFired = true;
    1590                 : 
    1591               0 :   if (IsTerminatedOrInterrupted()) {
    1592                 :     return;
    1593                 :   }
    1594                 : 
    1595               0 :   if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
    1596               0 :     mTreeBuilder->Flush(); // delete useless ops
    1597               0 :     if (mTokenizer->FlushViewSource()) {
    1598               0 :        if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) {
    1599               0 :          NS_WARNING("failed to dispatch executor flush event");
    1600                 :        }
    1601                 :      }
    1602                 :   } else {
    1603                 :     // we aren't speculating and we don't know when new data is
    1604                 :     // going to arrive. Send data to the main thread.
    1605               0 :     if (mTreeBuilder->Flush(true)) {
    1606               0 :       if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) {
    1607               0 :         NS_WARNING("failed to dispatch executor flush event");
    1608                 :       }
    1609                 :     }
    1610                 :   }
    1611                 : }
    1612                 : 
    1613                 : void
    1614               0 : nsHtml5StreamParser::MarkAsBroken()
    1615                 : {
    1616               0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
    1617               0 :   mTokenizerMutex.AssertCurrentThreadOwns();
    1618                 : 
    1619               0 :   Terminate();
    1620               0 :   mTreeBuilder->MarkAsBroken();
    1621               0 :   mozilla::DebugOnly<bool> hadOps = mTreeBuilder->Flush(false);
    1622               0 :   NS_ASSERTION(hadOps, "Should have had the markAsBroken op!");
    1623               0 :   if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) {
    1624               0 :     NS_WARNING("failed to dispatch executor flush event");
    1625                 :   }
    1626            4392 : }

Generated by: LCOV version 1.7