LCOV - code coverage report
Current view: directory - parser/htmlparser/src - nsHTMLTokens.cpp (source / functions) Found Hit Coverage
Test: app.info Lines: 1006 328 32.6 %
Date: 2012-06-02 Functions: 100 40 40.0 %

       1                 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2                 : /* vim: set ts=2 sw=2 et tw=78: */
       3                 : /* ***** BEGIN LICENSE BLOCK *****
       4                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       5                 :  *
       6                 :  * The contents of this file are subject to the Mozilla Public License Version
       7                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       8                 :  * the License. You may obtain a copy of the License at
       9                 :  * http://www.mozilla.org/MPL/
      10                 :  *
      11                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      12                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      13                 :  * for the specific language governing rights and limitations under the
      14                 :  * License.
      15                 :  *
      16                 :  * The Original Code is mozilla.org code.
      17                 :  *
      18                 :  * The Initial Developer of the Original Code is
      19                 :  * Netscape Communications Corporation.
      20                 :  * Portions created by the Initial Developer are Copyright (C) 1998
      21                 :  * the Initial Developer. All Rights Reserved.
      22                 :  *
      23                 :  * Contributor(s):
      24                 :  *   Blake Kaplan <mrbkap@gmail.com>
      25                 :  *
      26                 :  * Alternatively, the contents of this file may be used under the terms of
      27                 :  * either of the GNU General Public License Version 2 or later (the "GPL"),
      28                 :  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      29                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      30                 :  * of those above. If you wish to allow use of your version of this file only
      31                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      32                 :  * use your version of this file under the terms of the MPL, indicate your
      33                 :  * decision by deleting the provisions above and replace them with the notice
      34                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      35                 :  * the provisions above, a recipient may use your version of this file under
      36                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      37                 :  *
      38                 :  * ***** END LICENSE BLOCK ***** */
      39                 : 
      40                 : #include <ctype.h>
      41                 : #include <time.h>
      42                 : #include <stdio.h>
      43                 : #include "nsScanner.h"
      44                 : #include "nsToken.h"
      45                 : #include "nsHTMLTokens.h"
      46                 : #include "prtypes.h"
      47                 : #include "nsDebug.h"
      48                 : #include "nsHTMLTags.h"
      49                 : #include "nsHTMLEntities.h"
      50                 : #include "nsCRT.h"
      51                 : #include "nsReadableUtils.h"
      52                 : #include "nsUnicharUtils.h"
      53                 : #include "nsScanner.h"
      54                 : #include "nsParserConstants.h"
      55                 : 
      56                 : static const PRUnichar sUserdefined[] = {'u', 's', 'e', 'r', 'd', 'e', 'f',
      57                 :                                          'i', 'n', 'e', 'd', 0};
      58                 : 
      59                 : static const PRUnichar kAttributeTerminalChars[] = {
      60                 :   PRUnichar('&'), PRUnichar('\t'), PRUnichar('\n'),
      61                 :   PRUnichar('\r'), PRUnichar(' '), PRUnichar('>'),
      62                 :   PRUnichar(0)
      63                 : };
      64                 : 
      65                 : static void AppendNCR(nsSubstring& aString, PRInt32 aNCRValue);
      66                 : /**
      67                 :  * Consumes an entity from aScanner and expands it into aString.
      68                 :  *
      69                 :  * @param   aString The target string to append the entity to.
      70                 :  * @param   aScanner Controller of underlying input source
      71                 :  * @param   aIECompatible Controls whether we respect entities with values >
      72                 :  *                        255 and no terminating semicolon.
      73                 :  * @param   aFlag If NS_IPARSER_FLAG_VIEW_SOURCE do not reduce entities...
      74                 :  * @return  error result
      75                 :  */
      76                 : static nsresult
      77              11 : ConsumeEntity(nsScannerSharedSubstring& aString,
      78                 :               nsScanner& aScanner,
      79                 :               bool aIECompatible,
      80                 :               PRInt32 aFlag)
      81                 : {
      82              11 :   nsresult result = NS_OK;
      83                 : 
      84                 :   PRUnichar ch;
      85              11 :   result = aScanner.Peek(ch, 1);
      86                 : 
      87              11 :   if (NS_SUCCEEDED(result)) {
      88              11 :     PRUnichar amp = 0;
      89              11 :     PRInt32 theNCRValue = 0;
      90              22 :     nsAutoString entity;
      91                 : 
      92              11 :     if (nsCRT::IsAsciiAlpha(ch) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
      93              11 :       result = CEntityToken::ConsumeEntity(ch, entity, aScanner);
      94              11 :       if (NS_SUCCEEDED(result)) {
      95              11 :         theNCRValue = nsHTMLEntities::EntityToUnicode(entity);
      96              11 :         PRUnichar theTermChar = entity.Last();
      97                 :         // If an entity value is greater than 255 then:
      98                 :         // Nav 4.x does not treat it as an entity,
      99                 :         // IE treats it as an entity if terminated with a semicolon.
     100                 :         // Resembling IE!!
     101                 : 
     102              11 :         nsSubstring &writable = aString.writable();
     103              11 :         if (theNCRValue < 0 ||
     104                 :             (aIECompatible && theNCRValue > 255 && theTermChar != ';')) {
     105                 :           // Looks like we're not dealing with an entity
     106               0 :           writable.Append(kAmpersand);
     107               0 :           writable.Append(entity);
     108                 :         } else {
     109                 :           // A valid entity so reduce it.
     110              11 :           writable.Append(PRUnichar(theNCRValue));
     111                 :         }
     112                 :       }
     113               0 :     } else if (ch == kHashsign && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
     114               0 :       result = CEntityToken::ConsumeEntity(ch, entity, aScanner);
     115               0 :       if (NS_SUCCEEDED(result)) {
     116               0 :         nsSubstring &writable = aString.writable();
     117               0 :         if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) {
     118                 :           // Looked like an entity but it's not
     119               0 :           aScanner.GetChar(amp);
     120               0 :           writable.Append(amp);
     121               0 :           result = NS_OK;
     122                 :         } else {
     123                 :           PRInt32 err;
     124               0 :           theNCRValue = entity.ToInteger(&err, kAutoDetect);
     125               0 :           AppendNCR(writable, theNCRValue);
     126                 :         }
     127               0 :       }
     128                 :     } else {
     129                 :       // What we thought as entity is not really an entity...
     130               0 :       aScanner.GetChar(amp);
     131               0 :       aString.writable().Append(amp);
     132                 :     }
     133                 :   }
     134                 : 
     135              11 :   return result;
     136                 : }
     137                 : 
     138                 : /*
     139                 :  *  This general purpose method is used when you want to
     140                 :  *  consume attributed text value.
     141                 :  *  Note: It also reduces entities.
     142                 :  *
     143                 :  *  @param   aNewlineCount -- the newline count to increment when hitting newlines
     144                 :  *  @param   aScanner -- controller of underlying input source
     145                 :  *  @param   aTerminalChars -- characters that stop consuming attribute.
     146                 :  *  @param   aAllowNewlines -- whether to allow newlines in the value.
     147                 :  *                             XXX it would be nice to roll this info into
     148                 :  *                             aTerminalChars somehow....
     149                 :  *  @param   aIECompatEntities IE treats entities with values > 255 as
     150                 :  *                             entities only if they're terminated with a
     151                 :  *                             semicolon. This is true to follow that behavior
     152                 :  *                             and false to treat all values as entities.
     153                 :  *  @param   aFlag - contains information such as |dtd mode|view mode|doctype|etc...
     154                 :  *  @return  error result
     155                 :  */
     156                 : static nsresult
     157             757 : ConsumeUntil(nsScannerSharedSubstring& aString,
     158                 :              PRInt32& aNewlineCount,
     159                 :              nsScanner& aScanner,
     160                 :              const nsReadEndCondition& aEndCondition,
     161                 :              bool aAllowNewlines,
     162                 :              bool aIECompatEntities,
     163                 :              PRInt32 aFlag)
     164                 : {
     165             757 :   nsresult result = NS_OK;
     166             757 :   bool     done = false;
     167                 : 
     168            1536 :   do {
     169             768 :     result = aScanner.ReadUntil(aString, aEndCondition, false);
     170             768 :     if (NS_SUCCEEDED(result)) {
     171                 :       PRUnichar ch;
     172             768 :       aScanner.Peek(ch);
     173             768 :       if (ch == kAmpersand) {
     174              11 :         result = ConsumeEntity(aString, aScanner, aIECompatEntities, aFlag);
     175             757 :       } else if (ch == kCR && aAllowNewlines) {
     176               0 :         aScanner.GetChar(ch);
     177               0 :         result = aScanner.Peek(ch);
     178               0 :         if (NS_SUCCEEDED(result)) {
     179               0 :           nsSubstring &writable = aString.writable();
     180               0 :           if (ch == kNewLine) {
     181               0 :             writable.AppendLiteral("\r\n");
     182               0 :             aScanner.GetChar(ch);
     183                 :           } else {
     184               0 :             writable.Append(PRUnichar('\r'));
     185                 :           }
     186               0 :           ++aNewlineCount;
     187               0 :         }
     188             757 :       } else if (ch == kNewLine && aAllowNewlines) {
     189               0 :         aScanner.GetChar(ch);
     190               0 :         aString.writable().Append(PRUnichar('\n'));
     191               0 :         ++aNewlineCount;
     192                 :       } else {
     193             757 :         done = true;
     194                 :       }
     195                 :     }
     196            1536 :   } while (NS_SUCCEEDED(result) && !done);
     197                 : 
     198             757 :   return result;
     199                 : }
     200                 : 
     201                 : /**************************************************************
     202                 :   And now for the token classes...
     203                 :  **************************************************************/
     204                 : 
     205                 : /**
     206                 :  * Constructor from tag id
     207                 :  */
     208            3093 : CHTMLToken::CHTMLToken(eHTMLTags aTag)
     209            3093 :   : CToken(aTag)
     210                 : {
     211            3093 : }
     212                 : 
     213                 : 
     214            3093 : CHTMLToken::~CHTMLToken()
     215                 : {
     216            3093 : }
     217                 : 
     218                 : /*
     219                 :  * Constructor from tag id
     220                 :  */
     221             797 : CStartToken::CStartToken(eHTMLTags aTag)
     222             797 :   : CHTMLToken(aTag)
     223                 : {
     224             797 :   mEmpty = false;
     225             797 :   mContainerInfo = eFormUnknown;
     226                 : #ifdef DEBUG
     227             797 :   mAttributed = false;
     228                 : #endif
     229             797 : }
     230                 : 
     231               0 : CStartToken::CStartToken(const nsAString& aName)
     232               0 :   : CHTMLToken(eHTMLTag_unknown)
     233                 : {
     234               0 :   mEmpty = false;
     235               0 :   mContainerInfo = eFormUnknown;
     236               0 :   mTextValue.Assign(aName);
     237                 : #ifdef DEBUG
     238               0 :   mAttributed = false;
     239                 : #endif
     240               0 : }
     241                 : 
     242              53 : CStartToken::CStartToken(const nsAString& aName, eHTMLTags aTag)
     243              53 :   : CHTMLToken(aTag)
     244                 : {
     245              53 :   mEmpty = false;
     246              53 :   mContainerInfo = eFormUnknown;
     247              53 :   mTextValue.Assign(aName);
     248                 : #ifdef DEBUG
     249              53 :   mAttributed = false;
     250                 : #endif
     251              53 : }
     252                 : 
     253                 : /*
     254                 :  * This method returns the typeid (the tag type) for this token.
     255                 :  */
     256                 : PRInt32
     257           10605 : CStartToken::GetTypeID()
     258                 : {
     259           10605 :   if (eHTMLTag_unknown == mTypeID) {
     260               0 :     mTypeID = nsHTMLTags::LookupTag(mTextValue);
     261                 :   }
     262           10605 :   return mTypeID;
     263                 : }
     264                 : 
     265                 : PRInt32
     266            2519 : CStartToken::GetTokenType()
     267                 : {
     268            2519 :   return eToken_start;
     269                 : }
     270                 : 
     271                 : void
     272               0 : CStartToken::SetEmpty(bool aValue)
     273                 : {
     274               0 :   mEmpty = aValue;
     275               0 : }
     276                 : 
     277                 : bool
     278               0 : CStartToken::IsEmpty()
     279                 : {
     280               0 :   return mEmpty;
     281                 : }
     282                 : 
     283                 : /*
     284                 :  * Consume the identifier portion of the start tag
     285                 :  */
     286                 : nsresult
     287             794 : CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
     288                 : {
     289                 :   // If you're here, we've already Consumed the < char, and are
     290                 :   // ready to Consume the rest of the open tag identifier.
     291                 :   // Stop consuming as soon as you see a space or a '>'.
     292                 :   // NOTE: We don't Consume the tag attributes here, nor do we eat the ">"
     293                 : 
     294             794 :   nsresult result = NS_OK;
     295            1588 :   nsScannerSharedSubstring tagIdent;
     296                 : 
     297             794 :   if (aFlag & NS_IPARSER_FLAG_HTML) {
     298             794 :     result = aScanner.ReadTagIdentifier(tagIdent);
     299             794 :     mTypeID = (PRInt32)nsHTMLTags::LookupTag(tagIdent.str());
     300                 :     // Save the original tag string if this is user-defined or if we
     301                 :     // are viewing source
     302             794 :     if (eHTMLTag_userdefined == mTypeID ||
     303                 :         (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
     304               0 :       mTextValue = tagIdent.str();
     305                 :     }
     306                 :   } else {
     307               0 :     result = aScanner.ReadTagIdentifier(tagIdent);
     308               0 :     mTextValue = tagIdent.str();
     309               0 :     mTypeID = nsHTMLTags::LookupTag(mTextValue);
     310                 :   }
     311                 : 
     312             794 :   if (NS_SUCCEEDED(result) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
     313             794 :     result = aScanner.SkipWhitespace(mNewlineCount);
     314                 :   }
     315                 : 
     316             794 :   if (kEOF == result && !aScanner.IsIncremental()) {
     317                 :     // Take what we can get.
     318               0 :     result = NS_OK;
     319                 :   }
     320                 : 
     321             794 :   return result;
     322                 : }
     323                 : 
     324                 : const nsSubstring&
     325               0 : CStartToken::GetStringValue()
     326                 : {
     327               0 :   if (eHTMLTag_unknown < mTypeID && mTypeID < eHTMLTag_text) {
     328               0 :     if (!mTextValue.Length()) {
     329               0 :       mTextValue.Assign(nsHTMLTags::GetStringValue((nsHTMLTag) mTypeID));
     330                 :     }
     331                 :   }
     332               0 :   return mTextValue;
     333                 : }
     334                 : 
     335                 : void
     336               0 : CStartToken::GetSource(nsString& anOutputString)
     337                 : {
     338               0 :   anOutputString.Truncate();
     339               0 :   AppendSourceTo(anOutputString);
     340               0 : }
     341                 : 
     342                 : void
     343               0 : CStartToken::AppendSourceTo(nsAString& anOutputString)
     344                 : {
     345               0 :   anOutputString.Append(PRUnichar('<'));
     346                 :   /*
     347                 :    * Watch out for Bug 15204
     348                 :    */
     349               0 :   if (!mTextValue.IsEmpty()) {
     350               0 :     anOutputString.Append(mTextValue);
     351                 :   } else {
     352               0 :     anOutputString.Append(GetTagName(mTypeID));
     353                 :   }
     354                 : 
     355               0 :   anOutputString.Append(PRUnichar('>'));
     356               0 : }
     357                 : 
     358             344 : CEndToken::CEndToken(eHTMLTags aTag)
     359             344 :   : CHTMLToken(aTag)
     360                 : {
     361             344 : }
     362                 : 
     363               0 : CEndToken::CEndToken(const nsAString& aName)
     364               0 :   : CHTMLToken(eHTMLTag_unknown)
     365                 : {
     366               0 :   mTextValue.Assign(aName);
     367               0 : }
     368                 : 
     369               0 : CEndToken::CEndToken(const nsAString& aName, eHTMLTags aTag)
     370               0 :   : CHTMLToken(aTag)
     371                 : {
     372               0 :   mTextValue.Assign(aName);
     373               0 : }
     374                 : 
     375                 : nsresult
     376             344 : CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
     377                 : {
     378             344 :   nsresult result = NS_OK;
     379             688 :   nsScannerSharedSubstring tagIdent;
     380                 : 
     381             344 :   if (aFlag & NS_IPARSER_FLAG_HTML) {
     382             344 :     result = aScanner.ReadTagIdentifier(tagIdent);
     383                 : 
     384             344 :     mTypeID = (PRInt32)nsHTMLTags::LookupTag(tagIdent.str());
     385                 :     // Save the original tag string if this is user-defined or if we
     386                 :     // are viewing source
     387             344 :     if (eHTMLTag_userdefined == mTypeID ||
     388                 :         (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
     389               0 :       mTextValue = tagIdent.str();
     390                 :     }
     391                 :   } else {
     392               0 :     result = aScanner.ReadTagIdentifier(tagIdent);
     393               0 :     mTextValue = tagIdent.str();
     394               0 :     mTypeID = nsHTMLTags::LookupTag(mTextValue);
     395                 :   }
     396                 : 
     397             344 :   if (NS_SUCCEEDED(result) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
     398             344 :     result = aScanner.SkipWhitespace(mNewlineCount);
     399                 :   }
     400                 : 
     401             344 :   if (kEOF == result && !aScanner.IsIncremental()) {
     402                 :     // Take what we can get.
     403               0 :     result = NS_OK;
     404                 :   }
     405                 : 
     406             344 :   return result;
     407                 : }
     408                 : 
     409                 : 
     410                 : /*
     411                 :  *  Asks the token to determine the <i>HTMLTag type</i> of
     412                 :  *  the token. This turns around and looks up the tag name
     413                 :  *  in the tag dictionary.
     414                 :  */
     415                 : PRInt32
     416            1032 : CEndToken::GetTypeID()
     417                 : {
     418            1032 :   if (eHTMLTag_unknown == mTypeID) {
     419               0 :     mTypeID = nsHTMLTags::LookupTag(mTextValue);
     420               0 :     switch (mTypeID) {
     421                 :       case eHTMLTag_dir:
     422                 :       case eHTMLTag_menu:
     423               0 :         mTypeID = eHTMLTag_ul;
     424               0 :         break;
     425                 : 
     426                 :       default:
     427               0 :         break;
     428                 :     }
     429                 :   }
     430                 : 
     431            1032 :   return mTypeID;
     432                 : }
     433                 : 
     434                 : PRInt32
     435             688 : CEndToken::GetTokenType()
     436                 : {
     437             688 :   return eToken_end;
     438                 : }
     439                 : 
     440                 : const nsSubstring&
     441               0 : CEndToken::GetStringValue()
     442                 : {
     443               0 :   if (eHTMLTag_unknown < mTypeID && mTypeID < eHTMLTag_text) {
     444               0 :     if (!mTextValue.Length()) {
     445               0 :       mTextValue.Assign(nsHTMLTags::GetStringValue((nsHTMLTag) mTypeID));
     446                 :     }
     447                 :   }
     448               0 :   return mTextValue;
     449                 : }
     450                 : 
     451                 : void
     452               0 : CEndToken::GetSource(nsString& anOutputString)
     453                 : {
     454               0 :   anOutputString.Truncate();
     455               0 :   AppendSourceTo(anOutputString);
     456               0 : }
     457                 : 
     458                 : void
     459               0 : CEndToken::AppendSourceTo(nsAString& anOutputString)
     460                 : {
     461               0 :   anOutputString.AppendLiteral("</");
     462               0 :   if (!mTextValue.IsEmpty()) {
     463               0 :     anOutputString.Append(mTextValue);
     464                 :   } else {
     465               0 :     anOutputString.Append(GetTagName(mTypeID));
     466                 :   }
     467                 : 
     468               0 :   anOutputString.Append(PRUnichar('>'));
     469               0 : }
     470                 : 
     471             306 : CTextToken::CTextToken()
     472             306 :   : CHTMLToken(eHTMLTag_text)
     473                 : {
     474             306 : }
     475                 : 
     476               0 : CTextToken::CTextToken(const nsAString& aName)
     477               0 :   : CHTMLToken(eHTMLTag_text)
     478                 : {
     479               0 :   mTextValue.Rebind(aName);
     480               0 : }
     481                 : 
     482                 : PRInt32
     483             943 : CTextToken::GetTokenType()
     484                 : {
     485             943 :   return eToken_text;
     486                 : }
     487                 : 
     488                 : PRInt32
     489               0 : CTextToken::GetTextLength()
     490                 : {
     491               0 :   return mTextValue.Length();
     492                 : }
     493                 : 
     494                 : nsresult
     495             281 : CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
     496                 : {
     497                 :   static const PRUnichar theTerminalsChars[] =
     498                 :     { PRUnichar('\n'), PRUnichar('\r'), PRUnichar('&'), PRUnichar('<'),
     499                 :       PRUnichar(0) };
     500             281 :   static const nsReadEndCondition theEndCondition(theTerminalsChars);
     501             281 :   nsresult  result = NS_OK;
     502             281 :   bool      done = false;
     503             281 :   nsScannerIterator origin, start, end;
     504                 : 
     505                 :   // Start scanning after the first character, because we know it to
     506                 :   // be part of this text token (we wouldn't have come here if it weren't)
     507             281 :   aScanner.CurrentPosition(origin);
     508             281 :   start = origin;
     509             281 :   aScanner.EndReading(end);
     510                 : 
     511             281 :   NS_ASSERTION(start != end, "Calling CTextToken::Consume when already at the "
     512                 :                              "end of a document is a bad idea.");
     513                 : 
     514             281 :   aScanner.SetPosition(++start);
     515                 : 
     516             892 :   while (NS_OK == result && !done) {
     517             330 :     result = aScanner.ReadUntil(start, end, theEndCondition, false);
     518             330 :     if (NS_OK == result) {
     519             330 :       result = aScanner.Peek(aChar);
     520                 : 
     521             330 :       if (NS_OK == result && (kCR == aChar || kNewLine == aChar)) {
     522              49 :         switch (aChar) {
     523                 :           case kCR:
     524                 :           {
     525                 :             // It's a carriage return. See if this is part of a CR-LF pair (in
     526                 :             // which case we need to treat it as one newline). If we're at the
     527                 :             // edge of a packet, then leave the CR on the scanner, since it
     528                 :             // could still be part of a CR-LF pair. Otherwise, it isn't.
     529                 :             PRUnichar theNextChar;
     530               0 :             result = aScanner.Peek(theNextChar, 1);
     531                 : 
     532               0 :             if (result == kEOF && aScanner.IsIncremental()) {
     533               0 :               break;
     534                 :             }
     535                 : 
     536               0 :             if (NS_SUCCEEDED(result)) {
     537                 :               // Actually get the carriage return.
     538               0 :               aScanner.GetChar(aChar);
     539                 :             }
     540                 : 
     541               0 :             if (kLF == theNextChar) {
     542                 :               // If the "\r" is followed by a "\n", don't replace it and let
     543                 :               // it be ignored by the layout system.
     544               0 :               end.advance(2);
     545               0 :               aScanner.GetChar(theNextChar);
     546                 :             } else {
     547                 :               // If it is standalone, replace the "\r" with a "\n" so that it
     548                 :               // will be considered by the layout system.
     549               0 :               aScanner.ReplaceCharacter(end, kLF);
     550               0 :               ++end;
     551                 :             }
     552               0 :             ++mNewlineCount;
     553               0 :             break;
     554                 :           }
     555                 :           case kLF:
     556              49 :             aScanner.GetChar(aChar);
     557              49 :             ++end;
     558              49 :             ++mNewlineCount;
     559              49 :             break;
     560              49 :         }
     561                 :       } else {
     562             281 :         done = true;
     563                 :       }
     564                 :     }
     565                 :   }
     566                 : 
     567                 :   // Note: This function is only called from nsHTMLTokenizer::ConsumeText. If
     568                 :   // we return an error result from the final buffer, then it is responsible
     569                 :   // for turning it into an NS_OK result.
     570             281 :   aScanner.BindSubstring(mTextValue, origin, end);
     571                 : 
     572             281 :   return result;
     573                 : }
     574                 : 
     575                 : /*
     576                 :  *  Consume as much clear text from scanner as possible.
     577                 :  *  The scanner is left on the < of the perceived end tag.
     578                 :  *
     579                 :  *  @param   aIgnoreComments -- whether or not we should take comments into
     580                 :  *                              account in looking for the end tag.
     581                 :  *  @param   aScanner -- controller of underlying input source
     582                 :  *  @param   aEndTagName -- the terminal tag name.
     583                 :  *  @param   aFlag -- dtd modes and such.
     584                 :  *  @param   aFlushTokens -- set to true if we found the terminal tag.
     585                 :  *  @return  NS_OK if the terminal tag was found and the text was bound;
     586                 :  *           kFakeEndTag if a non-incremental scanner hit the end of the
     587                 :  *           document first; kEOF if an incremental scanner ran out of data.
     588                 :  *           In the kFakeEndTag case everything up to the end is consumed.
     589                 :  */
     590                 : nsresult
     591               0 : CTextToken::ConsumeCharacterData(bool aIgnoreComments,
     592                 :                                  nsScanner& aScanner,
     593                 :                                  const nsAString& aEndTagName,
     594                 :                                  PRInt32 aFlag,
     595                 :                                  bool& aFlushTokens)
     596                 : {
     597               0 :   nsresult result = NS_OK;
     598               0 :   nsScannerIterator theStartOffset, theCurrOffset, theTermStrPos,
     599               0 :                     theStartCommentPos, theAltTermStrPos, endPos;
     600               0 :   bool          done = false;
     601               0 :   bool          theLastIteration = false;
     602                 : 
     603               0 :   aScanner.CurrentPosition(theStartOffset);
     604               0 :   theCurrOffset = theStartOffset;
     605               0 :   aScanner.EndReading(endPos);
     606               0 :   theTermStrPos = theStartCommentPos = theAltTermStrPos = endPos;
     607                 : 
     608                 :   // ALGORITHM: *** The performance is based on correctness of the document ***
     609                 :   // 1. Look for a '<' character.  This could be
     610                 :   //      a) Start of a comment (<!--),
     611                 :   //      b) Start of the terminal string, or
     612                 :   //      c) a start of a tag.
     613                 :   //    We are interested in a) and b). c) is ignored because in CDATA we
     614                 :   //    don't care for tags.
     615                 :   //    NOTE: Technically speaking in CDATA we should ignore the comments too!
     616                 :   //    But for compatibility we don't.
     617                 :   // 2. Having the offset, for '<', search for the terminal string from there
     618                 :   //    on and record its offset.
     619                 :   // 3. From the same '<' offset also search for start of a comment '<!--'.
     620                 :   //    If found search for end comment '-->' between the terminal string and
     621                 :   //    '<!--'.  If you did not find the end comment, then we have a malformed
     622                 :   //    document, i.e., this section has a premature terminal string Ex.
     623                 :   //    <SCRIPT><!-- document.write('</SCRIPT>') //--> </SCRIPT>. But record
     624                 :   //    terminal string's offset if this is the first premature terminal
     625                 :   //    string, and update the current offset to the terminal string
     626                 :   //    (premature) offset and goto step 1.
     627                 :   // 4. Amen...If you found a terminal string and '-->'. Otherwise goto step 1.
     628                 :   // 5. If the end of the document is reached and if we still don't have the
     629                 :   //    condition in step 4. then assume that the premature terminal string
     630                 :   //    is the actual terminal string and goto step 1. This will be our last
     631                 :   //    iteration. If there is no premature terminal string, then we consume
     632                 :   //    all the way until the end of the document and return kFakeEndTag.
     633                 :   //    (If the scanner is incremental we return kEOF instead, without
     634                 :   //    binding any text or moving the scanner.)
     635                 : 
     636               0 :   NS_NAMED_LITERAL_STRING(ltslash, "</");
     637               0 :   const nsString theTerminalString = ltslash + aEndTagName;
     638                 : 
     639               0 :   PRUint32 termStrLen = theTerminalString.Length();
     640               0 :   while (result == NS_OK && !done) {
     641               0 :     bool found = false;
     642               0 :     nsScannerIterator gtOffset, ltOffset = theCurrOffset;
     643               0 :     while (FindCharInReadable(PRUnichar(kLessThan), ltOffset, endPos) &&
     644               0 :            ((PRUint32)ltOffset.size_forward() >= termStrLen ||
     645               0 :             Distance(ltOffset, endPos) >= termStrLen)) {
     646                 :       // Make a copy of the (presumed) end tag and
     647                 :       // do a case-insensitive comparison
     648                 : 
     649               0 :       nsScannerIterator start(ltOffset), end(ltOffset);
     650               0 :       end.advance(termStrLen);
     651                 : 
     652               0 :       if (CaseInsensitiveFindInReadable(theTerminalString, start, end) &&
     653               0 :           (end == endPos || (*end == '>'  || *end == ' '  ||
     654               0 :                              *end == '\t' || *end == '\n' ||
     655               0 :                              *end == '\r'))) {
     656               0 :         gtOffset = end;
     657                 :         // Note that aIgnoreComments is only not set for <script>. We don't
     658                 :         // want to execute scripts that aren't in the form of: <script\s.*>
     659               0 :         if ((end == endPos && aIgnoreComments) ||
     660               0 :             FindCharInReadable(PRUnichar(kGreaterThan), gtOffset, endPos)) {
     661               0 :           found = true;
     662               0 :           theTermStrPos = start;
     663                 :         }
     664               0 :         break;
     665                 :       }
     666               0 :       ltOffset.advance(1);
     667                 :     }
     668                 : 
     669               0 :     if (found && theTermStrPos != endPos) {
     670               0 :       if (!(aFlag & NS_IPARSER_FLAG_STRICT_MODE) &&
     671               0 :           !theLastIteration && !aIgnoreComments) {
     672               0 :         nsScannerIterator endComment(ltOffset);
     673               0 :         endComment.advance(5);
     674                 : 
     675               0 :         if ((theStartCommentPos == endPos) &&
     676               0 :             FindInReadable(NS_LITERAL_STRING("<!--"), theCurrOffset,
     677               0 :                            endComment)) {
     678               0 :           theStartCommentPos = theCurrOffset;
     679                 :         }
     680                 : 
     681               0 :         if (theStartCommentPos != endPos) {
     682                 :           // Search for --> between <!-- and </TERMINALSTRING>.
     683               0 :           theCurrOffset = theStartCommentPos;
     684               0 :           nsScannerIterator terminal(theTermStrPos);
     685               0 :           if (!RFindInReadable(NS_LITERAL_STRING("-->"),
     686               0 :                                theCurrOffset, terminal)) {
     687                 :             // If you're here it means that we have a bogus terminal string.
     688                 :             // Even though it is bogus, the position of the terminal string
     689                 :             // could be helpful in case we hit the rock bottom.
     690               0 :             if (theAltTermStrPos == endPos) {
     691                 :               // But we only want to remember the first bogus terminal string.
     692               0 :               theAltTermStrPos = theTermStrPos;
     693                 :             }
     694                 : 
     695                 :             // We did not find '-->' so keep searching for terminal string.
     696               0 :             theCurrOffset = theTermStrPos;
     697               0 :             theCurrOffset.advance(termStrLen);
     698               0 :             continue;
     699                 :           }
     700                 :         }
     701                 :       }
     702                 : 
     703               0 :       aScanner.BindSubstring(mTextValue, theStartOffset, theTermStrPos);
     704               0 :       aScanner.SetPosition(ltOffset);
     705                 : 
     706                 :       // We found </SCRIPT> or </STYLE>...permit flushing -> Ref: Bug 22485
     707               0 :       aFlushTokens = true;
     708               0 :       done = true;
     709                 :     } else {
     710                 :       // We end up here if:
     711                 :       // a) when the buffer runs out of data.
     712                 :       // b) when the terminal string is not found.
     713               0 :       if (!aScanner.IsIncremental()) {
     714               0 :         if (theAltTermStrPos != endPos) {
     715                 :           // If you're here it means that we hit the rock bottom and therefore
     716                 :           // switch to plan B, since we have an alternative terminating string.
     717               0 :           theCurrOffset = theAltTermStrPos;
     718               0 :           theLastIteration = true;
     719                 :         } else {
     720                 :           // Oops, We fell all the way down to the end of the document.
     721               0 :           done = true; // Do this to fix Bug. 35456
     722               0 :           result = kFakeEndTag;
     723               0 :           aScanner.BindSubstring(mTextValue, theStartOffset, endPos);
     724               0 :           aScanner.SetPosition(endPos);
     725                 :         }
     726                 :       } else {
     727               0 :         result = kEOF;
     728                 :       }
     729                 :     }
     730                 :   }
     731                 : 
     732               0 :   if (result == NS_OK) {
     733               0 :     mNewlineCount = mTextValue.CountChar(kNewLine);
     734                 :   }
     735                 : 
     736               0 :   return result;
     737                 : }
     738                 : 
     739                 : /*
     740                 :  *  Consume as much clear text from scanner as possible. Reducing entities.
     741                 :  *  The scanner is left on the < of the perceived end tag.
     742                 :  *
     743                 :  *  @param   aDiscardFirstNewline -- whether to drop a leading newline.
     744                 :  *  @param   aConservativeConsume -- controls our handling of content with no
     745                 :  *                                   terminating string.
     746                 :  *  @param   aScanner -- controller of underlying input source
     747                 :  *  @param   aEndTagName -- the terminal tag name.
     748                 :  *  @param   aFlag -- dtd modes and such.
     749                 :  *  @param   aFound -- true if the terminal tag was found (or faked at EOF).
     750                 :  *  @return  error result
     751                 :  */
     752                 : nsresult
     753              25 : CTextToken::ConsumeParsedCharacterData(bool aDiscardFirstNewline,
     754                 :                                        bool aConservativeConsume,
     755                 :                                        nsScanner& aScanner,
     756                 :                                        const nsAString& aEndTagName,
     757                 :                                        PRInt32 aFlag,
     758                 :                                        bool& aFound)
     759                 : {
     760                 :   // This function is fairly straightforward except if there is no terminating
     761                 :   // string. If there is, we simply loop through all of the entities, reducing
     762                 :   // them as necessary and skipping over non-terminal strings starting with <.
     763                 :   // If there is *no* terminal string, then we examine aConservativeConsume.
     764                 :   // If we want to be conservative, we backtrack to the first place in the
     765                 :   // document that looked like the end of PCDATA (i.e., the first tag). This
     766                 :   // is for compatibility and so we don't regress bug 42945. If we are not
     767                 :   // conservative, then we consume everything, all the way up to the end of
     768                 :   // the document.
     769                 : 
     770                 :   static const PRUnichar terminalChars[] = {
     771                 :     PRUnichar('\r'), PRUnichar('\n'), PRUnichar('&'), PRUnichar('<'),
     772                 :     PRUnichar(0)
     773                 :   };
     774              25 :   static const nsReadEndCondition theEndCondition(terminalChars);
     775                 : 
     776              25 :   nsScannerIterator currPos, endPos, altEndPos;
     777              25 :   PRUint32 truncPos = 0;
     778              25 :   PRInt32 truncNewlineCount = 0;
     779              25 :   aScanner.CurrentPosition(currPos);
     780              25 :   aScanner.EndReading(endPos);
     781                 : 
     782              25 :   altEndPos = endPos;
     783                 : 
     784              50 :   nsScannerSharedSubstring theContent;
     785              25 :   PRUnichar ch = 0;
     786                 : 
     787              50 :   NS_NAMED_LITERAL_STRING(commentStart, "<!--");
     788              50 :   NS_NAMED_LITERAL_STRING(ltslash, "</");
     789              50 :   const nsString theTerminalString = ltslash + aEndTagName;
     790              25 :   PRUint32 termStrLen = theTerminalString.Length();
     791              25 :   PRUint32 commentStartLen = commentStart.Length();
     792                 : 
     793              25 :   nsresult result = NS_OK;
     794                 : 
     795                 :   // Note that if we're already at the end of the document, the ConsumeUntil
     796                 :   // will fail, and we'll do the right thing.
     797               0 :   do {
     798                 :     result = ConsumeUntil(theContent, mNewlineCount, aScanner,
     799              25 :                           theEndCondition, true, false, aFlag);
     800                 : 
     801              25 :     if (aDiscardFirstNewline &&
     802               0 :         (NS_SUCCEEDED(result) || !aScanner.IsIncremental()) &&
     803               0 :         !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
     804                 :       // Check if the very first character is a newline, and if so discard it.
     805                 :       // Note that we don't want to discard it in view source!
     806                 :       // Also note that this has to happen here (as opposed to before the
     807                 :       // ConsumeUntil) because we have to expand any entities.
     808                 :       // XXX It would be nice to be able to do this without calling
     809                 :       // writable()!
     810               0 :       const nsSubstring &firstChunk = theContent.str();
     811               0 :       if (!firstChunk.IsEmpty()) {
     812               0 :         PRUint32 where = 0;
     813               0 :         PRUnichar newline = firstChunk.First();
     814                 : 
     815               0 :         if (newline == kCR || newline == kNewLine) {
     816               0 :           ++where;
     817                 : 
     818               0 :           if (firstChunk.Length() > 1) {
     819               0 :             if (newline == kCR && firstChunk.CharAt(1) == kNewLine) {
     820                 :               // Handle \r\n = 1 newline.
     821               0 :               ++where;
     822                 :             }
     823                 :             // Note: \n\r = 2 newlines.
     824                 :           }
     825                 :         }
     826                 : 
     827               0 :         if (where != 0) {
     828               0 :           theContent.writable() = Substring(firstChunk, where);
     829                 :         }
     830                 :       }
     831                 :     }
     832              25 :     aDiscardFirstNewline = false;
     833                 : 
     834              25 :     if (NS_FAILED(result)) {
     835               0 :       if (kEOF == result && !aScanner.IsIncremental()) {
     836               0 :         aFound = true; // this is as good as it gets.
     837               0 :         result = kFakeEndTag;
     838                 : 
     839               0 :         if (aConservativeConsume && altEndPos != endPos) {
     840                 :           // We ran out of room looking for a </title>. Go back to the first
     841                 :           // place that looked like a tag and use that as our stopping point.
     842               0 :           theContent.writable().Truncate(truncPos);
     843               0 :           mNewlineCount = truncNewlineCount;
     844               0 :           aScanner.SetPosition(altEndPos, false, true);
     845                 :         }
     846                 :         // else we take everything we consumed.
     847               0 :         mTextValue.Rebind(theContent.str());
     848                 :       } else {
     849               0 :         aFound = false;
     850                 :       }
     851                 : 
     852               0 :       return result;
     853                 :     }
     854                 : 
     855              25 :     aScanner.CurrentPosition(currPos);
     856              25 :     aScanner.GetChar(ch); // this character must be '&' or '<'
     857                 : 
     858              25 :     if (ch == kLessThan && altEndPos == endPos) {
     859                 :       // Keep this position in case we need it for later.
     860              25 :       altEndPos = currPos;
     861              25 :       truncPos = theContent.str().Length();
     862              25 :       truncNewlineCount = mNewlineCount;
     863                 :     }
     864                 : 
     865              25 :     if (Distance(currPos, endPos) >= termStrLen) {
     866              25 :       nsScannerIterator start(currPos), end(currPos);
     867              25 :       end.advance(termStrLen);
     868                 : 
     869              25 :       if (CaseInsensitiveFindInReadable(theTerminalString, start, end)) {
     870              25 :         if (end != endPos && (*end == '>'  || *end == ' '  ||
     871               0 :                               *end == '\t' || *end == '\n' ||
     872               0 :                               *end == '\r')) {
     873              25 :           aFound = true;
     874              25 :           mTextValue.Rebind(theContent.str());
     875                 : 
     876                 :           // Note: This SetPosition() is actually going backwards from the
     877                 :           // scanner's mCurrentPosition (so we pass aReverse == true). This
     878                 :           // is because we call GetChar() above after we get the current
     879                 :           // position.
     880              25 :           aScanner.SetPosition(currPos, false, true);
     881              25 :           break;
     882                 :         }
     883                 :       }
     884                 :     }
     885                 :     // IE only consumes <!-- --> as comments in PCDATA.
     886               0 :     if (Distance(currPos, endPos) >= commentStartLen) {
     887               0 :       nsScannerIterator start(currPos), end(currPos);
     888               0 :       end.advance(commentStartLen);
     889                 : 
     890               0 :       if (CaseInsensitiveFindInReadable(commentStart, start, end)) {
     891               0 :         CCommentToken consumer; // stack allocated.
     892                 : 
     893                 :         // CCommentToken expects us to be on the '-'
     894               0 :         aScanner.SetPosition(currPos.advance(2));
     895                 : 
     896                 :         // In quirks mode we consume too many things as comments, so pretend
     897                 :         // that we're not by modifying aFlag.
     898               0 :         result = consumer.Consume(*currPos, aScanner,
     899                 :                                   (aFlag & ~NS_IPARSER_FLAG_QUIRKS_MODE) |
     900               0 :                                    NS_IPARSER_FLAG_STRICT_MODE);
     901               0 :         if (kEOF == result) {
     902                 :           // This can only happen if we're really out of space.
     903               0 :           return kEOF;
     904               0 :         } else if (kNotAComment == result) {
     905                 :           // Fall through and consume this as text.
     906               0 :           aScanner.CurrentPosition(currPos);
     907               0 :           aScanner.SetPosition(currPos.advance(1));
     908                 :         } else {
     909               0 :           consumer.AppendSourceTo(theContent.writable());
     910               0 :           mNewlineCount += consumer.GetNewlineCount();
     911               0 :           continue;
     912                 :         }
     913                 :       }
     914                 :     }
     915                 : 
     916               0 :     result = kEOF;
     917                 :     // We did not find the terminal string yet so
     918                 :     // include the character that stopped consumption.
     919               0 :     theContent.writable().Append(ch);
     920                 :   } while (currPos != endPos);
     921                 : 
     922              25 :   return result;
     923                 : }
     924                 : 
     925                 : void
     926               0 : CTextToken::CopyTo(nsAString& aStr) // Copies this token's text (mTextValue) into aStr.
     927                 : {
     928               0 :   nsScannerIterator start, end;
     929               0 :   mTextValue.BeginReading(start);
     930               0 :   mTextValue.EndReading(end);
     931               0 :   CopyUnicodeTo(start, end, aStr); // [start, end) spans all of mTextValue
     932               0 : }
     933                 : 
     934             306 : const nsSubstring& CTextToken::GetStringValue() // Returns mTextValue as a string.
     935                 : {
     936             306 :   return mTextValue.AsString();
     937                 : }
     938                 : 
     939                 : void
     940               0 : CTextToken::Bind(nsScanner* aScanner, nsScannerIterator& aStart,
     941                 :                  nsScannerIterator& aEnd)
     942                 : {
     943               0 :   aScanner->BindSubstring(mTextValue, aStart, aEnd); // bind mTextValue to the scanner range aStart..aEnd
     944               0 : }
     945                 : 
     946                 : void
     947               0 : CTextToken::Bind(const nsAString& aStr) // Overload taking a string instead of a scanner range.
     948                 : {
     949               0 :   mTextValue.Rebind(aStr); // rebinds mTextValue to aStr
     950               0 : }
     951                 : 
     952               0 : CCDATASectionToken::CCDATASectionToken(eHTMLTags aTag) // Forwards aTag to the CHTMLToken base.
     953               0 :   : CHTMLToken(aTag)
     954                 : {
     955               0 : }
     956                 : 
     957               0 : CCDATASectionToken::CCDATASectionToken(const nsAString& aName) // Tag is eHTMLTag_unknown.
     958               0 :   : CHTMLToken(eHTMLTag_unknown)
     959                 : {
     960               0 :   mTextValue.Assign(aName); // initial text value is aName
     961               0 : }
     962                 : 
     963                 : PRInt32
     964               0 : CCDATASectionToken::GetTokenType() // Always reports eToken_cdatasection.
     965                 : {
     966               0 :   return eToken_cdatasection;
     967                 : }
     968                 : 
     969                 : /*
     970                 :  *  Consume as much marked text from scanner as possible.
     971                 :  *  Note: This has to handle case: "<![ ! IE 5]>", in addition to "<![..[..]]>"
     972                 :  *  @param   aChar -- overwritten before it is read; scratch for scanned chars
     973                 :  *  @param   aScanner -- controller of underlying input source
     974                 :  *  @param   aFlag -- only NS_IPARSER_FLAG_VIEW_SOURCE is examined here
     975                 :  *  @return  error result; kEOF becomes NS_OK if the scanner is not incremental
     976                 :  */
     977                 : nsresult
     978               0 : CCDATASectionToken::Consume(PRUnichar aChar, nsScanner& aScanner,
     979                 :                             PRInt32 aFlag)
     980                 : {
     981                 :   static const PRUnichar theTerminalsChars[] =
     982                 :   { PRUnichar('\r'), PRUnichar('\n'), PRUnichar(']'), PRUnichar(0) };
     983               0 :   static const nsReadEndCondition theEndCondition(theTerminalsChars);
     984               0 :   nsresult  result = NS_OK;
     985               0 :   bool      done = false;
     986                 : 
     987               0 :   while (NS_OK == result && !done) {
     988               0 :     result = aScanner.ReadUntil(mTextValue, theEndCondition, false);
     989               0 :     if (NS_OK == result) {
     990               0 :       result = aScanner.Peek(aChar);
     991               0 :       if (kCR == aChar && NS_OK == result) {
     992               0 :         result = aScanner.GetChar(aChar); // Strip off the \r
     993               0 :         result = aScanner.Peek(aChar);    // Then see what's next.
     994               0 :         if (NS_OK == result) {
     995               0 :           switch(aChar) {
     996                 :             case kCR:
     997               0 :               result = aScanner.GetChar(aChar); // Strip off the \r
     998               0 :               mTextValue.AppendLiteral("\n\n");
     999               0 :               mNewlineCount += 2;
    1000               0 :               break;
    1001                 : 
    1002                 :             case kNewLine:
    1003                 :               // Which means we saw \r\n, which becomes \n
    1004               0 :               result = aScanner.GetChar(aChar); // Strip off the \n
    1005                 : 
    1006                 :               // Fall through...
    1007                 :             default:
    1008               0 :               mTextValue.AppendLiteral("\n");
    1009               0 :               mNewlineCount++;
    1010               0 :               break;
    1011                 :           }
    1012                 :         }
    1013               0 :       } else if (kNewLine == aChar) {
    1014               0 :         result = aScanner.GetChar(aChar);
    1015               0 :         mTextValue.Append(aChar);
    1016               0 :         ++mNewlineCount;
    1017               0 :       } else if (kRightSquareBracket == aChar) {
    1018               0 :         bool canClose = false;
    1019               0 :         result = aScanner.GetChar(aChar); // Strip off the ]
    1020               0 :         mTextValue.Append(aChar);
    1021               0 :         result = aScanner.Peek(aChar);    // Then see what's next.
    1022               0 :         if (NS_OK == result && kRightSquareBracket == aChar) {
    1023               0 :           result = aScanner.GetChar(aChar); // Strip off the second ]
    1024               0 :           mTextValue.Append(aChar);
    1025               0 :           canClose = true;
    1026                 :         }
    1027                 : 
    1028                 :         // The goal here is to not lose data from the page when encountering
    1029                 :         // markup like: <![endif]-->.  This means that in normal parsing, we
    1030                 :         // allow ']' to end the marked section and just drop everything between
    1031                 :         // it and the '>'.  In view-source mode, we cannot drop things on the
    1032                 :         // floor like that.  In fact, to make view-source of XML with script in
    1033                 :         // CDATA sections at all bearable, we need to somewhat enforce the ']]>'
    1034                 :         // terminator for marked sections.  So make the tokenization somewhat
    1035                 :         // different when in view-source _and_ dealing with a CDATA section.
    1036                 :         // XXX We should remember this StringBeginsWith test.
    1037                 :         bool inCDATA = (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) &&
    1038               0 :           StringBeginsWith(mTextValue, NS_LITERAL_STRING("[CDATA["));
    1039               0 :         if (inCDATA) {
    1040                 :           // Consume all right square brackets to catch cases such as:
    1041                 :           // <![CDATA[foo]]]>
    1042               0 :           while (true) {
    1043               0 :             result = aScanner.Peek(aChar);
    1044               0 :             if (result != NS_OK || aChar != kRightSquareBracket) {
    1045                 :               break;
    1046                 :             }
    1047                 : 
    1048               0 :             mTextValue.Append(aChar);
    1049               0 :             aScanner.GetChar(aChar);
    1050                 :           }
    1051                 :         } else {
    1052               0 :           nsAutoString dummy; // Skip any bad data
    1053               0 :           result = aScanner.ReadUntil(dummy, kGreaterThan, false);
    1054                 :         }
    1055               0 :         if (NS_OK == result &&
    1056               0 :             (!inCDATA || (canClose && kGreaterThan == aChar))) {
    1057               0 :           result = aScanner.GetChar(aChar); // Strip off the >
    1058               0 :           done = true;
    1059                 :         }
    1060                 :       } else {
    1061               0 :         done = true;
    1062                 :       }
    1063                 :     }
    1064                 :   }
    1065                 : 
    1066               0 :   if (kEOF == result && !aScanner.IsIncremental()) {
    1067                 :     // We ran out of space looking for the end of this CDATA section.
    1068                 :     // In order to not completely lose the entire section, treat everything
    1069                 :     // until the end of the document as part of the CDATA section and let
    1070                 :     // the DTD handle it.
    1071               0 :     mInError = true;
    1072               0 :     result = NS_OK;
    1073                 :   }
    1074                 : 
    1075               0 :   return result;
    1076                 : }
    1077                 : /*
                         :  *  Retrieve the text held by this CDATA section token.
                         :  *
                         :  *  @return  reference to mTextValue
                         :  */
    1078                 : const nsSubstring&
    1079               0 : CCDATASectionToken::GetStringValue()
    1080                 : {
    1081               0 :   return mTextValue;
    1082                 : }
    1083                 : 
    1084                 : /*
                         :  *  Default constructor. Initializes the CHTMLToken base with
                         :  *  eHTMLTag_markupDecl and does nothing else.
                         :  */
    1085               0 : CMarkupDeclToken::CMarkupDeclToken()
    1086               0 :   : CHTMLToken(eHTMLTag_markupDecl)
    1087                 : {
    1088               0 : }
    1089                 : /*
                         :  *  String based constructor. Initializes the CHTMLToken base with
                         :  *  eHTMLTag_markupDecl and rebinds mTextValue to the given string.
                         :  *
                         :  *  @param   aName -- string that mTextValue is rebound to
                         :  */
    1090               0 : CMarkupDeclToken::CMarkupDeclToken(const nsAString& aName)
    1091               0 :   : CHTMLToken(eHTMLTag_markupDecl)
    1092                 : {
    1093               0 :   mTextValue.Rebind(aName);
    1094               0 : }
    1095                 : /*
                         :  *  Identify the kind of token this is.
                         :  *
                         :  *  @return  eToken_markupDecl, always
                         :  */
    1096                 : PRInt32
    1097               0 : CMarkupDeclToken::GetTokenType()
    1098                 : {
    1099               0 :   return eToken_markupDecl;
    1100                 : }
    1101                 : 
    1102                 : /*
    1103                 :  *  Consume as much declaration from scanner as possible.
    1104                 :  *  Declaration is a markup declaration of ELEMENT, ATTLIST, ENTITY or
    1105                 :  *  NOTATION, which can span multiple lines and ends in >.
                         :  *
                         :  *  A > that appears while a ' or " quoted string is open does not end
                         :  *  the declaration. The text from the starting position up to the
                         :  *  final value of the local "end" iterator is bound to mTextValue, and
                         :  *  mNewlineCount is incremented once per line break handled (a "\r\n"
                         :  *  pair counts once). When the scanner reports kEOF, mInError is set;
                         :  *  if the scanner is not incremental the kEOF is then replaced by
                         :  *  NS_OK.
    1106                 :  *
    1107                 :  *  @param   aChar -- last char consumed from stream
    1108                 :  *  @param   aScanner -- controller of underlying input source
                         :  *  @param   aFlag -- not referenced by this function
    1109                 :  *  @return  error result
    1110                 :  */
    1111                 : nsresult
    1112               0 : CMarkupDeclToken::Consume(PRUnichar aChar, nsScanner& aScanner,
    1113                 :                           PRInt32 aFlag)
    1114                 : {
    1115                 :   static const PRUnichar theTerminalsChars[] = // Chars handed to ReadUntil as its end condition
    1116                 :     { PRUnichar('\n'), PRUnichar('\r'), PRUnichar('\''), PRUnichar('"'),
    1117                 :       PRUnichar('>'),
    1118                 :       PRUnichar(0) };
    1119               0 :   static const nsReadEndCondition theEndCondition(theTerminalsChars);
    1120               0 :   nsresult  result = NS_OK;
    1121               0 :   bool      done = false;
    1122               0 :   PRUnichar quote = 0; // The quote char that is currently open, 0 if none
    1123                 : 
    1124               0 :   nsScannerIterator origin, start, end;
    1125               0 :   aScanner.CurrentPosition(origin); // Where the bound text will begin
    1126               0 :   start = origin;
    1127                 : 
    1128               0 :   while (NS_OK == result && !done) {
    1129               0 :     aScanner.SetPosition(start);
    1130               0 :     result = aScanner.ReadUntil(start, end, theEndCondition, false);
    1131               0 :     if (NS_OK == result) {
    1132               0 :       result = aScanner.Peek(aChar); // Which of the end-condition chars stopped us?
    1133                 : 
    1134               0 :       if (NS_OK == result) {
    1135               0 :         PRUnichar theNextChar = 0;
    1136               0 :         if (kCR == aChar || kNewLine == aChar) {
    1137               0 :           result = aScanner.GetChar(aChar); // Strip off the char
    1138               0 :           result = aScanner.Peek(theNextChar); // Then see what's next.
    1139                 :         }
    1140               0 :         switch(aChar) {
    1141                 :           case kCR:
    1142                 :             // result = aScanner.GetChar(aChar);
    1143               0 :             if (kLF == theNextChar) {
    1144                 :               // If the "\r" is followed by a "\n", don't replace it and
    1145                 :               // let it be ignored by the layout system
    1146               0 :               end.advance(2);
    1147               0 :               result = aScanner.GetChar(theNextChar);
    1148                 :             } else {
    1149                 :               // If it is standalone, replace the "\r" with a "\n" so that
    1150                 :               // it will be considered by the layout system
    1151               0 :               aScanner.ReplaceCharacter(end, kLF);
    1152               0 :               ++end;
    1153                 :             }
    1154               0 :             ++mNewlineCount;
    1155               0 :             break;
    1156                 :           case kLF:
    1157               0 :             ++end;
    1158               0 :             ++mNewlineCount;
    1159               0 :             break;
    1160                 :           case '\'':
    1161                 :           case '"':
    1162               0 :             ++end;
    1163               0 :             if (quote) {
    1164               0 :               if (quote == aChar) { // Only the matching quote char closes the string
    1165               0 :                 quote = 0;
    1166                 :               }
    1167                 :             } else {
    1168               0 :               quote = aChar; // Opening quote; remember which kind it was
    1169                 :             }
    1170               0 :             break;
    1171                 :           case kGreaterThan:
    1172               0 :             if (quote) {
    1173               0 :               ++end; // Inside a quoted string, so this > is ordinary text
    1174                 :             } else {
    1175               0 :               start = end;
    1176                 :               // Note that start is wrong after this, we just avoid temp var
    1177               0 :               ++start;
    1178               0 :               aScanner.SetPosition(start); // Skip the >
    1179               0 :               done = true;
    1180                 :             }
    1181               0 :             break;
    1182                 :           default:
    1183               0 :             NS_ABORT_IF_FALSE(0, "should not happen, switch is missing cases?");
    1184               0 :             break;
    1185                 :         }
    1186               0 :         start = end; // The next ReadUntil picks up from here
    1187                 :       } else {
    1188               0 :         done = true;
    1189                 :       }
    1190                 :     }
    1191                 :   }
    1192               0 :   aScanner.BindSubstring(mTextValue, origin, end);
    1193                 : 
    1194               0 :   if (kEOF == result) {
    1195               0 :     mInError = true; // Input ran out before the declaration was closed
    1196               0 :     if (!aScanner.IsIncremental()) {
    1197                 :       // Hide this EOF.
    1198               0 :       result = NS_OK;
    1199                 :     }
    1200                 :   }
    1201                 : 
    1202               0 :   return result;
    1203                 : }
    1204                 : /*
                         :  *  Retrieve the text held by this markup declaration token.
                         :  *
                         :  *  @return  the result of mTextValue.AsString()
                         :  */
    1205                 : const nsSubstring&
    1206               0 : CMarkupDeclToken::GetStringValue()
    1207                 : {
    1208               0 :   return mTextValue.AsString();
    1209                 : }
    1210                 : 
    1211                 : /*
                         :  *  Default constructor. Initializes the CHTMLToken base with
                         :  *  eHTMLTag_comment and does nothing else.
                         :  */
    1212              25 : CCommentToken::CCommentToken()
    1213              25 :   : CHTMLToken(eHTMLTag_comment)
    1214                 : {
    1215              25 : }
    1216                 : /*
                         :  *  String based constructor. Initializes the CHTMLToken base with
                         :  *  eHTMLTag_comment and rebinds mComment to the given string.
                         :  *  mCommentDecl is not touched here.
                         :  *
                         :  *  @param   aName -- string that mComment is rebound to
                         :  */
    1217               0 : CCommentToken::CCommentToken(const nsAString& aName)
    1218               0 :   : CHTMLToken(eHTMLTag_comment)
    1219                 : {
    1220               0 :   mComment.Rebind(aName);
    1221               0 : }
    1222                 : /*
                         :  *  Append the source text of this comment to the given string. The
                         :  *  text comes from mCommentDecl (not mComment) and is added with
                         :  *  AppendUnicodeTo().
                         :  *
                         :  *  @param   anOutputString -- string that is appended to
                         :  */
    1223                 : void
    1224               0 : CCommentToken::AppendSourceTo(nsAString& anOutputString)
    1225                 : {
    1226               0 :   AppendUnicodeTo(mCommentDecl, anOutputString);
    1227               0 : }
    1228                 : /*
                         :  *  Scan forward from aCurrent looking for a >.
                         :  *  The scan gives up as soon as two consecutive '-' characters have
                         :  *  been stepped over, or when aEnd is reached.
                         :  *
                         :  *  @param   aCurrent -- position to start scanning from
                         :  *  @param   aEnd -- position at which scanning stops
                         :  *  @param   aGt -- set to the position of the > when one is found,
                         :  *                  otherwise left unmodified
                         :  *  @return  true if a > was found, false otherwise
                         :  */
    1229                 : static bool
    1230               0 : IsCommentEnd(const nsScannerIterator& aCurrent, const nsScannerIterator& aEnd,
    1231                 :              nsScannerIterator& aGt)
    1232                 : {
    1233               0 :   nsScannerIterator current = aCurrent;
    1234               0 :   PRInt32 dashes = 0; // Length of the run of '-' just before current
    1235                 : 
    1236               0 :   while (current != aEnd && dashes != 2) {
    1237               0 :     if (*current == kGreaterThan) {
    1238               0 :       aGt = current;
    1239               0 :       return true;
    1240                 :     }
    1241               0 :     if (*current == PRUnichar('-')) {
    1242               0 :       ++dashes;
    1243                 :     } else {
    1244               0 :       dashes = 0; // Any other char breaks the run
    1245                 :     }
    1246               0 :     ++current;
    1247                 :   }
    1248                 : 
    1249               0 :   return false;
    1250                 : }
    1251                 : /*
                         :  *  Consume a comment using the strict rules.
                         :  *
                         :  *  The text must start with <!-- to be treated as a regular comment.
                         :  *  After the opening dashes, every "--" that is found toggles a
                         :  *  "balanced" flag; the comment ends when the flag is set and
                         :  *  IsCommentEnd() finds a > after that "--". On success mComment is
                         :  *  bound to the text between the opening dashes and the closing
                         :  *  "--", mCommentDecl is bound to the text starting at the saved
                         :  *  "lt" position and running past the >, and the scanner is
                         :  *  positioned just after the >.
                         :  *
                         :  *  If no opening dashes were found, everything up to the first > is
                         :  *  bound as the comment instead.
                         :  *
                         :  *  @param   aScanner -- controller of underlying input source
                         :  *  @return  NS_OK when a comment was bound; kEOF when no end was found
                         :  *           and the scanner is incremental; kNotAComment otherwise,
                         :  *           after moving the scanner back to the "lt" position
                         :  */
    1252                 : nsresult
    1253               0 : CCommentToken::ConsumeStrictComment(nsScanner& aScanner)
    1254                 : {
    1255                 :   // <!--[... -- ... -- ...]*-->
    1256                 :   /*********************************************************
    1257                 :     NOTE: This algorithm does a fine job of handling comments
    1258                 :           when they're formatted per spec, but if they're not
    1259                 :           we don't handle them well.
    1260                 :    *********************************************************/
    1261               0 :   nsScannerIterator end, current, gt, lt;
    1262               0 :   aScanner.EndReading(end);
    1263               0 :   aScanner.CurrentPosition(current);
    1264                 : 
    1265               0 :   nsScannerIterator beginData = end; // Stays == end until opening dashes are found
    1266                 : 
    1267               0 :   lt = current;
    1268               0 :   lt.advance(-2); // <!
    1269                 : 
    1270               0 :   current.advance(-1); // Step back one char so the test below starts at the !
    1271                 : 
    1272                 :   // Regular comment must start with <!--
    1273               0 :   if (*current == kExclamation &&
    1274               0 :       ++current != end && *current == kMinus &&
    1275               0 :       ++current != end && *current == kMinus &&
    1276               0 :       ++current != end) {
    1277               0 :     nsScannerIterator currentEnd = end;
    1278               0 :     bool balancedComment = false;
    1279               0 :     NS_NAMED_LITERAL_STRING(dashes, "--");
    1280               0 :     beginData = current;
    1281                 : 
    1282               0 :     while (FindInReadable(dashes, current, currentEnd)) {
    1283               0 :       current.advance(2); // Step over the "--" that was just found
    1284                 : 
    1285               0 :       balancedComment = !balancedComment; // We need to match '--' with '--'
    1286                 : 
    1287               0 :       if (balancedComment && IsCommentEnd(current, end, gt)) {
    1288                 :         // done
    1289               0 :         current.advance(-2); // Back up so the closing "--" is left out of mComment
    1290                 :         // Note: it's ok if beginData == current, (we'll copy an empty string)
    1291                 :         // and we need to bind mComment anyway.
    1292               0 :         aScanner.BindSubstring(mComment, beginData, current);
    1293               0 :         aScanner.BindSubstring(mCommentDecl, lt, ++gt);
    1294               0 :         aScanner.SetPosition(gt);
    1295               0 :         return NS_OK;
    1296                 :       }
    1297                 : 
    1298                 :       // Continue after the last '--'
    1299               0 :       currentEnd = end;
    1300                 :     }
    1301                 :   }
    1302                 : 
    1303                 :   // If beginData == end, we did not find opening '--'
    1304               0 :   if (beginData == end) {
    1305                 :     // This might have been empty comment: <!>
    1306                 :     // Or it could have been something completely bogus like: <!This is foobar>
    1307                 :     // Handle both cases below
    1308               0 :     aScanner.CurrentPosition(current);
    1309               0 :     beginData = current;
    1310               0 :     if (FindCharInReadable('>', current, end)) {
    1311               0 :       aScanner.BindSubstring(mComment, beginData, current);
    1312               0 :       aScanner.BindSubstring(mCommentDecl, lt, ++current);
    1313               0 :       aScanner.SetPosition(current);
    1314               0 :       return NS_OK;
    1315                 :     }
    1316                 :   }
    1317                 : 
    1318               0 :   if (aScanner.IsIncremental()) {
    1319                 :     // We got here because we saw the beginning of a comment,
    1320                 :     // but not yet the end, and we are still loading the page. In that
    1321                 :     // case the return value here will cause us to unwind,
    1322                 :     // wait for more content, and try again.
    1323                 :     // XXX For performance reasons we should cache where we were, and
    1324                 :     //     continue from there for next call
    1325               0 :     return kEOF;
    1326                 :   }
    1327                 : 
    1328                 :   // There was no terminating string, parse this comment as text.
    1329               0 :   aScanner.SetPosition(lt, false, true);
    1330               0 :   return kNotAComment;
    1331                 : }
    1332                 : /*
                         :  *  Consume a comment using the lenient ("quirks") rules.
                         :  *
                         :  *  Long form (text starts with <!--): each > is examined in turn and
                         :  *  accepted as the end of the comment when it is preceded by "--" or
                         :  *  "--!", or when it directly follows the opening dashes. If no such
                         :  *  > exists and the scanner is not incremental, the first > seen is
                         :  *  used as the end; if there was no > at all, the comment runs to
                         :  *  the end of the available data.
                         :  *
                         :  *  Short form (anything else after <!): the comment ends at the
                         :  *  first >, and a trailing "-", "--", "!", "-!" or "--!" before that
                         :  *  > is left out of mComment.
                         :  *
                         :  *  In every successful case mComment is bound to the comment text,
                         :  *  mCommentDecl to the text starting at the saved "lt" position, and
                         :  *  the scanner is moved to the end of the bound declaration.
                         :  *
                         :  *  @param   aScanner -- controller of underlying input source
                         :  *  @return  NS_OK when a comment was bound; kEOF when more data is
                         :  *           needed and the scanner is incremental; kNotAComment when
                         :  *           the short form has no > and the scanner is not
                         :  *           incremental (the scanner is moved back to "lt")
                         :  */
    1333                 : nsresult
    1334              25 : CCommentToken::ConsumeQuirksComment(nsScanner& aScanner)
    1335                 : {
    1336                 :   // <![-[-]] ... [[-]-|--!]>
    1337                 :   /*********************************************************
    1338                 :     NOTE: This algorithm does a fine job of handling comments
    1339                 :           commonly used, but it doesn't really consume them
    1340                 :           per spec (But then, neither does IE or Nav).
    1341                 :    *********************************************************/
    1342              25 :   nsScannerIterator end, current;
    1343              25 :   aScanner.EndReading(end);
    1344              25 :   aScanner.CurrentPosition(current);
    1345              25 :   nsScannerIterator beginData = current,
    1346              25 :                     beginLastMinus = end,
    1347              25 :                     bestAltCommentEnd = end,
    1348              25 :                     lt = current;
    1349              25 :   lt.advance(-2); // <!
    1350                 : 
    1351                 :   // When we get here, we have always already consumed <!
    1352                 :   // Skip over possible leading minuses
    1353              25 :   if (current != end && *current == kMinus) {
    1354              25 :     beginLastMinus = current;
    1355              25 :     ++current;
    1356              25 :     ++beginData;
    1357              25 :     if (current != end && *current == kMinus) { // <!--
    1358              25 :       beginLastMinus = current;
    1359              25 :       ++current;
    1360              25 :       ++beginData;
    1361                 :       // Long form comment
    1362                 : 
    1363              25 :       nsScannerIterator currentEnd = end, gt = end;
    1364                 : 
    1365                 :       // Find the end of the comment
    1366              50 :       while (FindCharInReadable(kGreaterThan, current, currentEnd)) {
    1367              25 :         gt = current;
    1368              25 :         if (bestAltCommentEnd == end) { // Only the first > seen is remembered
    1369              25 :           bestAltCommentEnd = gt;
    1370                 :         }
    1371              25 :         --current; // Look at what precedes the >
    1372              25 :         bool goodComment = false;
    1373              25 :         if (current != beginLastMinus && *current == kMinus) { // ->
    1374              25 :           --current;
    1375              25 :           if (current != beginLastMinus && *current == kMinus) { // -->
    1376              25 :             goodComment = true;
    1377              25 :             --current;
    1378                 :           }
    1379               0 :         } else if (current != beginLastMinus && *current == '!') {
    1380               0 :           --current;
    1381               0 :           if (current != beginLastMinus && *current == kMinus) {
    1382               0 :             --current;
    1383               0 :             if (current != beginLastMinus && *current == kMinus) { // --!>
    1384               0 :               --current;
    1385               0 :               goodComment = true;
    1386                 :             }
    1387                 :           }
    1388               0 :         } else if (current == beginLastMinus) { // The > directly follows the opening dashes
    1389               0 :           goodComment = true;
    1390                 :         }
    1391                 : 
    1392              25 :         if (goodComment) {
    1393                 :           // done
    1394              25 :           aScanner.BindSubstring(mComment, beginData, ++current);
    1395              25 :           aScanner.BindSubstring(mCommentDecl, lt, ++gt);
    1396              25 :           aScanner.SetPosition(gt);
    1397              25 :           return NS_OK;
    1398                 :         } else {
    1399                 :           // try again starting after the last '>'
    1400               0 :           current = ++gt;
    1401               0 :           currentEnd = end;
    1402                 :         }
    1403                 :       }
    1404                 : 
    1405               0 :       if (aScanner.IsIncremental()) {
    1406                 :         // We got here because we saw the beginning of a comment,
    1407                 :         // but not yet the end, and we are still loading the page. In that
    1408                 :         // case the return value here will cause us to unwind,
    1409                 :         // wait for more content, and try again.
    1410                 :         // XXX For performance reasons we should cache where we were, and
    1411                 :         //     continue from there for next call
    1412               0 :         return kEOF;
    1413                 :       }
    1414                 : 
    1415                 :       // If you're here, then we're in a special state.
    1416                 :       // The problem at hand is that we've hit the end of the document without
    1417                 :       // finding the normal endcomment delimiter "-->".  In this case, the
    1418                 :       // first thing we try is to see if we found an alternate endcomment
    1419                 :       // delimiter ">".  If so, rewind to just past that, and use everything up
    1420                 :       // to that point as your comment.  If not, the document has no end
    1421                 :       // comment and should be treated as one big comment.
    1422               0 :       gt = bestAltCommentEnd;
    1423               0 :       aScanner.BindSubstring(mComment, beginData, gt);
    1424               0 :       if (gt != end) { // Only step over a > if one was actually found
    1425               0 :         ++gt;
    1426                 :       }
    1427               0 :       aScanner.BindSubstring(mCommentDecl, lt, gt);
    1428               0 :       aScanner.SetPosition(gt);
    1429               0 :       return NS_OK;
    1430                 :     }
    1431                 :   }
    1432                 : 
    1433                 :   // This could be short form of comment
    1434                 :   // Find the end of the comment
    1435               0 :   current = beginData;
    1436               0 :   if (FindCharInReadable(kGreaterThan, current, end)) {
    1437               0 :     nsScannerIterator gt = current;
    1438               0 :     if (current != beginData) {
    1439               0 :       --current;
    1440               0 :       if (current != beginData && *current == kMinus) { // ->
    1441               0 :         --current;
    1442               0 :         if (current != beginData && *current == kMinus) { // -->
    1443               0 :           --current;
    1444                 :         }
    1445               0 :       } else if (current != beginData && *current == '!') { // !>
    1446               0 :         --current;
    1447               0 :         if (current != beginData && *current == kMinus) { // -!>
    1448               0 :           --current;
    1449               0 :           if (current != beginData && *current == kMinus) { // --!>
    1450               0 :             --current;
    1451                 :           }
    1452                 :         }
    1453                 :       }
    1454                 :     }
    1455                 : 
    1456               0 :     if (current != gt) {
    1457               0 :       aScanner.BindSubstring(mComment, beginData, ++current);
    1458                 :     } else {
    1459                 :       // Bind mComment to an empty string (note that if current == gt,
    1460                 :       // then current == beginData). We reach this for <!>
    1461               0 :       aScanner.BindSubstring(mComment, beginData, current);
    1462                 :     }
    1463               0 :     aScanner.BindSubstring(mCommentDecl, lt, ++gt);
    1464               0 :     aScanner.SetPosition(gt);
    1465               0 :     return NS_OK;
    1466                 :   }
    1467                 : 
    1468               0 :   if (!aScanner.IsIncremental()) {
    1469                 :     // This isn't a comment at all, go back to the < and consume as text.
    1470               0 :     aScanner.SetPosition(lt, false, true);
    1471               0 :     return kNotAComment;
    1472                 :   }
    1473                 : 
    1474                 :   // Wait for more data...
    1475               0 :   return kEOF;
    1476                 : }
    1477                 : 
    1478                 : /*
    1479                 :  *  Consume the rest of a comment from the scanner.
    1480                 :  *  Note that we've already eaten the "<!" portion.
                         :  *
                         :  *  The work is done by ConsumeStrictComment() when aFlag has
                         :  *  NS_IPARSER_FLAG_STRICT_MODE set and by ConsumeQuirksComment()
                         :  *  otherwise. When that call succeeds, mNewlineCount is set to the
                         :  *  number of kNewLine characters in mCommentDecl.
    1481                 :  *
    1482                 :  *  @param   aChar -- last char consumed from stream (not referenced here)
    1483                 :  *  @param   aScanner -- controller of underlying input source
                         :  *  @param   aFlag -- parser flags; only NS_IPARSER_FLAG_STRICT_MODE is
                         :  *                    tested
    1484                 :  *  @return  error result of the strict or quirks consumer, unchanged
    1485                 :  */
    1486                 : nsresult
    1487              25 : CCommentToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
    1488                 : {
    1489              25 :   nsresult result = NS_OK; // Always overwritten below; a status code, not a bool
    1490                 : 
    1491              25 :   if (aFlag & NS_IPARSER_FLAG_STRICT_MODE) {
    1492                 :     // Enabling strict comment parsing for Bug 53011 and 2749 contradicts!
    1493               0 :     result = ConsumeStrictComment(aScanner);
    1494                 :   } else {
    1495              25 :     result = ConsumeQuirksComment(aScanner);
    1496                 :   }
    1497                 : 
    1498              25 :   if (NS_SUCCEEDED(result)) {
    1499              25 :     mNewlineCount = mCommentDecl.CountChar(kNewLine);
    1500                 :   }
    1501                 : 
    1502              25 :   return result;
    1503                 : }
    1504                 : /*
                         :  *  Retrieve the comment text held by this token. The value comes
                         :  *  from mComment, not from mCommentDecl.
                         :  *
                         :  *  @return  the result of mComment.AsString()
                         :  */
    1505                 : const nsSubstring&
    1506               0 : CCommentToken::GetStringValue()
    1507                 : {
    1508               0 :   return mComment.AsString();
    1509                 : }
    1510                 : /*
                         :  *  Identify the kind of token this is.
                         :  *
                         :  *  @return  eToken_comment, always
                         :  */
    1511                 : PRInt32
    1512              50 : CCommentToken::GetTokenType()
    1513                 : {
    1514              50 :   return eToken_comment;
    1515                 : }
    1516                 : /*
                         :  *  Default constructor. Initializes the CHTMLToken base with
                         :  *  eHTMLTag_newline and does nothing else.
                         :  */
    1517             530 : CNewlineToken::CNewlineToken()
    1518             530 :   : CHTMLToken(eHTMLTag_newline)
    1519                 : {
    1520             530 : }
    1521                 : /*
                         :  *  Identify the kind of token this is.
                         :  *
                         :  *  @return  eToken_newline, always
                         :  */
    1522                 : PRInt32
    1523            1590 : CNewlineToken::GetTokenType()
    1524                 : {
    1525            1590 :   return eToken_newline;
    1526                 : }
    1527                 : /*
                         :  *  gNewlineStr is the file-local "\n" string whose AsString() value
                         :  *  CNewlineToken::GetStringValue() returns.
                         :  *
                         :  *  AllocNewline() creates that string with new and stores it in
                         :  *  gNewlineStr. Any pointer already stored there is overwritten
                         :  *  without being deleted; CNewlineToken::FreeNewline() is the
                         :  *  function that deletes it.
                         :  */
    1528                 : static nsScannerSubstring* gNewlineStr;
    1529                 : void
    1530             263 : CNewlineToken::AllocNewline()
    1531                 : {
    1532             526 :   gNewlineStr = new nsScannerSubstring(NS_LITERAL_STRING("\n"));
    1533             263 : }
    1534                 : /*
                         :  *  Delete the string that gNewlineStr points to and reset the pointer
                         :  *  to nsnull. Does nothing when gNewlineStr is already null, so it
                         :  *  may be called more than once.
                         :  */
    1535                 : void
    1536             263 : CNewlineToken::FreeNewline()
    1537                 : {
    1538             263 :   if (gNewlineStr) {
    1539             263 :     delete gNewlineStr;
    1540             263 :     gNewlineStr = nsnull; // Clear the pointer so the deleted string cannot be reused
    1541                 :   }
    1542             263 : }
    1543                 : 
    1544                 : /**
    1545                 :  *  This method retrieves the string value shared by all newline
                         :  *  tokens. It dereferences gNewlineStr without a null check, so
                         :  *  gNewlineStr must be non-null when this is called.
    1546                 :  *
    1547                 :  *  @return the result of gNewlineStr->AsString()
    1548                 :  */
    1549                 : const nsSubstring&
    1550               0 : CNewlineToken::GetStringValue()
    1551                 : {
    1552               0 :   return gNewlineStr->AsString();
    1553                 : }
    1554                 : 
    1555                 : /*
    1556                 :  * Consume one newline (cr/lf pair).
                         :  *
                         :  * When aChar is a carriage return, the next char is peeked at and
                         :  * consumed too if it is a line feed. If peeking reports kEOF on a
                         :  * non-incremental scanner, the kEOF is replaced by NS_OK. For any
                         :  * other aChar the scanner is not touched. mNewlineCount is always
                         :  * set to 1.
    1557                 :  *
    1558                 :  *  @param   aChar -- last char consumed from stream
    1559                 :  *  @param   aScanner -- controller of underlying input source
                         :  *  @param   aFlag -- not referenced by this function
    1560                 :  *  @return  error result
    1561                 :  */
    1562                 : nsresult
    1563             530 : CNewlineToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
    1564                 : {
    1565                 :   /*
    1566                 :    * Here's what the HTML spec says about newlines:
    1567                 :    *
    1568                 :    * "A line break is defined to be a carriage return (&#x000D;),
    1569                 :    * a line feed (&#x000A;), or a carriage return/line feed pair.
    1570                 :    * All line breaks constitute white space."
    1571                 :    */
    1572                 : 
    1573             530 :   nsresult rv = NS_OK;
    1574             530 :   if (aChar == kCR) {
    1575                 :     PRUnichar theChar = 0; // Compared below before rv is checked, so give it a defined value
    1576               0 :     rv = aScanner.Peek(theChar);
    1577               0 :     if (theChar == kNewLine) {
    1578               0 :       rv = aScanner.GetChar(theChar); // Swallow the line feed of a cr/lf pair
    1579               0 :     } else if (rv == kEOF && !aScanner.IsIncremental()) {
    1580                 :       // Make sure we don't lose information about this trailing newline.
    1581               0 :       rv = NS_OK;
    1582                 :     }
    1583                 :   }
    1584                 : 
    1585             530 :   mNewlineCount = 1;
    1586             530 :   return rv;
    1587                 : }
    1588                 : /*
                         :  *  Default constructor. Initializes the CHTMLToken base with
                         :  *  eHTMLTag_unknown and clears mHasEqualWithoutValue.
                         :  */
    1589             732 : CAttributeToken::CAttributeToken()
    1590             732 :   : CHTMLToken(eHTMLTag_unknown)
    1591                 : {
    1592             732 :   mHasEqualWithoutValue = false;
    1593             732 : }
    1594                 : 
    1595                 : /*
    1596                 :  * String based constructor. Initializes the CHTMLToken base with
                         :  * eHTMLTag_unknown, assigns the given string to mTextValue (the key,
                         :  * mTextKey, is not set here) and clears mHasEqualWithoutValue.
                         :  *
                         :  *  @param   aName -- string assigned to mTextValue
    1597                 :  */
    1598               0 : CAttributeToken::CAttributeToken(const nsAString& aName)
    1599               0 :   : CHTMLToken(eHTMLTag_unknown)
    1600                 : {
    1601               0 :   mTextValue.writable().Assign(aName);
    1602               0 :   mHasEqualWithoutValue = false;
    1603               0 : }
    1604                 : 
    1605                 : /*
    1606                 :  *  Construct, initializing the data to a key/value pair. The
                         :  *  CHTMLToken base is initialized with eHTMLTag_unknown and
                         :  *  mHasEqualWithoutValue is cleared.
                         :  *
                         :  *  @param   aKey -- string that mTextKey is rebound to
                         :  *  @param   aName -- string assigned to mTextValue
    1607                 :  */
    1608               0 : CAttributeToken::CAttributeToken(const nsAString& aKey, const nsAString& aName)
    1609               0 :   : CHTMLToken(eHTMLTag_unknown)
    1610                 : {
    1611               0 :   mTextValue.writable().Assign(aName);
    1612               0 :   mTextKey.Rebind(aKey);
    1613               0 :   mHasEqualWithoutValue = false;
    1614               0 : }
    1615                 : /*
                         :  *  Identify the kind of token this is.
                         :  *
                         :  *  @return  eToken_attribute, always
                         :  */
    1616                 : PRInt32
    1617            1468 : CAttributeToken::GetTokenType()
    1618                 : {
    1619            1468 :   return eToken_attribute;
    1620                 : }
    1621                 : /*
                         :  *  Retrieve the value part of this attribute. The key (mTextKey) is
                         :  *  not part of the returned string.
                         :  *
                         :  *  @return  the result of mTextValue.str()
                         :  */
    1622                 : const nsSubstring&
    1623               0 : CAttributeToken::GetStringValue()
    1624                 : {
    1625               0 :   return mTextValue.str();
    1626                 : }
    1627                 : /*
                         :  *  Replace the contents of the given string with the source text of
                         :  *  this attribute: the string is truncated first and then filled by
                         :  *  AppendSourceTo().
                         :  *
                         :  *  @param   anOutputString -- string that receives the source text
                         :  */
    1628                 : void
    1629               0 : CAttributeToken::GetSource(nsString& anOutputString)
    1630                 : {
    1631               0 :   anOutputString.Truncate();
    1632               0 :   AppendSourceTo(anOutputString);
    1633               0 : }
    1634                 : /*
                         :  *  Append the source text of this attribute to the given string:
                         :  *  the key, then "=" if the value is non-empty or
                         :  *  mHasEqualWithoutValue is set, then the value.
                         :  *
                         :  *  @param   anOutputString -- string that is appended to
                         :  */
    1635                 : void
    1636               0 : CAttributeToken::AppendSourceTo(nsAString& anOutputString)
    1637                 : {
    1638               0 :   AppendUnicodeTo(mTextKey, anOutputString);
    1639               0 :   if (mTextValue.str().Length() || mHasEqualWithoutValue) {
    1640               0 :     anOutputString.AppendLiteral("=");
    1641                 :   }
    1642               0 :   anOutputString.Append(mTextValue.str());
    1643                 :   // anOutputString.AppendLiteral(";");
    1644               0 : }
    1645                 : 
    1646                 : /*
    1647                 :  * This general purpose method is used when you want to
    1648                 :  * consume a known quoted string.
    1649                 :  */
    1650                 : static nsresult
    1651             732 : ConsumeQuotedString(PRUnichar aChar,
    1652                 :                     nsScannerSharedSubstring& aString,
    1653                 :                     PRInt32& aNewlineCount,
    1654                 :                     nsScanner& aScanner,
    1655                 :                     PRInt32 aFlag)
    1656                 : {
    1657             732 :   NS_ASSERTION(aChar == kQuote || aChar == kApostrophe,
    1658                 :                "char is neither quote nor apostrophe");
    1659                 :   // Hold onto this in case this is an unterminated string literal
    1660             732 :   PRUint32 origLen = aString.str().Length();
    1661                 : 
    1662                 :   static const PRUnichar theTerminalCharsQuote[] = {
    1663                 :     PRUnichar(kQuote), PRUnichar('&'), PRUnichar(kCR),
    1664                 :     PRUnichar(kNewLine), PRUnichar(0) };
    1665                 :   static const PRUnichar theTerminalCharsApostrophe[] = {
    1666                 :     PRUnichar(kApostrophe), PRUnichar('&'), PRUnichar(kCR),
    1667                 :     PRUnichar(kNewLine), PRUnichar(0) };
    1668                 :   static const nsReadEndCondition
    1669             732 :     theTerminateConditionQuote(theTerminalCharsQuote);
    1670                 :   static const nsReadEndCondition
    1671             732 :     theTerminateConditionApostrophe(theTerminalCharsApostrophe);
    1672                 : 
    1673                 :   // Assume Quote to init to something
    1674             732 :   const nsReadEndCondition *terminateCondition = &theTerminateConditionQuote;
    1675             732 :   if (aChar == kApostrophe) {
    1676               0 :     terminateCondition = &theTerminateConditionApostrophe;
    1677                 :   }
    1678                 : 
    1679             732 :   nsresult result = NS_OK;
    1680             732 :   nsScannerIterator theOffset;
    1681             732 :   aScanner.CurrentPosition(theOffset);
    1682                 : 
    1683                 :   result = ConsumeUntil(aString, aNewlineCount, aScanner,
    1684             732 :                       *terminateCondition, true, true, aFlag);
    1685                 : 
    1686             732 :   if (NS_SUCCEEDED(result)) {
    1687             732 :     result = aScanner.GetChar(aChar); // aChar should be " or '
    1688                 :   }
    1689                 : 
    1690                 :   // Ref: Bug 35806
    1691                 :   // A back up measure when disaster strikes...
    1692                 :   // Ex <table> <tr d="><td>hello</td></tr></table>
    1693            1464 :   if (!aString.str().IsEmpty() && aString.str().Last() != aChar &&
    1694             732 :       !aScanner.IsIncremental() && result == kEOF) {
    1695                 :     static const nsReadEndCondition
    1696               0 :       theAttributeTerminator(kAttributeTerminalChars);
    1697               0 :     aString.writable().Truncate(origLen);
    1698               0 :     aScanner.SetPosition(theOffset, false, true);
    1699                 :     result = ConsumeUntil(aString, aNewlineCount, aScanner,
    1700               0 :                           theAttributeTerminator, false, true, aFlag);
    1701               0 :     if (NS_SUCCEEDED(result) && (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
    1702                 :       // Remember that this string literal was unterminated.
    1703               0 :       result = NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL;
    1704                 :     }
    1705                 :   }
    1706             732 :   return result;
    1707                 : }
    1708                 : 
    1709                 : /*
    1710                 :  * This method is meant to be used by view-source to consume invalid attributes.
    1711                 :  * For the purposes of this method, an invalid attribute is an attribute that
    1712                 :  * starts with either ', ", or /. We consume all ', ", or / and the following
    1713                 :  * whitespace.
    1714                 :  *
    1715                 :  * @param aScanner -- the scanner we're reading our data from.
    1716                 :  * @param aChar -- the character we're skipping
    1717                 :  * @param aCurrent -- the current position that we're looking at.
    1718                 :  * @param aNewlineCount -- a count of the newlines we've consumed.
    1719                 :  * @return error result.
    1720                 :  */
    1721                 : static nsresult
    1722               0 : ConsumeInvalidAttribute(nsScanner& aScanner,
    1723                 :                         PRUnichar aChar,
    1724                 :                         nsScannerIterator& aCurrent,
    1725                 :                         PRInt32& aNewlineCount)
    1726                 : {
    1727               0 :   NS_ASSERTION(aChar == kApostrophe || aChar == kQuote || aChar == kForwardSlash,
    1728                 :                "aChar must be a quote or apostrophe");
    1729               0 :   nsScannerIterator end, wsbeg;
    1730               0 :   aScanner.EndReading(end);
    1731                 : 
    1732               0 :   while (aCurrent != end && *aCurrent == aChar) {
    1733               0 :     ++aCurrent;
    1734                 :   }
    1735                 : 
    1736               0 :   aScanner.SetPosition(aCurrent);
    1737               0 :   return aScanner.ReadWhitespace(wsbeg, aCurrent, aNewlineCount);
    1738                 : }
    1739                 : 
    1740                 : /*
    1741                 :  * Consume the key and value portions of the attribute.
    1742                 :  */
    1743                 : nsresult
    1744             732 : CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
    1745                 : {
    1746                 :   nsresult result;
    1747             732 :   nsScannerIterator wsstart, wsend;
    1748                 : 
    1749             732 :   if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
    1750               0 :     result = aScanner.ReadWhitespace(wsstart, wsend, mNewlineCount);
    1751               0 :     if (kEOF == result && wsstart != wsend) {
    1752                 :       // Do this here so if this is the final token in the document, we don't
    1753                 :       // lose the whitespace.
    1754               0 :       aScanner.BindSubstring(mTextKey, wsstart, wsend);
    1755                 :     }
    1756                 :   } else {
    1757             732 :     result = aScanner.SkipWhitespace(mNewlineCount);
    1758                 :   }
    1759                 : 
    1760             732 :   if (NS_OK == result) {
    1761                 :     static const PRUnichar theTerminalsChars[] =
    1762                 :     { PRUnichar(' '), PRUnichar('"'),
    1763                 :       PRUnichar('='), PRUnichar('\n'),
    1764                 :       PRUnichar('\r'), PRUnichar('\t'),
    1765                 :       PRUnichar('>'), PRUnichar('<'),
    1766                 :       PRUnichar('\''), PRUnichar('/'),
    1767                 :       PRUnichar(0) };
    1768             732 :     static const nsReadEndCondition theEndCondition(theTerminalsChars);
    1769                 : 
    1770             732 :     nsScannerIterator start, end;
    1771             732 :     result = aScanner.ReadUntil(start, end, theEndCondition, false);
    1772                 : 
    1773             732 :     if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
    1774             732 :       aScanner.BindSubstring(mTextKey, start, end);
    1775               0 :     } else if (kEOF == result && wsstart != end) {
    1776                 :       // Capture all of the text (from the beginning of the whitespace to the
    1777                 :       // end of the document).
    1778               0 :       aScanner.BindSubstring(mTextKey, wsstart, end);
    1779                 :     }
    1780                 : 
    1781                 :     // Now it's time to Consume the (optional) value...
    1782             732 :     if (NS_OK == result) {
    1783             732 :       if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
    1784               0 :         result = aScanner.ReadWhitespace(start, wsend, mNewlineCount);
    1785               0 :         aScanner.BindSubstring(mTextKey, wsstart, wsend);
    1786                 :       } else {
    1787             732 :         result = aScanner.SkipWhitespace(mNewlineCount);
    1788                 :       }
    1789                 : 
    1790             732 :       if (NS_OK == result) {
    1791                 :         // Skip ahead until you find an equal sign or a '>'...
    1792             732 :         result = aScanner.Peek(aChar);
    1793             732 :         if (NS_OK == result) {
    1794             732 :           if (kEqual == aChar) {
    1795             732 :             result = aScanner.GetChar(aChar);  // Skip the equal sign...
    1796             732 :             if (NS_OK == result) {
    1797             732 :               if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
    1798                 :                 bool haveCR;
    1799                 :                 result = aScanner.ReadWhitespace(mTextValue, mNewlineCount,
    1800               0 :                                                  haveCR);
    1801                 :               } else {
    1802             732 :                 result = aScanner.SkipWhitespace(mNewlineCount);
    1803                 :               }
    1804                 : 
    1805             732 :               if (NS_OK == result) {
    1806             732 :                 result = aScanner.Peek(aChar);  // And grab the next char.
    1807             732 :                 if (NS_OK == result) {
    1808             732 :                   if (kQuote == aChar || kApostrophe == aChar) {
    1809             732 :                     aScanner.GetChar(aChar);
    1810             732 :                     if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
    1811               0 :                       mTextValue.writable().Append(aChar);
    1812                 :                     }
    1813                 : 
    1814                 :                     result = ConsumeQuotedString(aChar, mTextValue,
    1815                 :                                                  mNewlineCount, aScanner,
    1816             732 :                                                  aFlag);
    1817            1464 :                     if (NS_SUCCEEDED(result) &&
    1818                 :                         (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
    1819               0 :                       mTextValue.writable().Append(aChar);
    1820             732 :                     } else if (result ==
    1821                 :                                 NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL) {
    1822               0 :                       result = NS_OK;
    1823               0 :                       mInError = true;
    1824                 :                     }
    1825                 :                     // According to spec. we ( who? ) should ignore linefeeds.
    1826                 :                     // But look, even the carriage return was getting stripped
    1827                 :                     // ( wonder why! ) - Ref. to bug 15204.  Okay, so the
    1828                 :                     // spec. told us to ignore linefeeds, but then what about
    1829                 :                     // bug 47535 ? Should we preserve everything then?  Well,
    1830                 :                     // let's make it so!
    1831               0 :                   } else if (kGreaterThan == aChar) {
    1832               0 :                     mHasEqualWithoutValue = true;
    1833               0 :                     mInError = true;
    1834                 :                   } else {
    1835                 :                     static const nsReadEndCondition
    1836               0 :                       theAttributeTerminator(kAttributeTerminalChars);
    1837                 :                     result =
    1838                 :                       ConsumeUntil(mTextValue,
    1839                 :                                    mNewlineCount,
    1840                 :                                    aScanner,
    1841                 :                                    theAttributeTerminator,
    1842                 :                                    false,
    1843                 :                                    true,
    1844               0 :                                    aFlag);
    1845                 :                   }
    1846                 :                 }
    1847             732 :                 if (NS_OK == result) {
    1848             732 :                   if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
    1849                 :                     bool haveCR;
    1850                 :                     result = aScanner.ReadWhitespace(mTextValue, mNewlineCount,
    1851               0 :                                                      haveCR);
    1852                 :                   } else {
    1853             732 :                     result = aScanner.SkipWhitespace(mNewlineCount);
    1854                 :                   }
    1855                 :                 }
    1856                 :               } else {
    1857                 :                 // We saw an equal sign but ran out of room looking for a value.
    1858               0 :                 mHasEqualWithoutValue = true;
    1859               0 :                 mInError = true;
    1860                 :               }
    1861                 :             }
    1862                 :           } else {
    1863                 :             // This is where we have to handle fairly busted content.
    1864                 :             // If you're here, it means we saw an attribute name, but couldn't
    1865                 :             // find the following equal sign.  <tag NAME....
    1866                 : 
    1867                 :             // Doing this right in all cases is <i>REALLY</i> ugly.
    1868                 :             // My best guess is to grab the next non-ws char. We know it's not
    1869                 :             // '=', so let's see what it is. If it's a '"', then assume we're
    1870                 :             // reading from the middle of the value. Try stripping the quote
    1871                 :             // and continuing...  Note that this code also strips forward
    1872                 :             // slashes to handle cases like <tag NAME/>
    1873               0 :             if (kQuote == aChar || kApostrophe == aChar ||
    1874                 :                 kForwardSlash == aChar) {
    1875                 :               // In XML, a trailing slash isn't an error.
    1876               0 :               if (kForwardSlash != aChar || !(aFlag & NS_IPARSER_FLAG_XML)) {
    1877               0 :                 mInError = true;
    1878                 :               }
    1879                 : 
    1880               0 :               if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
    1881               0 :                 result = aScanner.SkipOver(aChar); // Strip quote or slash.
    1882               0 :                 if (NS_SUCCEEDED(result)) {
    1883               0 :                   result = aScanner.SkipWhitespace(mNewlineCount);
    1884                 :                 }
    1885                 :               } else {
    1886                 :                 // We want to collect whitespace here so that following
    1887                 :                 // attributes can have the right line number (and for
    1888                 :                 // parity with the non-view-source code above).
    1889                 :                 result = ConsumeInvalidAttribute(aScanner, aChar,
    1890               0 :                                                  wsend, mNewlineCount);
    1891                 : 
    1892               0 :                 aScanner.BindSubstring(mTextKey, wsstart, wsend);
    1893               0 :                 aScanner.SetPosition(wsend);
    1894                 :               }
    1895                 :             }
    1896                 :           }
    1897                 :         }
    1898                 :       }
    1899                 :     }
    1900                 : 
    1901             732 :     if (NS_OK == result) {
    1902             732 :       if (mTextValue.str().Length() == 0 && mTextKey.Length() == 0 &&
    1903               0 :           mNewlineCount == 0 && !mHasEqualWithoutValue) {
    1904                 :         // This attribute contains no useful information for us, so there is no
    1905                 :         // use in keeping it around. Attributes that are otherwise empty, but
    1906                 :         // have newlines in them are passed on to the DTD so it can get line
    1907                 :         // numbering right.
    1908               0 :         return NS_ERROR_HTMLPARSER_BADATTRIBUTE;
    1909                 :       }
    1910                 :     }
    1911                 :   }
    1912                 : 
    1913             732 :   if (kEOF == result && !aScanner.IsIncremental()) {
    1914                 :     // This is our run-of-the-mill "don't lose content at the end of a
    1915                 :     // document" with a slight twist: we don't want to bother returning an
    1916                 :     // empty attribute key, even if this is the end of the document.
    1917               0 :     if (mTextKey.Length() == 0) {
    1918               0 :       result = NS_ERROR_HTMLPARSER_BADATTRIBUTE;
    1919                 :     } else {
    1920               0 :       result = NS_OK;
    1921                 :     }
    1922                 :   }
    1923                 : 
    1924             732 :   return result;
    1925                 : }
    1926                 : 
    1927                 : void
    1928               0 : CAttributeToken::SetKey(const nsAString& aKey)
    1929                 : {
    1930               0 :   mTextKey.Rebind(aKey);
    1931               0 : }
    1932                 : 
    1933                 : void
    1934               0 : CAttributeToken::BindKey(nsScanner* aScanner,
    1935                 :                          nsScannerIterator& aStart,
    1936                 :                          nsScannerIterator& aEnd)
    1937                 : {
    1938               0 :   aScanner->BindSubstring(mTextKey, aStart, aEnd);
    1939               0 : }
    1940                 : 
    1941             281 : CWhitespaceToken::CWhitespaceToken()
    1942             281 :   : CHTMLToken(eHTMLTag_whitespace)
    1943                 : {
    1944             281 : }
    1945                 : 
    1946               0 : CWhitespaceToken::CWhitespaceToken(const nsAString& aName)
    1947               0 :   : CHTMLToken(eHTMLTag_whitespace)
    1948                 : {
    1949               0 :   mTextValue.writable().Assign(aName);
    1950               0 : }
    1951                 : 
    1952             843 : PRInt32 CWhitespaceToken::GetTokenType()
    1953                 : {
    1954             843 :   return eToken_whitespace;
    1955                 : }
    1956                 : 
    1957                 : /*
    1958                 :  * This general purpose method is used when you want to
    1959                 :  * consume an arbitrary sequence of whitespace.
    1960                 :  *
    1961                 :  *  @param   aChar -- last char consumed from stream
    1962                 :  *  @param   aScanner -- controller of underlying input source
    1963                 :  *  @return  error result
    1964                 :  */
    1965                 : nsresult
    1966             281 : CWhitespaceToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
    1967                 : {
    1968                 :   // If possible, we'd like to just be a dependent substring starting at
    1969                 :   // |aChar|.  The scanner has already been advanced, so we need to
    1970                 :   // back it up to facilitate this.
    1971                 : 
    1972             281 :   nsScannerIterator start;
    1973             281 :   aScanner.CurrentPosition(start);
    1974             281 :   aScanner.SetPosition(--start, false, true);
    1975                 : 
    1976                 :   bool haveCR;
    1977                 : 
    1978             281 :   nsresult result = aScanner.ReadWhitespace(mTextValue, mNewlineCount, haveCR);
    1979                 : 
    1980             281 :   if (result == kEOF && !aScanner.IsIncremental()) {
    1981                 :     // Oops, we ran off the end, make sure we don't lose the trailing
    1982                 :     // whitespace!
    1983               0 :     result = NS_OK;
    1984                 :   }
    1985                 : 
    1986             281 :   if (NS_OK == result && haveCR) {
    1987               0 :     mTextValue.writable().StripChar(kCR);
    1988                 :   }
    1989             281 :   return result;
    1990                 : }
    1991                 : 
    1992                 : const nsSubstring&
    1993               0 : CWhitespaceToken::GetStringValue()
    1994                 : {
    1995               0 :   return mTextValue.str();
    1996                 : }
    1997                 : 
    1998               0 : CEntityToken::CEntityToken()
    1999               0 :   : CHTMLToken(eHTMLTag_entity)
    2000                 : {
    2001               0 : }
    2002                 : 
    2003               0 : CEntityToken::CEntityToken(const nsAString& aName)
    2004               0 :   : CHTMLToken(eHTMLTag_entity)
    2005                 : {
    2006               0 :   mTextValue.Assign(aName);
    2007               0 : }
    2008                 : 
    2009                 : 
    2010                 : /*
    2011                 :  *  Consume the rest of the entity. We've already eaten the "&".
    2012                 :  *
    2013                 :  *  @param   aChar -- last char consumed from stream
    2014                 :  *  @param   aScanner -- controller of underlying input source
    2015                 :  *  @return  error result
    2016                 :  */
    2017                 : nsresult
    2018               0 : CEntityToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
    2019                 : {
    2020               0 :   nsresult result = ConsumeEntity(aChar, mTextValue, aScanner);
    2021               0 :   return result;
    2022                 : }
    2023                 : 
    2024                 : PRInt32
    2025               0 : CEntityToken::GetTokenType()
    2026                 : {
    2027               0 :   return eToken_entity;
    2028                 : }
    2029                 : 
    2030                 : /*
    2031                 :  * This general purpose method is used when you want to
    2032                 :  * consume an entity &xxxx;. Keep in mind that entities
    2033                 :  * are <i>not</i> reduced inline.
    2034                 :  *
    2035                 :  * @param   aChar -- last char consumed from stream
    2036                 :  * @param   aScanner -- controller of underlying input source
    2037                 :  * @return  error result
    2038                 :  */
    2039                 : nsresult
    2040              11 : CEntityToken::ConsumeEntity(PRUnichar aChar,
    2041                 :                             nsString& aString,
    2042                 :                             nsScanner& aScanner)
    2043                 : {
    2044              11 :   nsresult result = NS_OK;
    2045              11 :   if (kLeftBrace == aChar) {
    2046                 :     // You're consuming a script entity...
    2047               0 :     aScanner.GetChar(aChar); // Consume &
    2048                 : 
    2049               0 :     PRInt32 rightBraceCount = 0;
    2050               0 :     PRInt32 leftBraceCount  = 0;
    2051                 : 
    2052               0 :     do {
    2053               0 :       result = aScanner.GetChar(aChar);
    2054                 : 
    2055               0 :       if (NS_FAILED(result)) {
    2056               0 :         return result;
    2057                 :       }
    2058                 : 
    2059               0 :       aString.Append(aChar);
    2060               0 :       if (aChar == kRightBrace) {
    2061               0 :         ++rightBraceCount;
    2062               0 :       } else if (aChar == kLeftBrace) {
    2063               0 :         ++leftBraceCount;
    2064                 :       }
    2065                 :     } while (leftBraceCount != rightBraceCount);
    2066                 :   } else {
    2067              11 :     PRUnichar theChar = 0;
    2068              11 :     if (kHashsign == aChar) {
    2069               0 :       result = aScanner.Peek(theChar, 2);
    2070                 : 
    2071               0 :       if (NS_FAILED(result)) {
    2072               0 :         if (kEOF == result && !aScanner.IsIncremental()) {
    2073                 :           // If this is the last buffer then we are certainly
    2074                 :           // not dealing with an entity. That is, there are
    2075                 :           // no more characters after &#. Bug 188278.
    2076               0 :           return NS_HTMLTOKENS_NOT_AN_ENTITY;
    2077                 :         }
    2078               0 :         return result;
    2079                 :       }
    2080                 : 
    2081               0 :       if (nsCRT::IsAsciiDigit(theChar)) {
    2082               0 :         aScanner.GetChar(aChar); // Consume &
    2083               0 :         aScanner.GetChar(aChar); // Consume #
    2084               0 :         aString.Assign(aChar);
    2085               0 :         result = aScanner.ReadNumber(aString, 10);
    2086               0 :       } else if (theChar == 'x' || theChar == 'X') {
    2087               0 :         aScanner.GetChar(aChar);   // Consume &
    2088               0 :         aScanner.GetChar(aChar);   // Consume #
    2089               0 :         aScanner.GetChar(theChar); // Consume x
    2090               0 :         aString.Assign(aChar);
    2091               0 :         aString.Append(theChar);
    2092               0 :         result = aScanner.ReadNumber(aString, 16);
    2093                 :       } else {
    2094               0 :         return NS_HTMLTOKENS_NOT_AN_ENTITY;
    2095                 :       }
    2096                 :     } else {
    2097              11 :       result = aScanner.Peek(theChar, 1);
    2098                 : 
    2099              11 :       if (NS_FAILED(result)) {
    2100               0 :         return result;
    2101                 :       }
    2102                 : 
    2103              11 :       if (nsCRT::IsAsciiAlpha(theChar) ||
    2104                 :         theChar == '_' ||
    2105                 :         theChar == ':') {
    2106              11 :         aScanner.GetChar(aChar); // Consume &
    2107              11 :         result = aScanner.ReadEntityIdentifier(aString);
    2108                 :       } else {
    2109               0 :         return NS_HTMLTOKENS_NOT_AN_ENTITY;
    2110                 :       }
    2111                 :     }
    2112                 :   }
    2113                 : 
    2114              11 :   if (NS_FAILED(result)) {
    2115               0 :     return result;
    2116                 :   }
    2117                 : 
    2118              11 :   result = aScanner.Peek(aChar);
    2119                 : 
    2120              11 :   if (NS_FAILED(result)) {
    2121               0 :     return result;
    2122                 :   }
    2123                 : 
    2124              11 :   if (aChar == kSemicolon) {
    2125                 :     // Consume semicolon that stopped the scan
    2126              11 :     aString.Append(aChar);
    2127              11 :     result = aScanner.GetChar(aChar);
    2128                 :   }
    2129                 : 
    2130              11 :   return result;
    2131                 : }
    2132                 : 
    2133                 : /**
    2134                 :  * Map some illegal but commonly used numeric entities into their
    2135                 :  * appropriate unicode value.
    2136                 :  */
    2137                 : #define NOT_USED 0xfffd
    2138                 : 
    2139                 : static const PRUint16 PA_HackTable[] = {
    2140                 :         0x20ac,  /* EURO SIGN */
    2141                 :         NOT_USED,
    2142                 :         0x201a,  /* SINGLE LOW-9 QUOTATION MARK */
    2143                 :         0x0192,  /* LATIN SMALL LETTER F WITH HOOK */
    2144                 :         0x201e,  /* DOUBLE LOW-9 QUOTATION MARK */
    2145                 :         0x2026,  /* HORIZONTAL ELLIPSIS */
    2146                 :         0x2020,  /* DAGGER */
    2147                 :         0x2021,  /* DOUBLE DAGGER */
    2148                 :         0x02c6,  /* MODIFIER LETTER CIRCUMFLEX ACCENT */
    2149                 :         0x2030,  /* PER MILLE SIGN */
    2150                 :         0x0160,  /* LATIN CAPITAL LETTER S WITH CARON */
    2151                 :         0x2039,  /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
    2152                 :         0x0152,  /* LATIN CAPITAL LIGATURE OE */
    2153                 :         NOT_USED,
    2154                 :         0x017D,  /* LATIN CAPITAL LETTER Z WITH CARON */
    2155                 :         NOT_USED,
    2156                 :         NOT_USED,
    2157                 :         0x2018,  /* LEFT SINGLE QUOTATION MARK */
    2158                 :         0x2019,  /* RIGHT SINGLE QUOTATION MARK */
    2159                 :         0x201c,  /* LEFT DOUBLE QUOTATION MARK */
    2160                 :         0x201d,  /* RIGHT DOUBLE QUOTATION MARK */
    2161                 :         0x2022,  /* BULLET */
    2162                 :         0x2013,  /* EN DASH */
    2163                 :         0x2014,  /* EM DASH */
    2164                 :         0x02dc,  /* SMALL TILDE */
    2165                 :         0x2122,  /* TRADE MARK SIGN */
    2166                 :         0x0161,  /* LATIN SMALL LETTER S WITH CARON */
    2167                 :         0x203a,  /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
    2168                 :         0x0153,  /* LATIN SMALL LIGATURE OE */
    2169                 :         NOT_USED,
    2170                 :         0x017E,  /* LATIN SMALL LETTER Z WITH CARON */
    2171                 :         0x0178   /* LATIN CAPITAL LETTER Y WITH DIAERESIS */
    2172                 : };
    2173                 : 
    2174                 : static void
    2175               0 : AppendNCR(nsSubstring& aString, PRInt32 aNCRValue)
    2176                 : {
    2177                 :   /* For some illegal, but popular usage */
    2178               0 :   if (aNCRValue >= 0x0080 && aNCRValue <= 0x009f) {
    2179               0 :     aNCRValue = PA_HackTable[aNCRValue - 0x0080];
    2180                 :   }
    2181                 : 
    2182               0 :   AppendUCS4ToUTF16(ENSURE_VALID_CHAR(aNCRValue), aString);
    2183               0 : }
    2184                 : 
    2185                 : /*
    2186                 :  * This method converts this entity into its underlying
    2187                 :  * unicode equivalent.
    2188                 :  *
    2189                 :  *  @param   aString will hold the resulting string value
    2190                 :  *  @return  numeric (unichar) value
    2191                 :  */
    2192                 : PRInt32
    2193               0 : CEntityToken::TranslateToUnicodeStr(nsString& aString)
    2194                 : {
    2195               0 :   PRInt32 value = 0;
    2196                 : 
    2197               0 :   if (mTextValue.Length() > 1) {
    2198               0 :     PRUnichar theChar0 = mTextValue.CharAt(0);
    2199                 : 
    2200               0 :     if (kHashsign == theChar0) {
    2201               0 :       PRInt32 err = 0;
    2202                 : 
    2203               0 :       value = mTextValue.ToInteger(&err, kAutoDetect);
    2204                 : 
    2205               0 :       if (0 == err) {
    2206               0 :         AppendNCR(aString, value);
    2207                 :       }
    2208                 :     } else {
    2209               0 :       value = nsHTMLEntities::EntityToUnicode(mTextValue);
    2210               0 :       if (-1 < value) {
    2211                 :         // We found a named entity...
    2212               0 :         aString.Assign(PRUnichar(value));
    2213                 :       }
    2214                 :     }
    2215                 :   }
    2216                 : 
    2217               0 :   return value;
    2218                 : }
    2219                 : 
    2220                 : 
    2221                 : const
    2222               0 : nsSubstring& CEntityToken::GetStringValue()
    2223                 : {
    2224               0 :   return mTextValue;
    2225                 : }
    2226                 : 
    2227                 : void
    2228               0 : CEntityToken::GetSource(nsString& anOutputString)
    2229                 : {
    2230               0 :   anOutputString.AppendLiteral("&");
    2231               0 :   anOutputString += mTextValue;
    2232                 :   // Any possible ; is part of our text value.
    2233               0 : }
    2234                 : 
    2235                 : void
    2236               0 : CEntityToken::AppendSourceTo(nsAString& anOutputString)
    2237                 : {
    2238               0 :   anOutputString.AppendLiteral("&");
    2239               0 :   anOutputString += mTextValue;
    2240                 :   // Any possible ; is part of our text value.
    2241               0 : }
    2242                 : 
    2243                 : const PRUnichar*
    2244               0 : GetTagName(PRInt32 aTag)
    2245                 : {
    2246               0 :   const PRUnichar *result = nsHTMLTags::GetStringValue((nsHTMLTag) aTag);
    2247                 : 
    2248               0 :   if (result) {
    2249               0 :     return result;
    2250                 :   }
    2251                 : 
    2252               0 :   if (aTag >= eHTMLTag_userdefined) {
    2253               0 :     return sUserdefined;
    2254                 :   }
    2255                 : 
    2256               0 :   return 0;
    2257                 : }
    2258                 : 
    2259                 : 
    2260               0 : CInstructionToken::CInstructionToken()
    2261               0 :   : CHTMLToken(eHTMLTag_instruction)
    2262                 : {
    2263               0 : }
    2264                 : 
    2265               0 : CInstructionToken::CInstructionToken(const nsAString& aString)
    2266               0 :   : CHTMLToken(eHTMLTag_unknown)
    2267                 : {
    2268               0 :   mTextValue.Assign(aString);
    2269               0 : }
    2270                 : 
    2271                 : nsresult
    2272               0 : CInstructionToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
    2273                 : {
    2274               0 :   mTextValue.AssignLiteral("<?");
    2275               0 :   nsresult result = NS_OK;
    2276               0 :   bool done = false;
    2277                 : 
    2278               0 :   while (NS_OK == result && !done) {
    2279                 :     // Note, this call does *not* consume the >.
    2280               0 :     result = aScanner.ReadUntil(mTextValue, kGreaterThan, false);
    2281               0 :     if (NS_SUCCEEDED(result)) {
    2282                 :       // In HTML, PIs end with a '>', in XML, they end with a '?>'. Cover both
    2283                 :       // cases here.
    2284               0 :       if (!(aFlag & NS_IPARSER_FLAG_XML) ||
    2285               0 :           kQuestionMark == mTextValue.Last()) {
    2286                 :         // This really is the end of the PI.
    2287               0 :         done = true;
    2288                 :       }
    2289                 :       // Need to append this character no matter what.
    2290               0 :       aScanner.GetChar(aChar);
    2291               0 :       mTextValue.Append(aChar);
    2292                 :     }
    2293                 :   }
    2294                 : 
    2295               0 :   if (kEOF == result && !aScanner.IsIncremental()) {
    2296                 :     // Hide the EOF result because there is no more text coming.
    2297               0 :     mInError = true;
    2298               0 :     result = NS_OK;
    2299                 :   }
    2300                 : 
    2301               0 :   return result;
    2302                 : }
    2303                 : 
    2304                 : PRInt32
    2305               0 : CInstructionToken::GetTokenType()
    2306                 : {
    2307               0 :   return eToken_instruction;
    2308                 : }
    2309                 : 
    2310                 : const nsSubstring&
    2311               0 : CInstructionToken::GetStringValue()
    2312                 : {
    2313               0 :   return mTextValue;
    2314                 : }
    2315                 : 
    2316                 : // Doctype decl token
    2317                 : 
    2318              25 : CDoctypeDeclToken::CDoctypeDeclToken(eHTMLTags aTag)
    2319              25 :   : CHTMLToken(aTag)
    2320                 : {
    2321              25 : }
    2322                 : 
    2323               0 : CDoctypeDeclToken::CDoctypeDeclToken(const nsAString& aString, eHTMLTags aTag)
    2324               0 :   : CHTMLToken(aTag), mTextValue(aString)
    2325                 : {
    2326               0 : }
    2327                 : 
    2328                 : /**
    2329                 :  *  This method consumes a doctype element.
    2330                 :  *  Note: I'm rewriting this method to seek to the first <, since quotes can
    2331                 :  *  really screw us up.
    2332                 :  *  XXX Maybe this should do better in XML or strict mode?
    2333                 :  */
    2334                 : nsresult
    2335              25 : CDoctypeDeclToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
    2336                 : {
    2337                 :   static const PRUnichar terminalChars[] =
    2338                 :   { PRUnichar('>'), PRUnichar('<'),
    2339                 :     PRUnichar(0)
    2340                 :   };
    2341              25 :   static const nsReadEndCondition theEndCondition(terminalChars);
    2342                 : 
    2343              25 :   nsScannerIterator start, end;
    2344                 : 
    2345              25 :   aScanner.CurrentPosition(start);
    2346              25 :   aScanner.EndReading(end);
    2347                 : 
    2348              25 :   nsresult result = aScanner.ReadUntil(start, end, theEndCondition, false);
    2349                 : 
    2350              25 :   if (NS_SUCCEEDED(result)) {
    2351                 :     PRUnichar ch;
    2352              25 :     aScanner.Peek(ch);
    2353              25 :     if (ch == kGreaterThan) {
    2354                 :       // Include '>' but not '<' since '<'
    2355                 :       // could belong to another tag.
    2356              25 :       aScanner.GetChar(ch);
    2357              25 :       end.advance(1);
    2358                 :     } else {
    2359               0 :       NS_ASSERTION(kLessThan == ch,
    2360                 :                    "Make sure this doctype decl. is really in error.");
    2361               0 :       mInError = true;
    2362                 :     }
    2363               0 :   } else if (!aScanner.IsIncremental()) {
    2364                 :     // We have reached the document end but haven't
    2365                 :     // found either a '<' or a '>'. Therefore use
    2366                 :     // whatever we have.
    2367               0 :     mInError = true;
    2368               0 :     result = NS_OK;
    2369                 :   }
    2370                 : 
    2371              25 :   if (NS_SUCCEEDED(result)) {
    2372              25 :     start.advance(-2); // Make sure to consume <!
    2373              25 :     CopyUnicodeTo(start, end, mTextValue);
    2374                 :   }
    2375                 : 
    2376              25 :   return result;
    2377                 : }
    2378                 : 
    2379                 : PRInt32
    2380              75 : CDoctypeDeclToken::GetTokenType()
    2381                 : {
    2382              75 :   return eToken_doctypeDecl;
    2383                 : }
    2384                 : 
    2385                 : const nsSubstring&
    2386              25 : CDoctypeDeclToken::GetStringValue()
    2387                 : {
    2388              25 :   return mTextValue;
    2389                 : }
    2390                 : 
    2391                 : void
    2392              25 : CDoctypeDeclToken::SetStringValue(const nsAString& aStr)
    2393                 : {
    2394              25 :   mTextValue.Assign(aStr);
    2395              25 : }

Generated by: LCOV version 1.7