LCOV - code coverage report
Current view: directory - parser/htmlparser/src - nsParser.h (source / functions) Found Hit Coverage
Test: app.info Lines: 16 12 75.0 %
Date: 2012-06-02 Functions: 10 9 90.0 %

       1                 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2                 : /* ***** BEGIN LICENSE BLOCK *****
       3                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       4                 :  *
       5                 :  * The contents of this file are subject to the Mozilla Public License Version
       6                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       7                 :  * the License. You may obtain a copy of the License at
       8                 :  * http://www.mozilla.org/MPL/
       9                 :  *
      10                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      11                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      12                 :  * for the specific language governing rights and limitations under the
      13                 :  * License.
      14                 :  *
      15                 :  * The Original Code is mozilla.org code.
      16                 :  *
      17                 :  * The Initial Developer of the Original Code is
      18                 :  * Netscape Communications Corporation.
      19                 :  * Portions created by the Initial Developer are Copyright (C) 1998
      20                 :  * the Initial Developer. All Rights Reserved.
      21                 :  *
      22                 :  * Contributor(s):
      23                 :  *
      24                 :  * Alternatively, the contents of this file may be used under the terms of
      25                 :  * either of the GNU General Public License Version 2 or later (the "GPL"),
      26                 :  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      27                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      28                 :  * of those above. If you wish to allow use of your version of this file only
      29                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      30                 :  * use your version of this file under the terms of the MPL, indicate your
      31                 :  * decision by deleting the provisions above and replace them with the notice
      32                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      33                 :  * the provisions above, a recipient may use your version of this file under
      34                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      35                 :  *
      36                 :  * ***** END LICENSE BLOCK ***** */
      37                 :  
      38                 : /**
      39                 :  * MODULE NOTES:
      40                 :  * 
      41                 :  *  This class does two primary jobs:
      42                 :  *    1) It iterates the tokens provided during the 
      43                 :  *       tokenization process, identifying where elements
      44                 :  *       begin and end (doing validation and normalization).
      45                 :  *    2) It controls and coordinates with an instance of
      46                 :  *       the IContentSink interface, to coordinate the
      47                 :  *       production of the content model.
      48                 :  *
      49                 :  *  The basic operation of this class assumes that an HTML
      50                 :  *  document is non-normalized. Therefore, we don't process
      51                 :  *  the document in a normalized way. Don't bother to look
      52                 :  *  for methods like: doHead() or doBody().
      53                 :  *
      54                 :  *  Instead, in order to be backward compatible, we must
      55                 :  *  scan the set of tokens and perform this basic set of
      56                 :  *  operations:
      57                 :  *    1)  Determine the token type (easy, since the tokens know)
      58                 :  *    2)  Determine the appropriate section of the HTML document
      59                 :  *        each token belongs in (HTML,HEAD,BODY,FRAMESET).
      60                 :  *    3)  Insert content into our document (via the sink) into
      61                 :  *        the correct section.
      62                 :  *    4)  In the case of tags that belong in the BODY, we must
      63                 :  *        ensure that our underlying document state reflects
      64                 :  *        the appropriate context for our tag. 
      65                 :  *
      66                 :  *        For example, if we see a <TR>, we must ensure our 
      67                 :  *        document contains a table into which the row can
      68                 :  *        be placed. This may result in "implicit containers" 
      69                 :  *        created to ensure a well-formed document.
      70                 :  *         
      71                 :  */
      72                 : 
      73                 : #ifndef NS_PARSER__
      74                 : #define NS_PARSER__
      75                 : 
      76                 : #include "nsIParser.h"
      77                 : #include "nsDeque.h"
      78                 : #include "nsParserNode.h"
      79                 : #include "nsIURL.h"
      80                 : #include "CParserContext.h"
      81                 : #include "nsParserCIID.h"
      82                 : #include "nsITokenizer.h"
      83                 : #include "nsHTMLTags.h"
      84                 : #include "nsDTDUtils.h"
      85                 : #include "nsThreadUtils.h"
      86                 : #include "nsIContentSink.h"
      87                 : #include "nsCOMArray.h"
      88                 : #include "nsCycleCollectionParticipant.h"
      89                 : #include "nsWeakReference.h"
      90                 : 
      91                 : class nsICharsetConverterManager;
      92                 : class nsIDTD;
      93                 : class nsScanner;
      94                 : class nsIThreadPool;
      95                 : 
      96                 : #ifdef _MSC_VER
      97                 : #pragma warning( disable : 4275 )
      98                 : #endif
      99                 : 
     100                 : 
     101                 : class nsParser : public nsIParser,
     102                 :                  public nsIStreamListener,
     103                 :                  public nsSupportsWeakReference
     104                 : {
     105                 :   public:
     106                 :     /**
     107                 :      * Called on module init
     108                 :      */
     109                 :     static nsresult Init();
     110                 : 
     111                 :     /**
     112                 :      * Called on module shutdown
     113                 :      */
     114                 :     static void Shutdown();
     115                 : 
     116              65 :     NS_DECL_CYCLE_COLLECTING_ISUPPORTS
     117           41376 :     NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsParser, nsIParser)
     118                 : 
     119                 :     /**
     120                 :      * default constructor
     121                 :      * @update  gess5/11/98
     122                 :      */
     123                 :     nsParser();
     124                 : 
     125                 :     /**
     126                 :      * Destructor
     127                 :      * @update  gess5/11/98
     128                 :      */
     129                 :     virtual ~nsParser();
     130                 : 
     131                 :     /**
     132                 :      * Select given content sink into parser for parser output
     133                 :      * @update  gess5/11/98
     134                 :      * @param   aSink is the new sink to be used by parser
     135                 :      * @return  nothing (void)
     136                 :      */
     137                 :     NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink);
     138                 : 
     139                 :     /**
     140                 :      * Retrieve the sink set into the parser
     141                 :      * @update  gess5/11/98
     142                 :      * @param   none
     143                 :      * @return  the sink currently set, or NULL
     144                 :      */
     145                 :     NS_IMETHOD_(nsIContentSink*) GetContentSink(void);
     146                 :     
     147                 :     /**
     148                 :      *  Call this method once you've created a parser, and want to instruct it
     149                 :      *  about the command which caused the parser to be constructed. For example,
     150                 :      *  this allows us to select a DTD which can do, say, view-source.
     151                 :      *  
     152                 :      *  @update  gess 3/25/98
     153                 :      *  @param   aCommand -- ptrs to string that contains command
     154                 :      *  @return  nada
     155                 :      */
     156                 :     NS_IMETHOD_(void) GetCommand(nsCString& aCommand);
     157                 :     NS_IMETHOD_(void) SetCommand(const char* aCommand);
     158                 :     NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand);
     159                 : 
     160                 :     /**
     161                 :      *  Call this method once you've created a parser, and want to instruct it
     162                 :      *  about what charset to load
     163                 :      *  
     164                 :      *  @update  ftang 4/23/99
     165                 :      *  @param   aCharset - the charset of a document
     166                 :      *  @param   aSource - the source of the charset
     167                 :      *  @return  nada
     168                 :      */
     169                 :     NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, PRInt32 aSource);
     170                 : 
     171               0 :     NS_IMETHOD_(void) GetDocumentCharset(nsACString& aCharset, PRInt32& aSource)
     172                 :     {
     173               0 :          aCharset = mCharset;
     174               0 :          aSource = mCharsetSource;
     175               0 :     }
     176                 : 
     177                 :     /**
     178                 :      * Cause parser to parse input from given URL 
     179                 :      * @update  gess5/11/98
     180                 :      * @param   aURL is a descriptor for source document
     181                 :      * @param   aListener is a listener to forward notifications to
     182                 :      * @return  TRUE if all went well -- FALSE otherwise
     183                 :      */
     184                 :     NS_IMETHOD Parse(nsIURI* aURL,
     185                 :                      nsIRequestObserver* aListener = nsnull,
     186                 :                      void* aKey = 0,
     187                 :                      nsDTDMode aMode = eDTDMode_autodetect);
     188                 : 
     189                 :     /**
     190                 :      * @update  gess5/11/98
     191                 :      * @param   anHTMLString contains a string-full of real HTML
     192                 :      * @param   appendTokens tells us whether we should insert tokens inline, or append them.
     193                 :      * @return  TRUE if all went well -- FALSE otherwise
     194                 :      */
     195                 :     NS_IMETHOD Parse(const nsAString& aSourceBuffer,
     196                 :                      void* aKey,
     197                 :                      const nsACString& aContentType,
     198                 :                      bool aLastCall,
     199                 :                      nsDTDMode aMode = eDTDMode_autodetect);
     200                 : 
     201                 :     /**
     202                 :      * This method needs documentation
     203                 :      */
     204                 :     NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer,
     205                 :                              nsTArray<nsString>& aTagStack);
     206                 :                              
     207                 :     /**
     208                 :      * This method gets called when the tokens have been consumed, and it's time
     209                 :      * to build the model via the content sink.
     210                 :      * @update  gess5/11/98
     211                 :      * @return  YES if model building went well -- NO otherwise.
     212                 :      */
     213                 :     NS_IMETHOD BuildModel(void);
     214                 : 
     215                 :     NS_IMETHOD        ContinueInterruptedParsing();
     216                 :     NS_IMETHOD_(void) BlockParser();
     217                 :     NS_IMETHOD_(void) UnblockParser();
     218                 :     NS_IMETHOD_(void) ContinueInterruptedParsingAsync();
     219                 :     NS_IMETHOD        Terminate(void);
     220                 : 
     221                 :     /**
     222                 :      * Call this to query whether the parser is enabled or not.
     223                 :      *
     224                 :      *  @update  vidur 4/12/99
     225                 :      *  @return  current state
     226                 :      */
     227                 :     NS_IMETHOD_(bool) IsParserEnabled();
     228                 : 
     229                 :     /**
     230                 :      * Call this to query whether the parser thinks it's done with parsing.
     231                 :      *
     232                 :      *  @update  rickg 5/12/01
     233                 :      *  @return  complete state
     234                 :      */
     235                 :     NS_IMETHOD_(bool) IsComplete();
     236                 : 
     237                 :     /**
     238                 :      *  This rather arcane method (hack) is used as a signal between the
     239                 :      *  DTD and the parser. It allows the DTD to tell the parser that content
     240                 :      *  that comes through (parser::parser(string)) but not consumed should
     241                 :      *  propagate into the next string based parse call.
     242                 :      *  
     243                 :      *  @update  gess 9/1/98
     244                 :      *  @param   aState determines whether we propagate unused string content.
     245                 :      *  @return  current state
     246                 :      */
     247                 :     void SetUnusedInput(nsString& aBuffer);
     248                 : 
     249                 :     /**
     250                 :      * This method gets called (automatically) during incremental parsing
     251                 :      * @update  gess5/11/98
     252                 :      * @return  TRUE if all went well, otherwise FALSE
     253                 :      */
     254                 :     virtual nsresult ResumeParse(bool allowIteration = true, 
     255                 :                                  bool aIsFinalChunk = false,
     256                 :                                  bool aCanInterrupt = true);
     257                 : 
     258                 :      //*********************************************
     259                 :       // These methods are callback methods used by
     260                 :       // net lib to let us know about our inputstream.
     261                 :       //*********************************************
     262                 :     // nsIRequestObserver methods:
     263                 :     NS_DECL_NSIREQUESTOBSERVER
     264                 : 
     265                 :     // nsIStreamListener methods:
     266                 :     NS_DECL_NSISTREAMLISTENER
     267                 : 
     268                 :     void              PushContext(CParserContext& aContext);
     269                 :     CParserContext*   PopContext();
     270                 :     CParserContext*   PeekContext() {return mParserContext;}
     271                 : 
     272                 :     /** 
     273                 :      * Get the channel associated with this parser
     274                 :      * @update harishd,gagan 07/17/01
     275                 :      * @param aChannel out param that will contain the result
     276                 :      * @return NS_OK if successful
     277                 :      */
     278                 :     NS_IMETHOD GetChannel(nsIChannel** aChannel);
     279                 : 
     280                 :     /** 
     281                 :      * Get the DTD associated with this parser
     282                 :      * @update vidur 9/29/99
     283                 :      * @param aDTD out param that will contain the result
     284                 :      * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error
     285                 :      */
     286                 :     NS_IMETHOD GetDTD(nsIDTD** aDTD);
     287                 :   
     288                 :     /**
     289                 :      * Get the nsIStreamListener for this parser
     290                 :      */
     291                 :     virtual nsIStreamListener* GetStreamListener();
     292                 : 
     293                 :     /** 
     294                 :      * Detects the existence of a META tag with charset information in 
     295                 :      * the given buffer.
     296                 :      */
     297                 :     bool DetectMetaTag(const char* aBytes, 
     298                 :                          PRInt32 aLen, 
     299                 :                          nsCString& oCharset, 
     300                 :                          PRInt32& oCharsetSource);
     301                 : 
     302                 :     void SetSinkCharset(nsACString& aCharset);
     303                 : 
     304                 :     /**
     305                 :      *  Removes continue parsing events
     306                 :      *  @update  kmcclusk 5/18/98
     307                 :      */
     308                 : 
     309                 :     NS_IMETHODIMP CancelParsingEvents();
     310                 : 
     311                 :     /**
     312                 :      * Return true.
     313                 :      */
     314                 :     virtual bool IsInsertionPointDefined();
     315                 : 
     316                 :     /**
     317                 :      * No-op.
     318                 :      */
     319                 :     virtual void BeginEvaluatingParserInsertedScript();
     320                 : 
     321                 :     /**
     322                 :      * No-op.
     323                 :      */
     324                 :     virtual void EndEvaluatingParserInsertedScript();
     325                 : 
     326                 :     /**
     327                 :      * No-op.
     328                 :      */
     329                 :     virtual void MarkAsNotScriptCreated(const char* aCommand);
     330                 : 
     331                 :     /**
     332                 :      * Always false.
     333                 :      */
     334                 :     virtual bool IsScriptCreated();
     335                 : 
     336                 :     /**  
     337                 :      *  Set the parser state to indicate whether parsing tokens can be interrupted
     338                 :      *  @param aCanInterrupt true if parser can be interrupted, false if it cannot be interrupted.
     339                 :      *  @update  kmcclusk 5/18/98
     340                 :      */
     341                 :     void SetCanInterrupt(bool aCanInterrupt);
     342                 : 
     343                 :     /**
     344                 :      * This is called when the final chunk has been
     345                 :      * passed to the parser and the content sink has
     346                 :      * interrupted token processing. It schedules
     347                 :      * a ParserContinue PL_Event which will ask the parser
     348                 :      * to HandleParserContinueEvent when it is handled.
     349                 :      * @update  kmcclusk6/1/2001
     350                 :      */
     351                 :     nsresult PostContinueEvent();
     352                 : 
     353                 :     /**
     354                 :      *  Fired when the continue parse event is triggered.
     355                 :      *  @update  kmcclusk 5/18/98
     356                 :      */
     357                 :     void HandleParserContinueEvent(class nsParserContinueEvent *);
     358                 : 
     359            7044 :     static nsICharsetConverterManager* GetCharsetConverterManager() {
     360            7044 :       return sCharsetConverterManager;
     361                 :     }
     362                 : 
     363               1 :     virtual void Reset() {
     364               1 :       Cleanup();
     365               1 :       Initialize();
     366               1 :     }
     367                 : 
     368            6766 :     bool IsScriptExecuting() {
     369            6766 :       return mSink && mSink->IsScriptExecuting();
     370                 :     }
     371                 : 
     372            6766 :     bool IsOkToProcessNetworkData() {
     373            6766 :       return !IsScriptExecuting() && !mProcessingNetworkData;
     374                 :     }
     375                 : 
     376                 :  protected:
     377                 : 
     378                 :     void Initialize(bool aConstructor = false);
     379                 :     void Cleanup();
     380                 : 
     381                 :     /**
     382                 :      * 
     383                 :      * @update  gess5/18/98
     384                 :      * @param 
     385                 :      * @return
     386                 :      */
     387                 :     nsresult WillBuildModel(nsString& aFilename);
     388                 : 
     389                 :     /**
     390                 :      * 
     391                 :      * @update  gess5/18/98
     392                 :      * @param 
     393                 :      * @return
     394                 :      */
     395                 :     nsresult DidBuildModel(nsresult anErrorCode);
     396                 : 
     397                 : private:
     398                 : 
     399                 :     /*******************************************
     400                 :       These are the tokenization methods...
     401                 :      *******************************************/
     402                 : 
     403                 :     /**
     404                 :      *  Part of the code sandwich, this gets called right before
     405                 :      *  the tokenization process begins. The main reason for
     406                 :      *  this call is to allow the delegate to do initialization.
     407                 :      *  
     408                 :      *  @update  gess 3/25/98
     409                 :      *  @param   
     410                 :      *  @return  TRUE if it's ok to proceed
     411                 :      */
     412                 :     bool WillTokenize(bool aIsFinalChunk = false);
     413                 : 
     414                 :    
     415                 :     /**
     416                 :      *  This is the primary control routine. It iteratively
     417                 :      *  consumes tokens until an error occurs or you run out
     418                 :      *  of data.
     419                 :      *  
     420                 :      *  @update  gess 3/25/98
     421                 :      *  @return  error code 
     422                 :      */
     423                 :     nsresult Tokenize(bool aIsFinalChunk = false);
     424                 : 
     425                 :     /**
     426                 :      *  This is the tail-end of the code sandwich for the
     427                 :      *  tokenization process. It gets called once tokenization
     428                 :      *  has completed.
     429                 :      *  
     430                 :      *  @update  gess 3/25/98
     431                 :      *  @param   
     432                 :      *  @return  TRUE if all went well
     433                 :      */
     434                 :     bool DidTokenize(bool aIsFinalChunk = false);
     435                 : 
     436                 : protected:
     437                 :     //*********************************************
     438                 :     // And now, some data members...
     439                 :     //*********************************************
     440                 :     
     441                 :       
     442                 :     CParserContext*              mParserContext;
     443                 :     nsCOMPtr<nsIDTD>             mDTD;
     444                 :     nsCOMPtr<nsIRequestObserver> mObserver;
     445                 :     nsCOMPtr<nsIContentSink>     mSink;
     446                 :     nsIRunnable*                 mContinueEvent;  // weak ref
     447                 :    
     448                 :     nsTokenAllocator          mTokenAllocator;
     449                 :     
     450                 :     eParserCommands     mCommand;
     451                 :     nsresult            mInternalState;
     452                 :     PRInt32             mStreamStatus;
     453                 :     PRInt32             mCharsetSource;
     454                 :     
     455                 :     PRUint16            mFlags;
     456                 : 
     457                 :     nsString            mUnusedInput;
     458                 :     nsCString           mCharset;
     459                 :     nsCString           mCommandStr;
     460                 : 
     461                 :     bool                mProcessingNetworkData;
     462                 :     bool                mIsAboutBlank;
     463                 : 
     464                 :     static nsICharsetConverterManager* sCharsetConverterManager;
     465                 : };
     466                 : 
     467                 : #endif 
     468                 : 

Generated by: LCOV version 1.7