1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is mozilla.org code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 1998
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : *
24 : * Alternatively, the contents of this file may be used under the terms of
25 : * either of the GNU General Public License Version 2 or later (the "GPL"),
26 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 : * in which case the provisions of the GPL or the LGPL are applicable instead
28 : * of those above. If you wish to allow use of your version of this file only
29 : * under the terms of either the GPL or the LGPL, and not to allow others to
30 : * use your version of this file under the terms of the MPL, indicate your
31 : * decision by deleting the provisions above and replace them with the notice
32 : * and other provisions required by the GPL or the LGPL. If you do not delete
33 : * the provisions above, a recipient may use your version of this file under
34 : * the terms of any one of the MPL, the GPL or the LGPL.
35 : *
36 : * ***** END LICENSE BLOCK ***** */
37 :
38 : /**
39 : * MODULE NOTES:
40 : *
41 : * This class does two primary jobs:
42 : * 1) It iterates the tokens provided during the
43 : * tokenization process, identifying where elements
44 : * begin and end (doing validation and normalization).
45 : * 2) It controls and coordinates with an instance of
46 : * the IContentSink interface, to coordinate the
47 : * production of the content model.
48 : *
49 : * The basic operation of this class assumes that an HTML
50 : * document is non-normalized. Therefore, we don't process
51 : * the document in a normalized way. Don't bother to look
52 : * for methods like: doHead() or doBody().
53 : *
54 : * Instead, in order to be backward compatible, we must
55 : * scan the set of tokens and perform this basic set of
56 : * operations:
57 : * 1) Determine the token type (easy, since the tokens know)
58 : * 2) Determine the appropriate section of the HTML document
59 : * each token belongs in (HTML,HEAD,BODY,FRAMESET).
60 : * 3) Insert content into our document (via the sink) into
61 : * the correct section.
62 : * 4) In the case of tags that belong in the BODY, we must
63 : * ensure that our underlying document state reflects
64 : * the appropriate context for our tag.
65 : *
66 : * For example, if we see a <TR>, we must ensure our
67 : * document contains a table into which the row can
68 : * be placed. This may result in "implicit containers"
69 : * created to ensure a well-formed document.
70 : *
71 : */
72 :
73 : #ifndef NS_PARSER__
74 : #define NS_PARSER__
75 :
76 : #include "nsIParser.h"
77 : #include "nsDeque.h"
78 : #include "nsParserNode.h"
79 : #include "nsIURL.h"
80 : #include "CParserContext.h"
81 : #include "nsParserCIID.h"
82 : #include "nsITokenizer.h"
83 : #include "nsHTMLTags.h"
84 : #include "nsDTDUtils.h"
85 : #include "nsThreadUtils.h"
86 : #include "nsIContentSink.h"
87 : #include "nsCOMArray.h"
88 : #include "nsCycleCollectionParticipant.h"
89 : #include "nsWeakReference.h"
90 :
91 : class nsICharsetConverterManager; // type of sCharsetConverterManager, returned by GetCharsetConverterManager()
92 : class nsIDTD; // held in mDTD and handed out through GetDTD()
93 : class nsScanner; // NOTE(review): not referenced by the nsParser declaration in this header -- confirm still needed
94 : class nsIThreadPool; // NOTE(review): not referenced by the nsParser declaration in this header -- confirm still needed
95 :
96 : #ifdef _MSC_VER // MSVC only: silence C4275 (non dll-interface class used as base for dll-interface class)
97 : #pragma warning( disable : 4275 )
98 : #endif
99 :
100 : // nsParser: parser class deriving from nsIParser, nsIStreamListener and nsSupportsWeakReference.
101 : class nsParser : public nsIParser,
102 : public nsIStreamListener,
103 : public nsSupportsWeakReference
104 : {
105 : public:
106 : /**
107 : * Called on module init
108 : */
109 : static nsresult Init();
110 :
111 : /**
112 : * Called on module shutdown
113 : */
114 : static void Shutdown();
115 :
116 65 : NS_DECL_CYCLE_COLLECTING_ISUPPORTS
117 41376 : NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsParser, nsIParser)
118 :
119 : /**
120 : * Default constructor
121 : * @update gess5/11/98
122 : */
123 : nsParser();
124 :
125 : /**
126 : * Destructor
127 : * @update gess5/11/98
128 : */
129 : virtual ~nsParser();
130 :
131 : /**
132 : * Select given content sink into parser for parser output
133 : * @update gess5/11/98
134 : * @param aSink is the new sink to be used by parser
135 : * @return nothing (the declared return type is void)
136 : */
137 : NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink);
138 :
139 : /**
140 : * Retrieve the sink set into the parser
141 : * @update gess5/11/98
142 : * (takes no parameters)
143 : * @return the content sink pointer; presumably null when no sink is set -- TODO confirm
144 : */
145 : NS_IMETHOD_(nsIContentSink*) GetContentSink(void);
146 :
147 : /**
148 : * Call this method once you've created a parser, and want to instruct it
149 : * about the command which caused the parser to be constructed. For example,
150 : * this allows us to select a DTD which can do, say, view-source.
151 : * (The text above describes the SetCommand overloads. GetCommand fills aCommand -- presumably with the current command; confirm.)
152 : * @update gess 3/25/98
153 : * @param aCommand -- the command, given as a C string or as an eParserCommands value
154 : * @return nothing (the declared return type is void)
155 : */
156 : NS_IMETHOD_(void) GetCommand(nsCString& aCommand);
157 : NS_IMETHOD_(void) SetCommand(const char* aCommand);
158 : NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand);
159 :
160 : /**
161 : * Call this method once you've created a parser, and want to instruct it
162 : * about what charset to load
163 : *
164 : * @update ftang 4/23/99
165 : * @param aCharset -- the charset of a document
166 : * @param aSource -- the source of the charset
167 : * @return nothing (the declared return type is void)
168 : */
169 : NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, PRInt32 aSource);
170 : // Inline getter: copies mCharset into aCharset and mCharsetSource into aSource.
171 0 : NS_IMETHOD_(void) GetDocumentCharset(nsACString& aCharset, PRInt32& aSource)
172 : {
173 0 : aCharset = mCharset;
174 0 : aSource = mCharsetSource;
175 0 : }
176 :
177 : /**
178 : * Cause parser to parse input from given URL
179 : * @update gess5/11/98
180 : * @param aURL is a descriptor for source document
181 : * @param aListener is a listener to forward notifications to
182 : * @return a result code (NS_IMETHOD), not a boolean; presumably NS_OK if all went well -- TODO confirm
183 : */
184 : NS_IMETHOD Parse(nsIURI* aURL,
185 : nsIRequestObserver* aListener = nsnull,
186 : void* aKey = 0, // opaque key; its meaning is not documented in this header
187 : nsDTDMode aMode = eDTDMode_autodetect); // DTD mode; defaults to autodetect
188 :
189 : /**
190 : * @update gess5/11/98
191 : * @param aSourceBuffer contains a string-full of real HTML
192 : * @param aLastCall -- NOTE(review): presumably true for the final buffer; the old 'appendTokens' entry was stale -- confirm
193 : * @return a result code (NS_IMETHOD), not a boolean; presumably NS_OK if all went well -- TODO confirm
194 : */
195 : NS_IMETHOD Parse(const nsAString& aSourceBuffer,
196 : void* aKey,
197 : const nsACString& aContentType,
198 : bool aLastCall,
199 : nsDTDMode aMode = eDTDMode_autodetect);
200 :
201 : /**
202 : * NOTE(review): still undocumented. Judging by the signature only, parses aSourceBuffer as a fragment with aTagStack as context -- confirm.
203 : */
204 : NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer,
205 : nsTArray<nsString>& aTagStack);
206 :
207 : /**
208 : * This method gets called when the tokens have been consumed, and it's time
209 : * to build the model via the content sink.
210 : * @update gess5/11/98
211 : * @return a result code (NS_IMETHOD), not YES/NO; presumably NS_OK if model building went well -- TODO confirm
212 : */
213 : NS_IMETHOD BuildModel(void);
214 :
215 : NS_IMETHOD ContinueInterruptedParsing();
216 : NS_IMETHOD_(void) BlockParser();
217 : NS_IMETHOD_(void) UnblockParser();
218 : NS_IMETHOD_(void) ContinueInterruptedParsingAsync();
219 : NS_IMETHOD Terminate(void);
220 :
221 : /**
222 : * Call this to query whether the parser is enabled or not.
223 : *
224 : * @update vidur 4/12/99
225 : * @return current state
226 : */
227 : NS_IMETHOD_(bool) IsParserEnabled();
228 :
229 : /**
230 : * Call this to query whether the parser thinks it's done with parsing.
231 : *
232 : * @update rickg 5/12/01
233 : * @return complete state
234 : */
235 : NS_IMETHOD_(bool) IsComplete();
236 :
237 : /**
238 : * This rather arcane method (hack) is used as a signal between the
239 : * DTD and the parser. It allows the DTD to tell the parser that content
240 : * that comes through (parser::parser(string)) but not consumed should
241 : * propagate into the next string based parse call.
242 : *
243 : * @update gess 9/1/98
244 : * @param aBuffer -- presumably the unconsumed string content to carry over (the old 'aState' entry was stale) -- TODO confirm
245 : * @return nothing (the declared return type is void)
246 : */
247 : void SetUnusedInput(nsString& aBuffer);
248 :
249 : /**
250 : * This method gets called (automatically) during incremental parsing
251 : * @update gess5/11/98
252 : * @return an nsresult, not a boolean; presumably NS_OK if all went well -- TODO confirm
253 : */
254 : virtual nsresult ResumeParse(bool allowIteration = true,
255 : bool aIsFinalChunk = false,
256 : bool aCanInterrupt = true);
257 :
258 : //*********************************************
259 : // These methods are callback methods used by
260 : // net lib to let us know about our inputstream.
261 : //*********************************************
262 : // nsIRequestObserver methods:
263 : NS_DECL_NSIREQUESTOBSERVER
264 :
265 : // nsIStreamListener methods:
266 : NS_DECL_NSISTREAMLISTENER
267 : // Parser context handling. PeekContext() simply returns mParserContext (defined inline below).
268 : void PushContext(CParserContext& aContext);
269 : CParserContext* PopContext();
270 : CParserContext* PeekContext() {return mParserContext;}
271 :
272 : /**
273 : * Get the channel associated with this parser
274 : * @update harishd,gagan 07/17/01
275 : * @param aChannel out param that will contain the result
276 : * @return NS_OK if successful
277 : */
278 : NS_IMETHOD GetChannel(nsIChannel** aChannel);
279 :
280 : /**
281 : * Get the DTD associated with this parser
282 : * @update vidur 9/29/99
283 : * @param aDTD out param that will contain the result
284 : * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error
285 : */
286 : NS_IMETHOD GetDTD(nsIDTD** aDTD);
287 :
288 : /**
289 : * Get the nsIStreamListener for this parser
290 : */
291 : virtual nsIStreamListener* GetStreamListener();
292 :
293 : /**
294 : * Detects the existence of a META tag with charset information in
295 : * the given buffer.
296 : */
297 : bool DetectMetaTag(const char* aBytes,
298 : PRInt32 aLen,
299 : nsCString& oCharset,
300 : PRInt32& oCharsetSource);
301 :
302 : void SetSinkCharset(nsACString& aCharset);
303 :
304 : /**
305 : * Removes continue parsing events
306 : * @update kmcclusk 5/18/98
307 : */
308 : // NOTE(review): declared with NS_IMETHODIMP, unlike the NS_IMETHOD declarations elsewhere in this class -- confirm this is intended.
309 : NS_IMETHODIMP CancelParsingEvents();
310 :
311 : /**
312 : * Return true.
313 : */
314 : virtual bool IsInsertionPointDefined();
315 :
316 : /**
317 : * No-op.
318 : */
319 : virtual void BeginEvaluatingParserInsertedScript();
320 :
321 : /**
322 : * No-op.
323 : */
324 : virtual void EndEvaluatingParserInsertedScript();
325 :
326 : /**
327 : * No-op.
328 : */
329 : virtual void MarkAsNotScriptCreated(const char* aCommand);
330 :
331 : /**
332 : * Always false.
333 : */
334 : virtual bool IsScriptCreated();
335 :
336 : /**
337 : * Set the parser state to indicate whether parsing tokens can be interrupted
338 : * @param aCanInterrupt true if parser can be interrupted, false if it can not be interrupted.
339 : * @update kmcclusk 5/18/98
340 : */
341 : void SetCanInterrupt(bool aCanInterrupt);
342 :
343 : /**
344 : * This is called when the final chunk has been
345 : * passed to the parser and the content sink has
346 : * interrupted token processing. It schedules
347 : * a ParserContinue PL_Event which will ask the parser
348 : * to HandleParserContinueEvent when it is handled.
349 : * @update kmcclusk6/1/2001
350 : */
351 : nsresult PostContinueEvent();
352 :
353 : /**
354 : * Fired when the continue parse event is triggered.
355 : * @update kmcclusk 5/18/98
356 : */
357 : void HandleParserContinueEvent(class nsParserContinueEvent *);
358 : // Returns the static sCharsetConverterManager pointer as-is.
359 7044 : static nsICharsetConverterManager* GetCharsetConverterManager() {
360 7044 : return sCharsetConverterManager;
361 : }
362 : // Reset: runs Cleanup() and then Initialize() with its default argument (aConstructor = false).
363 1 : virtual void Reset() {
364 1 : Cleanup();
365 1 : Initialize();
366 1 : }
367 : // True when a sink is set (mSink non-null) and that sink reports IsScriptExecuting().
368 6766 : bool IsScriptExecuting() {
369 6766 : return mSink && mSink->IsScriptExecuting();
370 : }
371 : // True only when no script is executing and mProcessingNetworkData is false.
372 6766 : bool IsOkToProcessNetworkData() {
373 6766 : return !IsScriptExecuting() && !mProcessingNetworkData;
374 : }
375 :
376 : protected:
377 :
378 : void Initialize(bool aConstructor = false);
379 : void Cleanup();
380 :
381 : /**
382 : * NOTE(review): undocumented in this header; see the implementation.
383 : * @update gess5/18/98
384 : * @param aFilename -- meaning not documented here
385 : * @return an nsresult
386 : */
387 : nsresult WillBuildModel(nsString& aFilename);
388 :
389 : /**
390 : * NOTE(review): undocumented in this header; see the implementation.
391 : * @update gess5/18/98
392 : * @param anErrorCode -- meaning not documented here
393 : * @return an nsresult
394 : */
395 : nsresult DidBuildModel(nsresult anErrorCode);
396 :
397 : private:
398 :
399 : /*******************************************
400 : These are the tokenization methods...
401 : *******************************************/
402 :
403 : /**
404 : * Part of the code sandwich, this gets called right before
405 : * the tokenization process begins. The main reason for
406 : * this call is to allow the delegate to do initialization.
407 : *
408 : * @update gess 3/25/98
409 : * @param aIsFinalChunk -- presumably true for the last chunk of input; defaults to false -- TODO confirm
410 : * @return TRUE if it's ok to proceed
411 : */
412 : bool WillTokenize(bool aIsFinalChunk = false);
413 :
414 :
415 : /**
416 : * This is the primary control routine. It iteratively
417 : * consumes tokens until an error occurs or you run out
418 : * of data.
419 : *
420 : * @update gess 3/25/98
421 : * @return error code
422 : */
423 : nsresult Tokenize(bool aIsFinalChunk = false);
424 :
425 : /**
426 : * This is the tail-end of the code sandwich for the
427 : * tokenization process. It gets called once tokenization
428 : * has completed.
429 : *
430 : * @update gess 3/25/98
431 : * @param aIsFinalChunk -- presumably true for the last chunk of input; defaults to false -- TODO confirm
432 : * @return TRUE if all went well
433 : */
434 : bool DidTokenize(bool aIsFinalChunk = false);
435 :
436 : protected:
437 : //*********************************************
438 : // And now, some data members...
439 : //*********************************************
440 :
441 :
442 : CParserContext* mParserContext; // returned by PeekContext()
443 : nsCOMPtr<nsIDTD> mDTD;
444 : nsCOMPtr<nsIRequestObserver> mObserver;
445 : nsCOMPtr<nsIContentSink> mSink; // queried by IsScriptExecuting()
446 : nsIRunnable* mContinueEvent; // weak ref
447 :
448 : nsTokenAllocator mTokenAllocator;
449 :
450 : eParserCommands mCommand;
451 : nsresult mInternalState;
452 : PRInt32 mStreamStatus;
453 : PRInt32 mCharsetSource; // copied out by GetDocumentCharset()
454 :
455 : PRUint16 mFlags;
456 :
457 : nsString mUnusedInput;
458 : nsCString mCharset; // copied out by GetDocumentCharset()
459 : nsCString mCommandStr;
460 :
461 : bool mProcessingNetworkData; // read by IsOkToProcessNetworkData()
462 : bool mIsAboutBlank;
463 :
464 : static nsICharsetConverterManager* sCharsetConverterManager; // returned by GetCharsetConverterManager()
465 : };
466 :
467 : #endif
468 :
|