1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set sw=2 ts=2 et tw=78: */
3 : /* ***** BEGIN LICENSE BLOCK *****
4 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 : *
6 : * The contents of this file are subject to the Mozilla Public License Version
7 : * 1.1 (the "License"); you may not use this file except in compliance with
8 : * the License. You may obtain a copy of the License at
9 : * http://www.mozilla.org/MPL/
10 : *
11 : * Software distributed under the License is distributed on an "AS IS" basis,
12 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 : * for the specific language governing rights and limitations under the
14 : * License.
15 : *
16 : * The Original Code is mozilla.org code.
17 : *
18 : * The Initial Developer of the Original Code is
19 : * Netscape Communications Corporation.
20 : * Portions created by the Initial Developer are Copyright (C) 1998
21 : * the Initial Developer. All Rights Reserved.
22 : *
23 : * Contributor(s):
24 : * Blake Kaplan <mrbkap@gmail.com>
25 : *
26 : * Alternatively, the contents of this file may be used under the terms of
27 : * either of the GNU General Public License Version 2 or later (the "GPL"),
28 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 : * in which case the provisions of the GPL or the LGPL are applicable instead
30 : * of those above. If you wish to allow use of your version of this file only
31 : * under the terms of either the GPL or the LGPL, and not to allow others to
32 : * use your version of this file under the terms of the MPL, indicate your
33 : * decision by deleting the provisions above and replace them with the notice
34 : * and other provisions required by the GPL or the LGPL. If you do not delete
35 : * the provisions above, a recipient may use your version of this file under
36 : * the terms of any one of the MPL, the GPL or the LGPL.
37 : *
38 : * ***** END LICENSE BLOCK ***** */
39 :
40 :
41 : /**
42 : * @file nsHTMLTokenizer.cpp
43 : * This is an implementation of the nsITokenizer interface.
44 : * This file contains the implementation of a tokenizer for HTML documents.
45 : * It makes tradeoffs between compatibility with older parsers and the SGML
46 : * specification. Note that most of the real "tokenization" takes place in
47 : * nsHTMLTokens.cpp.
48 : */
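
/*
 * Editorial illustration (not part of the original file): for a small input
 * such as
 *
 *   <p class="intro">Hi</p>
 *
 * this tokenizer ends up queueing roughly the following token sequence:
 *
 *   eToken_start(p), eToken_attribute(class="intro"),
 *   eToken_text("Hi"), eToken_end(p)
 *
 * See ConsumeStartTag(), ConsumeAttributes(), ConsumeText() and
 * ConsumeEndTag() below; newlines and runs of other whitespace get their
 * own eToken_newline / eToken_whitespace tokens.
 */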
49 :
50 : #include "nsIAtom.h"
51 : #include "nsHTMLTokenizer.h"
52 : #include "nsScanner.h"
53 : #include "nsElementTable.h"
54 : #include "nsReadableUtils.h"
55 : #include "nsUnicharUtils.h"
56 : #include "nsParserConstants.h"
57 :
58 : /************************************************************************
59 : And now for the main class -- nsHTMLTokenizer...
60 : ************************************************************************/
61 :
62 : /**
63 : * Satisfy the nsISupports interface.
64 : */
65 140 : NS_IMPL_ISUPPORTS1(nsHTMLTokenizer, nsITokenizer)
66 :
67 : /**
68 : * Default constructor
69 : *
70 : * @param aParseMode The current mode the document is in (quirks, etc.)
71 : * @param aDocType The document type of the current document
72 : * @param aCommand What we are trying to do (view-source, parse a fragment, etc.)
73 : */
74 28 : nsHTMLTokenizer::nsHTMLTokenizer(nsDTDMode aParseMode,
75 : eParserDocType aDocType,
76 : eParserCommands aCommand,
77 : PRUint32 aFlags)
78 28 : : mTokenDeque(0), mFlags(aFlags)
79 : {
80 28 : if (aParseMode == eDTDMode_full_standards ||
81 : aParseMode == eDTDMode_almost_standards) {
82 0 : mFlags |= NS_IPARSER_FLAG_STRICT_MODE;
83 28 : } else if (aParseMode == eDTDMode_quirks) {
84 28 : mFlags |= NS_IPARSER_FLAG_QUIRKS_MODE;
85 0 : } else if (aParseMode == eDTDMode_autodetect) {
86 0 : mFlags |= NS_IPARSER_FLAG_AUTO_DETECT_MODE;
87 : } else {
88 0 : mFlags |= NS_IPARSER_FLAG_UNKNOWN_MODE;
89 : }
90 :
91 28 : if (aDocType == ePlainText) {
92 0 : mFlags |= NS_IPARSER_FLAG_PLAIN_TEXT;
93 28 : } else if (aDocType == eXML) {
94 0 : mFlags |= NS_IPARSER_FLAG_XML;
95 28 : } else if (aDocType == eHTML_Quirks ||
96 : aDocType == eHTML_Strict) {
97 28 : mFlags |= NS_IPARSER_FLAG_HTML;
98 : }
99 :
100 : mFlags |= aCommand == eViewSource
101 : ? NS_IPARSER_FLAG_VIEW_SOURCE
102 28 : : NS_IPARSER_FLAG_VIEW_NORMAL;
103 :
104 28 : NS_ASSERTION(!(mFlags & NS_IPARSER_FLAG_XML) ||
105 : (mFlags & NS_IPARSER_FLAG_VIEW_SOURCE),
106 : "Why isn't this XML document going through our XML parser?");
107 :
108 28 : mTokenAllocator = nsnull;
109 28 : mTokenScanPos = 0;
110 28 : }
111 :
112 : /**
113 : * The destructor ensures that we don't leak any left over tokens.
114 : */
115 84 : nsHTMLTokenizer::~nsHTMLTokenizer()
116 : {
117 28 : if (mTokenDeque.GetSize()) {
118 0 : CTokenDeallocator theDeallocator(mTokenAllocator->GetArenaPool());
119 0 : mTokenDeque.ForEach(theDeallocator);
120 : }
121 112 : }
122 :
123 : /*static*/ PRUint32
124 56 : nsHTMLTokenizer::GetFlags(const nsIContentSink* aSink)
125 : {
126 56 : PRUint32 flags = 0;
127 : nsCOMPtr<nsIHTMLContentSink> sink =
128 112 : do_QueryInterface(const_cast<nsIContentSink*>(aSink));
129 56 : if (sink) {
130 56 : bool enabled = true;
131 56 : sink->IsEnabled(eHTMLTag_frameset, &enabled);
132 56 : if (enabled) {
133 56 : flags |= NS_IPARSER_FLAG_FRAMES_ENABLED;
134 : }
135 56 : sink->IsEnabled(eHTMLTag_script, &enabled);
136 56 : if (enabled) {
137 56 : flags |= NS_IPARSER_FLAG_SCRIPT_ENABLED;
138 : }
139 : }
140 56 : return flags;
141 : }
142 :
143 : /*******************************************************************
144 :   Here begin the real working methods for the tokenizer.
145 : *******************************************************************/
146 :
147 : /**
148 : * Adds a token onto the end of the deque if aResult is a successful result.
149 : * Otherwise, this function frees aToken and sets it to nsnull.
150 : *
151 : * @param aToken The token that wants to be added.
152 : * @param aResult The error code that will be used to determine if we actually
153 : * want to push this token.
154 : * @param aDeque The deque we want to push aToken onto.
155 : * @param aTokenAllocator The allocator we use to free aToken in case aResult
156 : * is not a success code.
157 : */
158 : /* static */
159 : void
160 3037 : nsHTMLTokenizer::AddToken(CToken*& aToken,
161 : nsresult aResult,
162 : nsDeque* aDeque,
163 : nsTokenAllocator* aTokenAllocator)
164 : {
165 3037 : if (aToken && aDeque) {
166 3037 : if (NS_SUCCEEDED(aResult)) {
167 3037 : aDeque->Push(aToken);
168 : } else {
169 0 : IF_FREE(aToken, aTokenAllocator);
170 : }
171 : }
172 3037 : }
173 :
174 : /**
175 :  * Retrieve a pointer to our token allocator (the token recycler)...
176 : *
177 : * @return Pointer to recycler (or null)
178 : */
179 : nsTokenAllocator*
180 2573 : nsHTMLTokenizer::GetTokenAllocator()
181 : {
182 2573 : return mTokenAllocator;
183 : }
184 :
185 : /**
186 : * This method provides access to the topmost token in the tokenDeque.
187 : * The token is not really removed from the list.
188 : *
189 : * @return Pointer to token
190 : */
191 : CToken*
192 0 : nsHTMLTokenizer::PeekToken()
193 : {
194 0 : return (CToken*)mTokenDeque.PeekFront();
195 : }
196 :
197 : /**
198 : * This method provides access to the topmost token in the tokenDeque.
199 : * The token is really removed from the list; if the list is empty we return 0.
200 : *
201 : * @return Pointer to token or NULL
202 : */
203 : CToken*
204 3178 : nsHTMLTokenizer::PopToken()
205 : {
206 3178 : return (CToken*)mTokenDeque.PopFront();
207 : }
208 :
209 :
210 : /**
211 : * Pushes a token onto the front of our deque such that the next call to
212 : * PopToken() or PeekToken() will return that token.
213 : *
214 : * @param theToken The next token to be processed
215 : * @return theToken
216 : */
217 : CToken*
218 85 : nsHTMLTokenizer::PushTokenFront(CToken* theToken)
219 : {
220 85 : mTokenDeque.PushFront(theToken);
221 85 : return theToken;
222 : }
223 :
224 : /**
225 : * Pushes a token onto the deque.
226 : *
227 : * @param theToken the new token.
228 : * @return theToken
229 : */
230 : CToken*
231 0 : nsHTMLTokenizer::PushToken(CToken* theToken)
232 : {
233 0 : mTokenDeque.Push(theToken);
234 0 : return theToken;
235 : }
236 :
237 : /**
238 : * Returns the size of the deque.
239 : *
240 : * @return The number of remaining tokens.
241 : */
242 : PRInt32
243 581 : nsHTMLTokenizer::GetCount()
244 : {
245 581 : return mTokenDeque.GetSize();
246 : }
247 :
248 : /**
249 : * Allows access to an arbitrary token in the deque. The accessed token is left
250 : * in the deque.
251 : *
252 : * @param anIndex The index of the target token. Token 0 would be the same as
253 : * the result of a call to PeekToken()
254 : * @return The requested token.
255 : */
256 : CToken*
257 28 : nsHTMLTokenizer::GetTokenAt(PRInt32 anIndex)
258 : {
259 28 : return (CToken*)mTokenDeque.ObjectAt(anIndex);
260 : }
261 :
262 : /**
263 : * This method is part of the "sandwich" that occurs when we want to tokenize
264 : * a document. This prepares us to be able to tokenize properly.
265 : *
266 : * @param aIsFinalChunk Whether this is the last chunk of data that we will
267 : * get to see.
268 : * @param aTokenAllocator The token allocator to use for this document.
269 : * @return Our success in setting up.
270 : */
271 : nsresult
272 53 : nsHTMLTokenizer::WillTokenize(bool aIsFinalChunk,
273 : nsTokenAllocator* aTokenAllocator)
274 : {
275 53 : mTokenAllocator = aTokenAllocator;
276 53 : mIsFinalChunk = aIsFinalChunk;
277 :
278 : // Cause ScanDocStructure to search from here for new tokens...
279 53 : mTokenScanPos = mTokenDeque.GetSize();
280 53 : return NS_OK;
281 : }
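
/*
 * Editorial sketch (not part of the original file): roughly how a caller
 * drives the WillTokenize()/ConsumeToken()/DidTokenize() "sandwich"
 * described above. The helper name SketchTokenizeChunk is hypothetical,
 * and the scanner/allocator are assumed to be set up elsewhere; only the
 * calls made on aTokenizer are taken from this file.
 */
#if 0
static nsresult
SketchTokenizeChunk(nsHTMLTokenizer* aTokenizer,
                    nsScanner& aScanner,
                    nsTokenAllocator* aAllocator,
                    bool aIsFinalChunk)
{
  nsresult rv = aTokenizer->WillTokenize(aIsFinalChunk, aAllocator);
  while (NS_SUCCEEDED(rv)) {
    bool flushTokens = false;
    rv = aTokenizer->ConsumeToken(aScanner, flushTokens); // queues tokens
    if (flushTokens) {
      // e.g. a complete <script> was seen; the caller processes the queued
      // tokens (via PopToken()) before tokenizing any further.
      break;
    }
  }
  // A kEOF result simply means this chunk of data is exhausted.
  aTokenizer->DidTokenize(aIsFinalChunk); // runs ScanDocStructure()
  return NS_OK;
}
#endif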
282 :
283 : /**
284 : * Pushes all of the tokens in aDeque onto the front of our deque so they
285 : * get processed before any other tokens.
286 : *
287 : * @param aDeque The deque with the tokens in it.
288 : */
289 : void
290 28 : nsHTMLTokenizer::PrependTokens(nsDeque& aDeque)
291 : {
292 28 : PRInt32 aCount = aDeque.GetSize();
293 :
294 82 : for (PRInt32 anIndex = 0; anIndex < aCount; ++anIndex) {
295 54 : CToken* theToken = (CToken*)aDeque.Pop();
296 54 : PushTokenFront(theToken);
297 : }
298 28 : }
299 :
300 : /**
301 : * Copies the state flags from aTokenizer into this tokenizer. This is used
302 : * to pass information around between the main tokenizer and tokenizers
303 : * created for document.write() calls.
304 : *
305 : * @param aTokenizer The tokenizer with more information in it.
306 : * @return NS_OK
307 : */
308 : nsresult
309 0 : nsHTMLTokenizer::CopyState(nsITokenizer* aTokenizer)
310 : {
311 0 : if (aTokenizer) {
312 0 : mFlags = ((nsHTMLTokenizer*)aTokenizer)->mFlags;
313 : }
314 :
315 0 : return NS_OK;
316 : }
317 :
318 : /**
319 :  * This is a utility method for ScanDocStructure; it finds a given
320 : * tag in the stack. The return value is meant to be used with
321 : * nsDeque::ObjectAt() on aTagStack.
322 : *
323 : * @param aTag -- the ID of the tag we're seeking
324 : * @param aTagStack -- the stack to be searched
325 : * @return index position of tag in stack if found, otherwise kNotFound
326 : */
327 : static PRInt32
328 576 : FindLastIndexOfTag(eHTMLTags aTag, nsDeque &aTagStack)
329 : {
330 576 : PRInt32 theCount = aTagStack.GetSize();
331 :
332 576 : while (0 < theCount) {
333 3794 : CHTMLToken* theToken = (CHTMLToken*)aTagStack.ObjectAt(--theCount);
334 3794 : if (theToken) {
335 3794 : eHTMLTags theTag = (eHTMLTags)theToken->GetTypeID();
336 3794 : if (theTag == aTag) {
337 296 : return theCount;
338 : }
339 : }
340 : }
341 :
342 280 : return kNotFound;
343 : }
344 :
345 : /**
346 : * This method scans the sequence of tokens to determine whether or not the
347 : * tag structure of the document is well formed. In well formed cases, we can
348 : * skip doing residual style handling and allow inlines to contain block-level
349 : * elements.
350 : *
351 : * @param aFinalChunk Is unused.
352 : * @return Success (currently, this function cannot fail).
353 : */
354 53 : nsresult nsHTMLTokenizer::ScanDocStructure(bool aFinalChunk)
355 : {
356 53 : nsresult result = NS_OK;
357 53 : if (!mTokenDeque.GetSize()) {
358 28 : return result;
359 : }
360 :
361 25 : CHTMLToken* theToken = (CHTMLToken*)mTokenDeque.ObjectAt(mTokenScanPos);
362 :
363 : // Start by finding the first start tag that hasn't been reviewed.
364 50 : while (mTokenScanPos > 0) {
365 0 : if (theToken) {
366 0 : eHTMLTokenTypes theType = eHTMLTokenTypes(theToken->GetTokenType());
367 0 : if (theType == eToken_start &&
368 0 : theToken->GetContainerInfo() == eFormUnknown) {
369 0 : break;
370 : }
371 : }
372 0 : theToken = (CHTMLToken*)mTokenDeque.ObjectAt(--mTokenScanPos);
373 : }
374 :
375 : // Now that we know where to start, let's walk through the
376 : // tokens to see which are well-formed. Stop when you run out
377 : // of fresh tokens.
378 :
379 50 : nsDeque theStack(0);
380 50 : nsDeque tempStack(0);
381 25 : PRInt32 theStackDepth = 0;
382 : // Don't bother if we get ridiculously deep.
383 : static const PRInt32 theMaxStackDepth = 200;
384 :
385 3087 : while (theToken && theStackDepth < theMaxStackDepth) {
386 3037 : eHTMLTokenTypes theType = eHTMLTokenTypes(theToken->GetTokenType());
387 3037 : eHTMLTags theTag = (eHTMLTags)theToken->GetTypeID();
388 :
389 3037 : if (nsHTMLElement::IsContainer(theTag)) { // Bug 54117
390 1895 : bool theTagIsBlock = gHTMLElements[theTag].IsMemberOf(kBlockEntity);
391 : bool theTagIsInline = theTagIsBlock
392 : ? false
393 1895 : : gHTMLElements[theTag].IsMemberOf(kInlineEntity);
394 :
395 1895 : if (theTagIsBlock || theTagIsInline || eHTMLTag_table == theTag) {
396 1063 : switch(theType) {
397 : case eToken_start:
398 : {
399 744 : if (gHTMLElements[theTag].ShouldVerifyHierarchy()) {
400 490 : PRInt32 earlyPos = FindLastIndexOfTag(theTag, theStack);
401 490 : if (earlyPos != kNotFound) {
402 : // Uh-oh, we've found a tag that is not allowed to nest at
403 : // all. Mark the previous one and all of its children as
404 : // malformed to increase our chances of doing RS handling
405 : // on all of them. We want to do this for cases such as:
406 : // <a><div><a></a></div></a>.
407 :             // Note that we have to iterate through all of the children
408 : // of the original malformed tag to protect against:
409 : // <a><font><div><a></a></div></font></a>, so that the <font>
410 : // is allowed to contain the <div>.
411 : // XXX What about <a><span><a>, where the second <a> closes
412 : // the <span>?
413 210 : nsDequeIterator it(theStack, earlyPos), end(theStack.End());
414 711 : while (it < end) {
415 : CHTMLToken *theMalformedToken =
416 291 : static_cast<CHTMLToken*>(it++);
417 :
418 291 : theMalformedToken->SetContainerInfo(eMalformed);
419 : }
420 : }
421 : }
422 :
423 744 : theStack.Push(theToken);
424 744 : ++theStackDepth;
425 : }
426 744 : break;
427 : case eToken_end:
428 : {
429 : CHTMLToken *theLastToken =
430 319 : static_cast<CHTMLToken*>(theStack.Peek());
431 319 : if (theLastToken) {
432 319 : if (theTag == theLastToken->GetTypeID()) {
433 233 : theStack.Pop(); // Yank it for real
434 233 : theStackDepth--;
435 233 : theLastToken->SetContainerInfo(eWellFormed);
436 : } else {
437 : // This token wasn't what we expected it to be! We need to
438 : // go searching for its real start tag on our stack. Each
439 : // tag in between the end tag and start tag must be malformed
440 :
441 86 : if (FindLastIndexOfTag(theTag, theStack) != kNotFound) {
442 : // Find theTarget in the stack, marking each (malformed!)
443 : // tag in our way.
444 86 : theStack.Pop(); // Pop off theLastToken for real.
445 1208 : do {
446 604 : theLastToken->SetContainerInfo(eMalformed);
447 604 : tempStack.Push(theLastToken);
448 604 : theLastToken = static_cast<CHTMLToken*>(theStack.Pop());
449 604 : } while (theLastToken && theTag != theLastToken->GetTypeID());
450 :               // XXX The above test can confuse two different user-defined
451 : // tags.
452 :
453 86 : NS_ASSERTION(theLastToken,
454 : "FindLastIndexOfTag lied to us!"
455 : " We couldn't find theTag on theStack");
456 86 : theLastToken->SetContainerInfo(eMalformed);
457 :
458 : // Great, now push all of the other tokens back onto the
459 : // stack to preserve the general structure of the document.
460 : // Note that we don't push the target token back onto the
461 :               // stack (since it was just closed).
462 776 : while (tempStack.GetSize() != 0) {
463 604 : theStack.Push(tempStack.Pop());
464 : }
465 : }
466 : }
467 : }
468 : }
469 319 : break;
470 : default:
471 0 : break;
472 : }
473 : }
474 : }
475 :
476 3037 : theToken = (CHTMLToken*)mTokenDeque.ObjectAt(++mTokenScanPos);
477 : }
478 :
479 25 : return result;
480 : }
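
/*
 * Editorial worked example (not part of the original file): for the input
 * <b><i>text</b></i>, the stack holds [<b>, <i>] when the </b> end tag is
 * scanned. The top of the stack (<i>) does not match, but <b> is found
 * deeper in the stack, so both tokens are flagged eMalformed, <b> is popped
 * for good (it was just closed) and <i> is pushed back so that the later
 * </i> can still find it. A properly nested pair instead takes the first
 * branch above and is flagged eWellFormed.
 */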
481 :
482 : /**
483 : * This method is called after we're done tokenizing a chunk of data.
484 : *
485 : * @param aFinalChunk Tells us if this was the last chunk of data.
486 : * @return Error result.
487 : */
488 : nsresult
489 53 : nsHTMLTokenizer::DidTokenize(bool aFinalChunk)
490 : {
491 53 : return ScanDocStructure(aFinalChunk);
492 : }
493 :
494 : /**
495 :  * This method is called repeatedly, once for each token we consume.
496 : * Each time, we determine the kind of token we're about to
497 : * read, and then we call the appropriate method to handle
498 : * that token type.
499 : *
500 : * @param aScanner The source of our input.
501 : * @param aFlushTokens An OUT parameter to tell the caller whether it should
502 : * process our queued tokens up to now (e.g., when we
503 : * reach a <script>).
504 : * @return Success or error
505 : */
506 : nsresult
507 2308 : nsHTMLTokenizer::ConsumeToken(nsScanner& aScanner, bool& aFlushTokens)
508 : {
509 : PRUnichar theChar;
510 2308 : CToken* theToken = nsnull;
511 :
512 2308 : nsresult result = aScanner.Peek(theChar);
513 :
514 2308 : switch(result) {
515 : case kEOF:
516 :       // Tell our caller that we've finished.
517 53 : return result;
518 :
519 : case NS_OK:
520 : default:
521 2255 : if (!(mFlags & NS_IPARSER_FLAG_PLAIN_TEXT)) {
522 2255 : if (kLessThan == theChar) {
523 1163 : return ConsumeTag(theChar, theToken, aScanner, aFlushTokens);
524 1092 : } else if (kAmpersand == theChar) {
525 0 : return ConsumeEntity(theChar, theToken, aScanner);
526 : }
527 : }
528 :
529 1092 : if (kCR == theChar || kLF == theChar) {
530 530 : return ConsumeNewline(theChar, theToken, aScanner);
531 : } else {
532 562 : if (!nsCRT::IsAsciiSpace(theChar)) {
533 281 : if (theChar != '\0') {
534 281 : result = ConsumeText(theToken, aScanner);
535 : } else {
536 : // Skip the embedded null char. Fix bug 64098.
537 0 : aScanner.GetChar(theChar);
538 : }
539 281 : break;
540 : }
541 281 : result = ConsumeWhitespace(theChar, theToken, aScanner);
542 : }
543 281 : break;
544 : }
545 :
546 562 : return result;
547 : }
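
/*
 * Editorial summary (not part of the original file) of the dispatch above:
 *
 *   '<'             -> ConsumeTag()  (start/end tag, comment, markup
 *                                     declaration, processing instruction)
 *   '&'             -> ConsumeEntity()
 *   CR or LF        -> ConsumeNewline()
 *   other space     -> ConsumeWhitespace()
 *   anything else   -> ConsumeText()
 *
 * In plain-text mode the '<' and '&' branches are skipped, so everything
 * other than newlines and whitespace falls through to ConsumeText().
 */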
548 :
549 : /**
550 :  * This method is called when a "<" has been seen (via a Peek)
551 : * and we know we're at the start of some kind of tagged
552 : * element. We don't know yet if it's a tag or a comment.
553 : *
554 : * @param aChar is the last char read
555 : * @param aToken is the out arg holding our new token (the function allocates
556 : * the return token using mTokenAllocator).
557 : * @param aScanner represents our input source
558 :  * @param aFlushTokens is an OUT parameter used to tell consumers to flush
559 : * the current tokens after processing the current one.
560 : * @return error code.
561 : */
562 : nsresult
563 1163 : nsHTMLTokenizer::ConsumeTag(PRUnichar aChar,
564 : CToken*& aToken,
565 : nsScanner& aScanner,
566 : bool& aFlushTokens)
567 : {
568 : PRUnichar theNextChar, oldChar;
569 1163 : nsresult result = aScanner.Peek(aChar, 1);
570 :
571 1163 : if (NS_OK == result) {
572 1163 : switch (aChar) {
573 : case kForwardSlash:
574 319 : result = aScanner.Peek(theNextChar, 2);
575 :
576 319 : if (NS_OK == result) {
577 : // Get the original "<" (we've already seen it with a Peek)
578 319 : aScanner.GetChar(oldChar);
579 :
580 :           // XML allows non-ASCII tag names; consume this as an end tag. This
581 :           // is needed to make XML view source work.
582 319 : bool isXML = !!(mFlags & NS_IPARSER_FLAG_XML);
583 319 : if (nsCRT::IsAsciiAlpha(theNextChar) ||
584 : kGreaterThan == theNextChar ||
585 0 : (isXML && !nsCRT::IsAscii(theNextChar))) {
586 319 : result = ConsumeEndTag(aChar, aToken, aScanner);
587 : } else {
588 0 : result = ConsumeComment(aChar, aToken, aScanner);
589 : }
590 : }
591 :
592 319 : break;
593 :
594 : case kExclamation:
595 50 : result = aScanner.Peek(theNextChar, 2);
596 :
597 50 : if (NS_OK == result) {
598 : // Get the original "<" (we've already seen it with a Peek)
599 50 : aScanner.GetChar(oldChar);
600 :
601 50 : if (kMinus == theNextChar || kGreaterThan == theNextChar) {
602 25 : result = ConsumeComment(aChar, aToken, aScanner);
603 : } else {
604 25 : result = ConsumeSpecialMarkup(aChar, aToken, aScanner);
605 : }
606 : }
607 50 : break;
608 :
609 : case kQuestionMark:
610 : // It must be a processing instruction...
611 : // Get the original "<" (we've already seen it with a Peek)
612 0 : aScanner.GetChar(oldChar);
613 0 : result = ConsumeProcessingInstruction(aChar, aToken, aScanner);
614 0 : break;
615 :
616 : default:
617 :         // XML allows non-ASCII tag names; consume this as a start tag.
618 794 : bool isXML = !!(mFlags & NS_IPARSER_FLAG_XML);
619 794 : if (nsCRT::IsAsciiAlpha(aChar) ||
620 0 : (isXML && !nsCRT::IsAscii(aChar))) {
621 : // Get the original "<" (we've already seen it with a Peek)
622 794 : aScanner.GetChar(oldChar);
623 794 : result = ConsumeStartTag(aChar, aToken, aScanner, aFlushTokens);
624 : } else {
625 : // We are not dealing with a tag. So, don't consume the original
626 : // char and leave the decision to ConsumeText().
627 0 : result = ConsumeText(aToken, aScanner);
628 : }
629 : }
630 : }
631 :
632 : // Last ditch attempt to make sure we don't lose data.
633 1163 : if (kEOF == result && !aScanner.IsIncremental()) {
634 : // Whoops, we don't want to lose any data! Consume the rest as text.
635 : // This normally happens for either a trailing < or </
636 0 : result = ConsumeText(aToken, aScanner);
637 : }
638 :
639 1163 : return result;
640 : }
641 :
642 : /**
643 : * This method is called just after we've consumed a start or end
644 : * tag, and we now have to consume its attributes.
645 : *
646 : * @param aChar is the last char read
647 : * @param aToken is the start or end tag that "owns" these attributes.
648 : * @param aScanner represents our input source
649 : * @return Error result.
650 : */
651 : nsresult
652 237 : nsHTMLTokenizer::ConsumeAttributes(PRUnichar aChar,
653 : CToken* aToken,
654 : nsScanner& aScanner)
655 : {
656 237 : bool done = false;
657 237 : nsresult result = NS_OK;
658 237 : PRInt16 theAttrCount = 0;
659 :
660 237 : nsTokenAllocator* theAllocator = this->GetTokenAllocator();
661 :
662 1206 : while (!done && result == NS_OK) {
663 : CAttributeToken* theToken =
664 : static_cast<CAttributeToken*>
665 : (theAllocator->CreateTokenOfType(eToken_attribute,
666 732 : eHTMLTag_unknown));
667 732 : if (NS_LIKELY(theToken != nsnull)) {
668 : // Tell the new token to finish consuming text...
669 732 : result = theToken->Consume(aChar, aScanner, mFlags);
670 :
671 732 : if (NS_SUCCEEDED(result)) {
672 732 : ++theAttrCount;
673 732 : AddToken((CToken*&)theToken, result, &mTokenDeque, theAllocator);
674 : } else {
675 0 : IF_FREE(theToken, mTokenAllocator);
676 : // Bad attribute returns shouldn't propagate out.
677 0 : if (NS_ERROR_HTMLPARSER_BADATTRIBUTE == result) {
678 0 : result = NS_OK;
679 : }
680 : }
681 : }
682 : else {
683 0 : result = NS_ERROR_OUT_OF_MEMORY;
684 : }
685 :
686 : #ifdef DEBUG
687 732 : if (NS_SUCCEEDED(result)) {
688 732 : PRInt32 newline = 0;
689 732 : aScanner.SkipWhitespace(newline);
690 732 : NS_ASSERTION(newline == 0,
691 : "CAttribute::Consume() failed to collect all the newlines!");
692 : }
693 : #endif
694 732 : if (NS_SUCCEEDED(result)) {
695 732 : result = aScanner.Peek(aChar);
696 732 : if (NS_SUCCEEDED(result)) {
697     732 :         if (aChar == kGreaterThan) { // Found the tag-terminating '>'
698 237 : aScanner.GetChar(aChar); // Skip the '>'
699 237 : done = true;
700 495 : } else if (aChar == kLessThan) {
701 0 : aToken->SetInError(true);
702 0 : done = true;
703 : }
704 : }
705 : }
706 : }
707 :
708 237 : if (NS_FAILED(result)) {
709 0 : aToken->SetInError(true);
710 :
711 0 : if (!aScanner.IsIncremental()) {
712 0 : result = NS_OK;
713 : }
714 : }
715 :
716 237 : aToken->SetAttributeCount(theAttrCount);
717 237 : return result;
718 : }
719 :
720 : /**
721 : * This method consumes a start tag and all of its attributes.
722 : *
723 : * @param aChar The last character read from the scanner.
724 :  * @param aToken The OUT parameter that holds our resulting token (allocated
725 :  *               by the function using mTokenAllocator).
726 : * @param aScanner Our source of data
727 :  * @param aFlushTokens is an OUT parameter used to tell consumers to flush
728 : * the current tokens after processing the current one.
729 : * @return Error result.
730 : */
731 : nsresult
732 794 : nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,
733 : CToken*& aToken,
734 : nsScanner& aScanner,
735 : bool& aFlushTokens)
736 : {
737 : // Remember this for later in case you have to unwind...
738 794 : PRInt32 theDequeSize = mTokenDeque.GetSize();
739 794 : nsresult result = NS_OK;
740 :
741 794 : nsTokenAllocator* theAllocator = this->GetTokenAllocator();
742 794 : aToken = theAllocator->CreateTokenOfType(eToken_start, eHTMLTag_unknown);
743 794 : NS_ENSURE_TRUE(aToken, NS_ERROR_OUT_OF_MEMORY);
744 :
745 : // Tell the new token to finish consuming text...
746 794 : result = aToken->Consume(aChar, aScanner, mFlags);
747 :
748 794 : if (NS_SUCCEEDED(result)) {
749 794 : AddToken(aToken, result, &mTokenDeque, theAllocator);
750 :
751 794 : eHTMLTags theTag = (eHTMLTags)aToken->GetTypeID();
752 :
753 : // Good. Now, let's see if the next char is ">".
754 : // If so, we have a complete tag, otherwise, we have attributes.
755 794 : result = aScanner.Peek(aChar);
756 794 : if (NS_FAILED(result)) {
757 0 : aToken->SetInError(true);
758 :
759 : // Don't return early here so we can create a text and end token for
760 : // the special <iframe>, <script> and similar tags down below.
761 0 : result = NS_OK;
762 : } else {
763 794 : if (kGreaterThan != aChar) { // Look for a '>'
764 237 : result = ConsumeAttributes(aChar, aToken, aScanner);
765 : } else {
766 557 : aScanner.GetChar(aChar);
767 : }
768 : }
769 :
770 : /* Now that that's over with, we have one more problem to solve.
771 :        In the case that we just read a <SCRIPT> or <STYLE> tag, we should go and
772 :        consume all of its content ourselves.
773 : But XML doesn't treat these tags differently, so we shouldn't if the
774 : document is XML.
775 : */
776 794 : if (NS_SUCCEEDED(result) && !(mFlags & NS_IPARSER_FLAG_XML)) {
777 794 : bool isCDATA = gHTMLElements[theTag].CanContainType(kCDATA);
778 : bool isPCDATA = eHTMLTag_textarea == theTag ||
779 794 : eHTMLTag_title == theTag;
780 :
781 : // XXX This is an evil hack, we should be able to handle these properly
782 : // in the DTD.
783 794 : if ((eHTMLTag_iframe == theTag &&
784 : (mFlags & NS_IPARSER_FLAG_FRAMES_ENABLED)) ||
785 : (eHTMLTag_noframes == theTag &&
786 : (mFlags & NS_IPARSER_FLAG_FRAMES_ENABLED)) ||
787 : (eHTMLTag_noscript == theTag &&
788 : (mFlags & NS_IPARSER_FLAG_SCRIPT_ENABLED)) ||
789 : (eHTMLTag_noembed == theTag)) {
790 0 : isCDATA = true;
791 : }
792 :
793 : // Plaintext contains CDATA, but it's special, so we handle it
794 : // differently than the other CDATA elements
795 794 : if (eHTMLTag_plaintext == theTag) {
796 0 : isCDATA = false;
797 :
798 : // Note: We check in ConsumeToken() for this flag, and if we see it
799 : // we only construct text tokens (which is what we want).
800 0 : mFlags |= NS_IPARSER_FLAG_PLAIN_TEXT;
801 : }
802 :
803 :
804 794 : if (isCDATA || isPCDATA) {
805 25 : bool done = false;
806 50 : nsDependentString endTagName(nsHTMLTags::GetStringValue(theTag));
807 :
808 : CToken* text =
809 25 : theAllocator->CreateTokenOfType(eToken_text, eHTMLTag_text);
810 25 : NS_ENSURE_TRUE(text, NS_ERROR_OUT_OF_MEMORY);
811 :
812 25 : CTextToken* textToken = static_cast<CTextToken*>(text);
813 :
814 25 : if (isCDATA) {
815 : result = textToken->ConsumeCharacterData(theTag != eHTMLTag_script,
816 : aScanner,
817 : endTagName,
818 : mFlags,
819 0 : done);
820 :
821 : // Only flush tokens for <script>, to give ourselves more of a
822 : // chance of allowing inlines to contain blocks.
823 0 : aFlushTokens = done && theTag == eHTMLTag_script;
824 25 : } else if (isPCDATA) {
825 : // Title is consumed conservatively in order to not regress
826 : // bug 42945
827 : result = textToken->ConsumeParsedCharacterData(
828 : theTag == eHTMLTag_textarea,
829 : theTag == eHTMLTag_title,
830 : aScanner,
831 : endTagName,
832 : mFlags,
833 25 : done);
834 :
835 : // Note: we *don't* set aFlushTokens here.
836 : }
837 :
838 : // We want to do this unless result is kEOF, in which case we will
839 : // simply unwind our stack and wait for more data anyway.
840 25 : if (kEOF != result) {
841 25 : AddToken(text, NS_OK, &mTokenDeque, theAllocator);
842 25 : CToken* endToken = nsnull;
843 :
844 25 : if (NS_SUCCEEDED(result) && done) {
845 : PRUnichar theChar;
846 : // Get the <
847 25 : result = aScanner.GetChar(theChar);
848 25 : NS_ASSERTION(NS_SUCCEEDED(result) && theChar == kLessThan,
849 : "CTextToken::Consume*Data is broken!");
850 : #ifdef DEBUG
851 : // Ensure we have a /
852 : PRUnichar tempChar; // Don't change non-debug vars in debug-only code
853 25 : result = aScanner.Peek(tempChar);
854 25 : NS_ASSERTION(NS_SUCCEEDED(result) && tempChar == kForwardSlash,
855 : "CTextToken::Consume*Data is broken!");
856 : #endif
857 25 : result = ConsumeEndTag(PRUnichar('/'), endToken, aScanner);
858 50 : if (!(mFlags & NS_IPARSER_FLAG_VIEW_SOURCE) &&
859 25 : NS_SUCCEEDED(result)) {
860 : // If ConsumeCharacterData returned a success result (and
861 : // we're not in view source), then we want to make sure that
862 : // we're going to execute this script (since the result means
863 : // that we've found an end tag that satisfies all of the right
864 : // conditions).
865 25 : endToken->SetInError(false);
866 25 : }
867 0 : } else if (result == kFakeEndTag &&
868 0 : !(mFlags & NS_IPARSER_FLAG_VIEW_SOURCE)) {
869 0 : result = NS_OK;
870 : endToken = theAllocator->CreateTokenOfType(eToken_end, theTag,
871 0 : endTagName);
872 0 : AddToken(endToken, result, &mTokenDeque, theAllocator);
873 0 : if (NS_LIKELY(endToken != nsnull)) {
874 0 : endToken->SetInError(true);
875 : }
876 : else {
877 0 : result = NS_ERROR_OUT_OF_MEMORY;
878 : }
879 0 : } else if (result == kFakeEndTag) {
880 : // If we are here, we are both faking having seen the end tag
881 : // and are in view-source.
882 0 : result = NS_OK;
883 : }
884 : } else {
885 0 : IF_FREE(text, mTokenAllocator);
886 : }
887 : }
888 : }
889 :
890 : // This code is confusing, so pay attention.
891 : // If you're here, it's because we were in the midst of consuming a start
892 : // tag but ran out of data (not in the stream, but in this *part* of the
893 :     // stream). For simplicity, we have to unwind our input. Therefore, we pop
894 : // and discard any new tokens we've queued this round. Later we can get
895 : // smarter about this.
896 794 : if (NS_FAILED(result)) {
897 0 : while (mTokenDeque.GetSize()>theDequeSize) {
898 0 : CToken* theToken = (CToken*)mTokenDeque.Pop();
899 0 : IF_FREE(theToken, mTokenAllocator);
900 : }
901 : }
902 : } else {
903 0 : IF_FREE(aToken, mTokenAllocator);
904 : }
905 :
906 794 : return result;
907 : }
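
/*
 * Editorial note (not part of the original file): for a start tag such as
 * <script>, the CDATA path above queues roughly
 *
 *   eToken_start(script), eToken_text("...script source..."),
 *   eToken_end(script)
 *
 * and sets aFlushTokens so that the caller can process (and execute) the
 * script before tokenizing any further. <textarea> and <title> take the
 * PCDATA path and do not request a flush.
 */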
908 :
909 : /**
910 : * This method consumes an end tag and any "attributes" that may come after it.
911 : *
912 : * @param aChar The last character read from the scanner.
913 : * @param aToken The OUT parameter that holds our resulting token.
914 : * @param aScanner Our source of data
915 : * @return Error result
916 : */
917 : nsresult
918 344 : nsHTMLTokenizer::ConsumeEndTag(PRUnichar aChar,
919 : CToken*& aToken,
920 : nsScanner& aScanner)
921 : {
922 : // Get the "/" (we've already seen it with a Peek)
923 344 : aScanner.GetChar(aChar);
924 :
925 344 : nsTokenAllocator* theAllocator = this->GetTokenAllocator();
926 344 : aToken = theAllocator->CreateTokenOfType(eToken_end, eHTMLTag_unknown);
927 344 : NS_ENSURE_TRUE(aToken, NS_ERROR_OUT_OF_MEMORY);
928 :
929 : // Remember this for later in case you have to unwind...
930 344 : PRInt32 theDequeSize = mTokenDeque.GetSize();
931 344 : nsresult result = NS_OK;
932 :
933 : // Tell the new token to finish consuming text...
934 344 : result = aToken->Consume(aChar, aScanner, mFlags);
935 344 : AddToken(aToken, result, &mTokenDeque, theAllocator);
936 344 : if (NS_FAILED(result)) {
937 : // Note that this early-return here is safe because we have not yet
938 : // added any of our tokens to the queue (AddToken only adds the token if
939 : // result is a success), so we don't need to fall through.
940 0 : return result;
941 : }
942 :
943 344 : result = aScanner.Peek(aChar);
944 344 : if (NS_FAILED(result)) {
945 0 : aToken->SetInError(true);
946 :
947 : // Note: We know here that the scanner is not incremental since if
948 : // this peek fails, then we've already masked over a kEOF coming from
949 : // the Consume() call above.
950 0 : return NS_OK;
951 : }
952 :
953 344 : if (kGreaterThan != aChar) {
954 0 : result = ConsumeAttributes(aChar, aToken, aScanner);
955 : } else {
956 344 : aScanner.GetChar(aChar);
957 : }
958 :
959 : // Do the same thing as we do in ConsumeStartTag. Basically, if we've run
960 :   // out of data in this *section* of the document, pop all of the tokens
961 : // we've consumed this round and wait for more data.
962 344 : if (NS_FAILED(result)) {
963 0 : while (mTokenDeque.GetSize() > theDequeSize) {
964 0 : CToken* theToken = (CToken*)mTokenDeque.Pop();
965 0 : IF_FREE(theToken, mTokenAllocator);
966 : }
967 : }
968 :
969 344 : return result;
970 : }
971 :
972 : /**
973 : * This method is called just after a "&" has been consumed
974 : * and we know we're at the start of an entity.
975 : *
976 : * @param aChar The last character read from the scanner.
977 : * @param aToken The OUT parameter that holds our resulting token.
978 : * @param aScanner Our source of data
979 : * @return Error result.
980 : */
981 : nsresult
982 0 : nsHTMLTokenizer::ConsumeEntity(PRUnichar aChar,
983 : CToken*& aToken,
984 : nsScanner& aScanner)
985 : {
986 : PRUnichar theChar;
987 0 : nsresult result = aScanner.Peek(theChar, 1);
988 :
989 0 : nsTokenAllocator* theAllocator = this->GetTokenAllocator();
990 0 : if (NS_SUCCEEDED(result)) {
991 0 : if (nsCRT::IsAsciiAlpha(theChar) || theChar == kHashsign) {
992 0 : aToken = theAllocator->CreateTokenOfType(eToken_entity, eHTMLTag_entity);
993 0 : NS_ENSURE_TRUE(aToken, NS_ERROR_OUT_OF_MEMORY);
994 0 : result = aToken->Consume(theChar, aScanner, mFlags);
995 :
996 0 : if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) {
997 0 : IF_FREE(aToken, mTokenAllocator);
998 : } else {
999 0 : if (result == kEOF && !aScanner.IsIncremental()) {
1000 0 : result = NS_OK; // Use as much of the entity as you can get.
1001 : }
1002 :
1003 0 : AddToken(aToken, result, &mTokenDeque, theAllocator);
1004 0 : return result;
1005 : }
1006 : }
1007 :
1008 : // Oops, we're actually looking at plain text...
1009 0 : result = ConsumeText(aToken, aScanner);
1010 0 : } else if (result == kEOF && !aScanner.IsIncremental()) {
1011 : // If the last character in the file is an &, consume it as text.
1012 0 : result = ConsumeText(aToken, aScanner);
1013 0 : if (aToken) {
1014 0 : aToken->SetInError(true);
1015 : }
1016 : }
1017 :
1018 0 : return result;
1019 : }
1020 :
1021 :
1022 : /**
1023 : * This method is called just after whitespace has been
1024 :  * consumed and we know we're at the start of a whitespace run.
1025 : *
1026 : * @param aChar The last character read from the scanner.
1027 : * @param aToken The OUT parameter that holds our resulting token.
1028 : * @param aScanner Our source of data
1029 : * @return Error result.
1030 : */
1031 : nsresult
1032 281 : nsHTMLTokenizer::ConsumeWhitespace(PRUnichar aChar,
1033 : CToken*& aToken,
1034 : nsScanner& aScanner)
1035 : {
1036 : // Get the whitespace character
1037 281 : aScanner.GetChar(aChar);
1038 :
1039 281 : nsTokenAllocator* theAllocator = this->GetTokenAllocator();
1040 : aToken = theAllocator->CreateTokenOfType(eToken_whitespace,
1041 281 : eHTMLTag_whitespace);
1042 281 : nsresult result = NS_OK;
1043 281 : if (aToken) {
1044 281 : result = aToken->Consume(aChar, aScanner, mFlags);
1045 281 : AddToken(aToken, result, &mTokenDeque, theAllocator);
1046 : }
1047 :
1048 281 : return result;
1049 : }
1050 :
1051 : /**
1052 : * This method is called just after a "<!" has been consumed
1053 : * and we know we're at the start of a comment.
1054 : *
1055 : * @param aChar The last character read from the scanner.
1056 : * @param aToken The OUT parameter that holds our resulting token.
1057 : * @param aScanner Our source of data
1058 : * @return Error result.
1059 : */
1060 : nsresult
1061 25 : nsHTMLTokenizer::ConsumeComment(PRUnichar aChar,
1062 : CToken*& aToken,
1063 : nsScanner& aScanner)
1064 : {
1065 : // Get the "!"
1066 25 : aScanner.GetChar(aChar);
1067 :
1068 25 : nsTokenAllocator* theAllocator = this->GetTokenAllocator();
1069 25 : aToken = theAllocator->CreateTokenOfType(eToken_comment, eHTMLTag_comment);
1070 25 : nsresult result = NS_OK;
1071 25 : if (aToken) {
1072 25 : result = aToken->Consume(aChar, aScanner, mFlags);
1073 25 : AddToken(aToken, result, &mTokenDeque, theAllocator);
1074 : }
1075 :
1076 25 : if (kNotAComment == result) {
1077 : // AddToken has IF_FREE()'d our token, so...
1078 0 : result = ConsumeText(aToken, aScanner);
1079 : }
1080 :
1081 25 : return result;
1082 : }
1083 :
1084 : /**
1085 : * This method is called just after a known text char has
1086 : * been consumed and we should read a text run. Note: we actually ignore the
1087 : * first character of the text run so that we can consume invalid markup
1088 : * as text.
1089 : *
1090 : * @param aToken The OUT parameter that holds our resulting token.
1091 : * @param aScanner Our source of data
1092 : * @return Error result.
1093 : */
1094 : nsresult
1095 281 : nsHTMLTokenizer::ConsumeText(CToken*& aToken, nsScanner& aScanner)
1096 : {
1097 281 : nsresult result = NS_OK;
1098 281 : nsTokenAllocator* theAllocator = this->GetTokenAllocator();
1099 : CTextToken* theToken =
1100 281 : (CTextToken*)theAllocator->CreateTokenOfType(eToken_text, eHTMLTag_text);
1101 281 : if (theToken) {
1102 281 : PRUnichar ch = '\0';
1103 281 : result = theToken->Consume(ch, aScanner, mFlags);
1104 281 : if (NS_FAILED(result)) {
1105 0 : if (0 == theToken->GetTextLength()) {
1106 0 : IF_FREE(aToken, mTokenAllocator);
1107 0 : aToken = nsnull;
1108 : } else {
1109 0 : result = NS_OK;
1110 : }
1111 : }
1112 :
1113 281 : aToken = theToken;
1114 281 : AddToken(aToken, result, &mTokenDeque, theAllocator);
1115 : }
1116 :
1117 281 : return result;
1118 : }
1119 :
1120 : /**
1121 : * This method is called just after a "<!" has been consumed.
1122 :  * NOTE: Here we might consume a DOCTYPE declaration or other "special" markup.
1123 : *
1124 : * @param aChar The last character read from the scanner.
1125 : * @param aToken The OUT parameter that holds our resulting token.
1126 : * @param aScanner Our source of data
1127 : * @return Error result.
1128 : */
1129 : nsresult
1130 25 : nsHTMLTokenizer::ConsumeSpecialMarkup(PRUnichar aChar,
1131 : CToken*& aToken,
1132 : nsScanner& aScanner)
1133 : {
1134 : // Get the "!"
1135 25 : aScanner.GetChar(aChar);
1136 :
1137 25 : nsresult result = NS_OK;
1138 50 : nsAutoString theBufCopy;
1139 25 : aScanner.Peek(theBufCopy, 20);
1140 25 : ToUpperCase(theBufCopy);
1141 25 : PRInt32 theIndex = theBufCopy.Find("DOCTYPE", false, 0, 0);
1142 25 : nsTokenAllocator* theAllocator = this->GetTokenAllocator();
1143 :
1144 25 : if (theIndex == kNotFound) {
1145 0 : if ('[' == theBufCopy.CharAt(0)) {
1146 : aToken = theAllocator->CreateTokenOfType(eToken_cdatasection,
1147 0 : eHTMLTag_comment);
1148 0 : } else if (StringBeginsWith(theBufCopy, NS_LITERAL_STRING("ELEMENT")) ||
1149 0 : StringBeginsWith(theBufCopy, NS_LITERAL_STRING("ATTLIST")) ||
1150 0 : StringBeginsWith(theBufCopy, NS_LITERAL_STRING("ENTITY")) ||
1151 0 : StringBeginsWith(theBufCopy, NS_LITERAL_STRING("NOTATION"))) {
1152 : aToken = theAllocator->CreateTokenOfType(eToken_markupDecl,
1153 0 : eHTMLTag_markupDecl);
1154 : } else {
1155 : aToken = theAllocator->CreateTokenOfType(eToken_comment,
1156 0 : eHTMLTag_comment);
1157 : }
1158 : } else {
1159 : aToken = theAllocator->CreateTokenOfType(eToken_doctypeDecl,
1160 25 : eHTMLTag_doctypeDecl);
1161 : }
1162 :
1163 25 : if (aToken) {
1164 25 : result = aToken->Consume(aChar, aScanner, mFlags);
1165 25 : AddToken(aToken, result, &mTokenDeque, theAllocator);
1166 : }
1167 :
1168 25 : if (result == kNotAComment) {
1169 0 : result = ConsumeText(aToken, aScanner);
1170 : }
1171 :
1172 25 : return result;
1173 : }
1174 :
1175 : /**
1176 : * This method is called just after a newline has been consumed.
1177 : *
1178 : * @param aChar The last character read from the scanner.
1179 : * @param aToken The OUT parameter that holds our resulting token.
1180 : * @param aScanner Our source of data
1181 : * @return Error result.
1182 : */
1183 : nsresult
1184 530 : nsHTMLTokenizer::ConsumeNewline(PRUnichar aChar,
1185 : CToken*& aToken,
1186 : nsScanner& aScanner)
1187 : {
1188 : // Get the newline character
1189 530 : aScanner.GetChar(aChar);
1190 :
1191 530 : nsTokenAllocator* theAllocator = this->GetTokenAllocator();
1192 530 : aToken = theAllocator->CreateTokenOfType(eToken_newline, eHTMLTag_newline);
1193 530 : nsresult result = NS_OK;
1194 530 : if (aToken) {
1195 530 : result = aToken->Consume(aChar, aScanner, mFlags);
1196 530 : AddToken(aToken, result, &mTokenDeque, theAllocator);
1197 : }
1198 :
1199 530 : return result;
1200 : }
1201 :
1202 :
1203 : /**
1204 : * This method is called just after a <? has been consumed.
1205 : *
1206 : * @param aChar The last character read from the scanner.
1207 : * @param aToken The OUT parameter that holds our resulting token.
1208 : * @param aScanner Our source of data
1209 : * @return Error result.
1210 : */
1211 : nsresult
1212 0 : nsHTMLTokenizer::ConsumeProcessingInstruction(PRUnichar aChar,
1213 : CToken*& aToken,
1214 : nsScanner& aScanner)
1215 : {
1216 : // Get the "?"
1217 0 : aScanner.GetChar(aChar);
1218 :
1219 0 : nsTokenAllocator* theAllocator = this->GetTokenAllocator();
1220 : aToken = theAllocator->CreateTokenOfType(eToken_instruction,
1221 0 : eHTMLTag_unknown);
1222 0 : nsresult result = NS_OK;
1223 0 : if (aToken) {
1224 0 : result = aToken->Consume(aChar, aScanner, mFlags);
1225 0 : AddToken(aToken, result, &mTokenDeque, theAllocator);
1226 : }
1227 :
1228 0 : return result;
1229 : }
|