1 : /*
2 : * Copyright (c) 2005-2007 Henri Sivonen
3 : * Copyright (c) 2007-2010 Mozilla Foundation
4 : * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
5 : * Foundation, and Opera Software ASA.
6 : *
7 : * Permission is hereby granted, free of charge, to any person obtaining a
8 : * copy of this software and associated documentation files (the "Software"),
9 : * to deal in the Software without restriction, including without limitation
10 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 : * and/or sell copies of the Software, and to permit persons to whom the
12 : * Software is furnished to do so, subject to the following conditions:
13 : *
14 : * The above copyright notice and this permission notice shall be included in
15 : * all copies or substantial portions of the Software.
16 : *
17 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 : * DEALINGS IN THE SOFTWARE.
24 : */
25 :
26 : /*
27 : * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
28 : * Please edit Tokenizer.java instead and regenerate.
29 : */
30 :
31 : #ifndef nsHtml5Tokenizer_h__
32 : #define nsHtml5Tokenizer_h__
33 :
34 : #include "prtypes.h"
35 : #include "nsIAtom.h"
36 : #include "nsHtml5AtomTable.h"
37 : #include "nsString.h"
38 : #include "nsIContent.h"
39 : #include "nsTraceRefcnt.h"
40 : #include "jArray.h"
41 : #include "nsHtml5DocumentMode.h"
42 : #include "nsHtml5ArrayCopy.h"
43 : #include "nsHtml5NamedCharacters.h"
44 : #include "nsHtml5NamedCharactersAccel.h"
45 : #include "nsHtml5Atoms.h"
46 : #include "nsAHtml5TreeBuilderState.h"
47 : #include "nsHtml5Macros.h"
48 : #include "nsHtml5Highlighter.h"
49 : #include "nsHtml5TokenizerLoopPolicies.h"
50 :
51 : class nsHtml5StreamParser;
52 :
53 : class nsHtml5TreeBuilder;
54 : class nsHtml5MetaScanner;
55 : class nsHtml5AttributeName;
56 : class nsHtml5ElementName;
57 : class nsHtml5HtmlAttributes;
58 : class nsHtml5UTF16Buffer;
59 : class nsHtml5StateSnapshot;
60 : class nsHtml5Portability;
61 :
62 :
63 : class nsHtml5Tokenizer
64 : {
65 : private:
66 : static PRUnichar LT_GT[];
67 : static PRUnichar LT_SOLIDUS[];
68 : static PRUnichar RSQB_RSQB[];
69 : static PRUnichar REPLACEMENT_CHARACTER[];
70 : static PRUnichar LF[];
71 : static PRUnichar CDATA_LSQB[];
72 : static PRUnichar OCTYPE[];
73 : static PRUnichar UBLIC[];
74 : static PRUnichar YSTEM[];
75 : static staticJArray<PRUnichar,PRInt32> TITLE_ARR;
76 : static staticJArray<PRUnichar,PRInt32> SCRIPT_ARR;
77 : static staticJArray<PRUnichar,PRInt32> STYLE_ARR;
78 : static staticJArray<PRUnichar,PRInt32> PLAINTEXT_ARR;
79 : static staticJArray<PRUnichar,PRInt32> XMP_ARR;
80 : static staticJArray<PRUnichar,PRInt32> TEXTAREA_ARR;
81 : static staticJArray<PRUnichar,PRInt32> IFRAME_ARR;
82 : static staticJArray<PRUnichar,PRInt32> NOEMBED_ARR;
83 : static staticJArray<PRUnichar,PRInt32> NOSCRIPT_ARR;
84 : static staticJArray<PRUnichar,PRInt32> NOFRAMES_ARR;
85 : protected:
86 : nsHtml5TreeBuilder* tokenHandler;
87 : nsHtml5StreamParser* encodingDeclarationHandler;
88 : bool lastCR;
89 : PRInt32 stateSave;
90 : private:
91 : PRInt32 returnStateSave;
92 : protected:
93 : PRInt32 index;
94 : private:
95 : bool forceQuirks;
96 : PRUnichar additional;
97 : PRInt32 entCol;
98 : PRInt32 firstCharKey;
99 : PRInt32 lo;
100 : PRInt32 hi;
101 : PRInt32 candidate;
102 : PRInt32 strBufMark;
103 : PRInt32 prevValue;
104 : protected:
105 : PRInt32 value;
106 : private:
107 : bool seenDigits;
108 : protected:
109 : PRInt32 cstart;
110 : private:
111 : nsString* publicId;
112 : nsString* systemId;
113 : autoJArray<PRUnichar,PRInt32> strBuf;
114 : PRInt32 strBufLen;
115 : autoJArray<PRUnichar,PRInt32> longStrBuf;
116 : PRInt32 longStrBufLen;
117 : autoJArray<PRUnichar,PRInt32> bmpChar;
118 : autoJArray<PRUnichar,PRInt32> astralChar;
119 : protected:
120 : nsHtml5ElementName* endTagExpectation;
121 : private:
122 : jArray<PRUnichar,PRInt32> endTagExpectationAsArray;
123 : protected:
124 : bool endTag;
125 : private:
126 : nsHtml5ElementName* tagName;
127 : protected:
128 : nsHtml5AttributeName* attributeName;
129 : private:
130 : nsIAtom* doctypeName;
131 : nsString* publicIdentifier;
132 : nsString* systemIdentifier;
133 : nsHtml5HtmlAttributes* attributes;
134 : PRInt32 mappingLangToXmlLang;
135 : bool shouldSuspend;
136 : protected:
137 : bool confident;
138 : private:
139 : PRInt32 line;
140 : nsHtml5AtomTable* interner;
141 : bool viewingXmlSource;
142 : public:
143 : nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource);
144 : void setInterner(nsHtml5AtomTable* interner);
145 : void initLocation(nsString* newPublicId, nsString* newSystemId);
146 : bool isViewingXmlSource();
147 : void setStateAndEndTagExpectation(PRInt32 specialTokenizerState, nsIAtom* endTagExpectation);
148 : void setStateAndEndTagExpectation(PRInt32 specialTokenizerState, nsHtml5ElementName* endTagExpectation);
149 : private:
150 : void endTagExpectationToArray();
151 : public:
152 : void setLineNumber(PRInt32 line);
153 0 : inline PRInt32 getLineNumber()
154 : {
155 0 : return line;
156 : }
157 :
158 : nsHtml5HtmlAttributes* emptyAttributes();
159 : private:
160 142 : inline void clearStrBufAndAppend(PRUnichar c)
161 : {
162 142 : strBuf[0] = c;
163 142 : strBufLen = 1;
164 142 : }
165 :
166 0 : inline void clearStrBuf()
167 : {
168 0 : strBufLen = 0;
169 0 : }
170 :
171 : void appendStrBuf(PRUnichar c);
172 : protected:
173 : nsString* strBufToString();
174 : private:
175 : void strBufToDoctypeName();
176 : void emitStrBuf();
177 0 : inline void clearLongStrBuf()
178 : {
179 0 : longStrBufLen = 0;
180 0 : }
181 :
182 0 : inline void clearLongStrBufAndAppend(PRUnichar c)
183 : {
184 0 : longStrBuf[0] = c;
185 0 : longStrBufLen = 1;
186 0 : }
187 :
188 : void appendLongStrBuf(PRUnichar c);
189 0 : inline void appendSecondHyphenToBogusComment()
190 : {
191 0 : appendLongStrBuf('-');
192 0 : }
193 :
194 0 : inline void adjustDoubleHyphenAndAppendToLongStrBufAndErr(PRUnichar c)
195 : {
196 0 : errConsecutiveHyphens();
197 0 : appendLongStrBuf(c);
198 0 : }
199 :
200 : void appendLongStrBuf(PRUnichar* buffer, PRInt32 offset, PRInt32 length);
201 0 : inline void appendStrBufToLongStrBuf()
202 : {
203 0 : appendLongStrBuf(strBuf, 0, strBufLen);
204 0 : }
205 :
206 : nsString* longStrBufToString();
207 : void emitComment(PRInt32 provisionalHyphens, PRInt32 pos);
208 : protected:
209 : void flushChars(PRUnichar* buf, PRInt32 pos);
210 : private:
211 : void resetAttributes();
212 : void strBufToElementNameString();
213 : PRInt32 emitCurrentTagToken(bool selfClosing, PRInt32 pos);
214 : void attributeNameComplete();
215 : void addAttributeWithoutValue();
216 : void addAttributeWithValue();
217 : public:
218 : void start();
219 : bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);
220 : private:
221 468 : template<class P> PRInt32 stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar* buf, bool reconsume, PRInt32 returnState, PRInt32 endPos);
222 : void initDoctypeFields();
223 0 : inline void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn()
224 : {
225 0 : silentCarriageReturn();
226 0 : adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
227 0 : }
228 :
229 0 : inline void adjustDoubleHyphenAndAppendToLongStrBufLineFeed()
230 : {
231 0 : silentLineFeed();
232 0 : adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
233 0 : }
234 :
235 0 : inline void appendLongStrBufLineFeed()
236 : {
237 0 : silentLineFeed();
238 0 : appendLongStrBuf('\n');
239 0 : }
240 :
241 0 : inline void appendLongStrBufCarriageReturn()
242 : {
243 0 : silentCarriageReturn();
244 0 : appendLongStrBuf('\n');
245 0 : }
246 :
247 : protected:
248 0 : inline void silentCarriageReturn()
249 : {
250 0 : ++line;
251 0 : lastCR = true;
252 0 : }
253 :
254 5 : inline void silentLineFeed()
255 : {
256 5 : ++line;
257 5 : }
258 :
259 : private:
260 : void emitCarriageReturn(PRUnichar* buf, PRInt32 pos);
261 : void emitReplacementCharacter(PRUnichar* buf, PRInt32 pos);
262 : void emitPlaintextReplacementCharacter(PRUnichar* buf, PRInt32 pos);
263 : void setAdditionalAndRememberAmpersandLocation(PRUnichar add);
264 : void bogusDoctype();
265 : void bogusDoctypeWithoutQuirks();
266 : void emitOrAppendStrBuf(PRInt32 returnState);
267 : void handleNcrValue(PRInt32 returnState);
268 : public:
269 : void eof();
270 : private:
271 : void emitDoctypeToken(PRInt32 pos);
272 : protected:
273 9272 : inline PRUnichar checkChar(PRUnichar* buf, PRInt32 pos)
274 : {
275 9272 : return buf[pos];
276 : }
277 :
278 : public:
279 : bool internalEncodingDeclaration(nsString* internalCharset);
280 : private:
281 : void emitOrAppendTwo(const PRUnichar* val, PRInt32 returnState);
282 : void emitOrAppendOne(const PRUnichar* val, PRInt32 returnState);
283 : public:
284 : void end();
285 : void requestSuspension();
286 : bool isInDataState();
287 : void resetToDataState();
288 : void loadState(nsHtml5Tokenizer* other);
289 : void initializeWithoutStarting();
290 : void setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler);
291 : ~nsHtml5Tokenizer();
292 : static void initializeStatics();
293 : static void releaseStatics();
294 :
295 : #include "nsHtml5TokenizerHSupplement.h"
296 : };
297 :
/*
 * Integer constants for nsHtml5Tokenizer. The numbered entries run
 * contiguously from 0 to 74; judging by their names they identify tokenizer
 * states (NOTE(review): confirm against the stateLoop implementation).
 * Names and values are part of the generated interface and must not change.
 */

/*
 * Bit mask that clears bit 0. ANDing it with DATA (0) or RCDATA (1) gives 0,
 * while every other constant below gives a non-zero result. The expansion is
 * parenthesised so it stays a single operand wherever the macro is used.
 */
#define NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK (~1)

/* Text-content constants. */
#define NS_HTML5TOKENIZER_DATA 0
#define NS_HTML5TOKENIZER_RCDATA 1
#define NS_HTML5TOKENIZER_SCRIPT_DATA 2
#define NS_HTML5TOKENIZER_RAWTEXT 3
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED 4
#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED 5
#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED 6
#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED 7
#define NS_HTML5TOKENIZER_PLAINTEXT 8

/* Tag and attribute constants. */
#define NS_HTML5TOKENIZER_TAG_OPEN 9
#define NS_HTML5TOKENIZER_CLOSE_TAG_OPEN 10
#define NS_HTML5TOKENIZER_TAG_NAME 11
#define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME 12
#define NS_HTML5TOKENIZER_ATTRIBUTE_NAME 13
#define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME 14
#define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE 15
#define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED 16

/* Markup-declaration and doctype constants. */
#define NS_HTML5TOKENIZER_BOGUS_COMMENT 17
#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN 18
#define NS_HTML5TOKENIZER_DOCTYPE 19
#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME 20
#define NS_HTML5TOKENIZER_DOCTYPE_NAME 21
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME 22
#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER 23
#define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED 24
#define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED 25
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER 26
#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER 27
#define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED 28
#define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED 29
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER 30
#define NS_HTML5TOKENIZER_BOGUS_DOCTYPE 31

/* Comment constants. */
#define NS_HTML5TOKENIZER_COMMENT_START 32
#define NS_HTML5TOKENIZER_COMMENT_START_DASH 33
#define NS_HTML5TOKENIZER_COMMENT 34
#define NS_HTML5TOKENIZER_COMMENT_END_DASH 35
#define NS_HTML5TOKENIZER_COMMENT_END 36
#define NS_HTML5TOKENIZER_COMMENT_END_BANG 37

/* End-tag, markup-declaration keyword and doctype keyword constants. */
#define NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME 38
#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN 39
#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE 40
#define NS_HTML5TOKENIZER_DOCTYPE_UBLIC 41
#define NS_HTML5TOKENIZER_DOCTYPE_YSTEM 42
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_KEYWORD 43
#define NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS 44
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_KEYWORD 45

/*
 * Character-reference constants. "DECIMAL_NRC_LOOP" is spelled NRC (not NCR)
 * in the generated name; it is kept verbatim so existing users still compile.
 */
#define NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE 46
#define NS_HTML5TOKENIZER_CONSUME_NCR 47
#define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL 48
#define NS_HTML5TOKENIZER_HEX_NCR_LOOP 49
#define NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP 50
#define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE 51
#define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE_RECONSUME 52
#define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP 53

/* Self-closing tag and CDATA constants. */
#define NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG 54
#define NS_HTML5TOKENIZER_CDATA_START 55
#define NS_HTML5TOKENIZER_CDATA_SECTION 56
#define NS_HTML5TOKENIZER_CDATA_RSQB 57
#define NS_HTML5TOKENIZER_CDATA_RSQB_RSQB 58

/* Script-data, bogus-comment and RAWTEXT/RCDATA constants. */
#define NS_HTML5TOKENIZER_SCRIPT_DATA_LESS_THAN_SIGN 59
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START 60
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH 61
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH 62
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH 63
#define NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN 64
#define NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN 65
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN 66
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START 67
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED 68
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN 69
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH 70
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH 71
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END 72

/* Processing-instruction constants. */
#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION 73
#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK 74

/*
 * 0xD800 - (0x10000 >> 10), i.e. 0xD7C0. This has the form of the usual
 * UTF-16 lead-surrogate offset (presumably used when a code point above
 * U+FFFF is split into a surrogate pair -- confirm at the use site).
 */
#define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10))

/* Buffer growth increment; NOTE(review): unit and users are not visible here. */
#define NS_HTML5TOKENIZER_BUFFER_GROW_BY 1024
376 :
377 :
378 : #endif
379 :
|