1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=2 sw=2 et tw=80: */
3 : /* ***** BEGIN LICENSE BLOCK *****
4 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 : *
6 : * The contents of this file are subject to the Mozilla Public License Version
7 : * 1.1 (the "License"); you may not use this file except in compliance with
8 : * the License. You may obtain a copy of the License at
9 : * http://www.mozilla.org/MPL/
10 : *
11 : * Software distributed under the License is distributed on an "AS IS" basis,
12 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 : * for the specific language governing rights and limitations under the
14 : * License.
15 : *
16 : * The Original Code is mozilla.org code.
17 : *
18 : * The Initial Developer of the Original Code is
19 : * Netscape Communications Corporation.
20 : * Portions created by the Initial Developer are Copyright (C) 1998
21 : * the Initial Developer. All Rights Reserved.
22 : *
23 : * Contributor(s):
24 : * Ryan Jones <sciguyryan@gmail.com>
25 : * Laurent Jouanneau <laurent.jouanneau@disruptive-innovations.com>
26 : *
27 : * Alternatively, the contents of this file may be used under the terms of
28 : * either of the GNU General Public License Version 2 or later (the "GPL"),
29 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 : * in which case the provisions of the GPL or the LGPL are applicable instead
31 : * of those above. If you wish to allow use of your version of this file only
32 : * under the terms of either the GPL or the LGPL, and not to allow others to
33 : * use your version of this file under the terms of the MPL, indicate your
34 : * decision by deleting the provisions above and replace them with the notice
35 : * and other provisions required by the GPL or the LGPL. If you do not delete
36 : * the provisions above, a recipient may use your version of this file under
37 : * the terms of any one of the MPL, the GPL or the LGPL.
38 : *
39 : * ***** END LICENSE BLOCK ***** */
40 :
41 : /*
42 : * nsIContentSerializer implementation that can be used with an
43 : * nsIDocumentEncoder to convert an HTML (not XHTML!) DOM to an HTML
44 : * string that could be parsed into more or less the original DOM.
45 : */
46 :
47 : #include "nsHTMLContentSerializer.h"
48 :
49 : #include "nsIDOMElement.h"
50 : #include "nsIContent.h"
51 : #include "nsIDocument.h"
52 : #include "nsIDOMDocument.h"
53 : #include "nsINameSpaceManager.h"
54 : #include "nsString.h"
55 : #include "nsUnicharUtils.h"
56 : #include "nsXPIDLString.h"
57 : #include "nsIServiceManager.h"
58 : #include "nsIDocumentEncoder.h"
59 : #include "nsGkAtoms.h"
60 : #include "nsIURI.h"
61 : #include "nsNetUtil.h"
62 : #include "nsEscape.h"
63 : #include "nsITextToSubURI.h"
64 : #include "nsCRT.h"
65 : #include "nsIParserService.h"
66 : #include "nsContentUtils.h"
67 : #include "nsLWBrkCIID.h"
68 : #include "nsIScriptElement.h"
69 : #include "nsAttrName.h"
70 : #include "nsIDocShell.h"
71 : #include "nsIEditorDocShell.h"
72 : #include "nsIEditor.h"
73 : #include "nsIHTMLEditor.h"
74 : #include "mozilla/dom/Element.h"
75 : #include "nsParserConstants.h"
76 :
77 : using namespace mozilla::dom;
78 :
// Line-length constant (128 columns).
// NOTE(review): nothing in the visible part of this file references
// kLongLineLen -- presumably a leftover from older wrapping logic; confirm
// before removing.
79 : static const PRInt32 kLongLineLen = 128;
80 :
81 0 : nsresult NS_NewHTMLContentSerializer(nsIContentSerializer** aSerializer)
82 : {
83 0 : nsHTMLContentSerializer* it = new nsHTMLContentSerializer();
84 0 : if (!it) {
85 0 : return NS_ERROR_OUT_OF_MEMORY;
86 : }
87 :
88 0 : return CallQueryInterface(it, aSerializer);
89 : }
90 :
91 0 : nsHTMLContentSerializer::nsHTMLContentSerializer()
92 : {
93 0 : mIsHTMLSerializer = true;
94 0 : }
95 :
96 0 : nsHTMLContentSerializer::~nsHTMLContentSerializer()
97 : {
98 0 : }
99 :
100 :
101 : NS_IMETHODIMP
102 0 : nsHTMLContentSerializer::AppendDocumentStart(nsIDocument *aDocument,
103 : nsAString& aStr)
104 : {
105 0 : return NS_OK;
106 : }
107 :
108 : void
109 0 : nsHTMLContentSerializer::SerializeHTMLAttributes(nsIContent* aContent,
110 : nsIContent *aOriginalElement,
111 : nsAString& aTagPrefix,
112 : const nsAString& aTagNamespaceURI,
113 : nsIAtom* aTagName,
114 : PRInt32 aNamespace,
115 : nsAString& aStr)
116 : {
117 0 : PRInt32 count = aContent->GetAttrCount();
118 0 : if (!count)
119 0 : return;
120 :
121 : nsresult rv;
122 0 : nsAutoString valueStr;
123 0 : NS_NAMED_LITERAL_STRING(_mozStr, "_moz");
124 :
125 0 : for (PRInt32 index = count; index > 0;) {
126 0 : --index;
127 0 : const nsAttrName* name = aContent->GetAttrNameAt(index);
128 0 : PRInt32 namespaceID = name->NamespaceID();
129 0 : nsIAtom* attrName = name->LocalName();
130 :
131 : // Filter out any attribute starting with [-|_]moz
132 0 : nsDependentAtomString attrNameStr(attrName);
133 0 : if (StringBeginsWith(attrNameStr, NS_LITERAL_STRING("_moz")) ||
134 0 : StringBeginsWith(attrNameStr, NS_LITERAL_STRING("-moz"))) {
135 0 : continue;
136 : }
137 0 : aContent->GetAttr(namespaceID, attrName, valueStr);
138 :
139 : //
140 : // Filter out special case of <br type="_moz"> or <br _moz*>,
141 : // used by the editor. Bug 16988. Yuck.
142 : //
143 0 : if (aTagName == nsGkAtoms::br && aNamespace == kNameSpaceID_XHTML &&
144 : attrName == nsGkAtoms::type && namespaceID == kNameSpaceID_None &&
145 0 : StringBeginsWith(valueStr, _mozStr)) {
146 0 : continue;
147 : }
148 :
149 0 : if (mIsCopying && mIsFirstChildOfOL &&
150 : aTagName == nsGkAtoms::li && aNamespace == kNameSpaceID_XHTML &&
151 : attrName == nsGkAtoms::value && namespaceID == kNameSpaceID_None){
152 : // This is handled separately in SerializeLIValueAttribute()
153 0 : continue;
154 : }
155 0 : bool isJS = IsJavaScript(aContent, attrName, namespaceID, valueStr);
156 :
157 0 : if (((attrName == nsGkAtoms::href &&
158 : (namespaceID == kNameSpaceID_None ||
159 : namespaceID == kNameSpaceID_XLink)) ||
160 : (attrName == nsGkAtoms::src && namespaceID == kNameSpaceID_None))) {
161 : // Make all links absolute when converting only the selection:
162 0 : if (mFlags & nsIDocumentEncoder::OutputAbsoluteLinks) {
163 : // Would be nice to handle OBJECT and APPLET tags,
164 : // but that gets more complicated since we have to
165 : // search the tag list for CODEBASE as well.
166 : // For now, just leave them relative.
167 0 : nsCOMPtr<nsIURI> uri = aContent->GetBaseURI();
168 0 : if (uri) {
169 0 : nsAutoString absURI;
170 0 : rv = NS_MakeAbsoluteURI(absURI, valueStr, uri);
171 0 : if (NS_SUCCEEDED(rv)) {
172 0 : valueStr = absURI;
173 : }
174 : }
175 : }
176 : // Need to escape URI.
177 0 : nsAutoString tempURI(valueStr);
178 0 : if (!isJS && NS_FAILED(EscapeURI(aContent, tempURI, valueStr)))
179 0 : valueStr = tempURI;
180 : }
181 :
182 0 : if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta &&
183 : aNamespace == kNameSpaceID_XHTML && attrName == nsGkAtoms::content
184 : && namespaceID == kNameSpaceID_None) {
185 : // If we're serializing a <meta http-equiv="content-type">,
186 : // use the proper value, rather than what's in the document.
187 0 : nsAutoString header;
188 0 : aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
189 0 : if (header.LowerCaseEqualsLiteral("content-type")) {
190 0 : valueStr = NS_LITERAL_STRING("text/html; charset=") +
191 0 : NS_ConvertASCIItoUTF16(mCharset);
192 : }
193 : }
194 :
195 0 : nsDependentAtomString nameStr(attrName);
196 0 : nsAutoString prefix;
197 0 : if (namespaceID == kNameSpaceID_XML) {
198 0 : prefix.Assign(NS_LITERAL_STRING("xml"));
199 0 : } else if (namespaceID == kNameSpaceID_XLink) {
200 0 : prefix.Assign(NS_LITERAL_STRING("xlink"));
201 : }
202 :
203 : // Expand shorthand attribute.
204 0 : if (aNamespace == kNameSpaceID_XHTML &&
205 : namespaceID == kNameSpaceID_None &&
206 0 : IsShorthandAttr(attrName, aTagName) &&
207 0 : valueStr.IsEmpty()) {
208 0 : valueStr = nameStr;
209 : }
210 0 : SerializeAttr(prefix, nameStr, valueStr, aStr, !isJS);
211 : }
212 : }
213 :
214 : NS_IMETHODIMP
215 0 : nsHTMLContentSerializer::AppendElementStart(Element* aElement,
216 : Element* aOriginalElement,
217 : nsAString& aStr)
218 : {
219 0 : NS_ENSURE_ARG(aElement);
220 :
221 0 : nsIContent* content = aElement;
222 :
223 0 : bool forceFormat = false;
224 0 : if (!CheckElementStart(content, forceFormat, aStr)) {
225 0 : return NS_OK;
226 : }
227 :
228 0 : nsIAtom *name = content->Tag();
229 0 : PRInt32 ns = content->GetNameSpaceID();
230 :
231 0 : bool lineBreakBeforeOpen = LineBreakBeforeOpen(ns, name);
232 :
233 0 : if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) {
234 0 : if (mColPos && lineBreakBeforeOpen) {
235 0 : AppendNewLineToString(aStr);
236 : }
237 : else {
238 0 : MaybeAddNewlineForRootNode(aStr);
239 : }
240 0 : if (!mColPos) {
241 0 : AppendIndentation(aStr);
242 : }
243 0 : else if (mAddSpace) {
244 0 : AppendToString(PRUnichar(' '), aStr);
245 0 : mAddSpace = false;
246 : }
247 : }
248 0 : else if (mAddSpace) {
249 0 : AppendToString(PRUnichar(' '), aStr);
250 0 : mAddSpace = false;
251 : }
252 : else {
253 0 : MaybeAddNewlineForRootNode(aStr);
254 : }
255 : // Always reset to avoid false newlines in case MaybeAddNewlineForRootNode wasn't
256 : // called
257 0 : mAddNewlineForRootNode = false;
258 :
259 0 : AppendToString(kLessThan, aStr);
260 :
261 0 : AppendToString(nsDependentAtomString(name), aStr);
262 :
263 0 : MaybeEnterInPreContent(content);
264 :
265 : // for block elements, we increase the indentation
266 0 : if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw)
267 0 : IncrIndentation(name);
268 :
269 : // Need to keep track of OL and LI elements in order to get ordinal number
270 : // for the LI.
271 0 : if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML){
272 : // We are copying and current node is an OL;
273 : // Store its start attribute value in olState->startVal.
274 0 : nsAutoString start;
275 0 : PRInt32 startAttrVal = 0;
276 :
277 0 : aElement->GetAttr(kNameSpaceID_None, nsGkAtoms::start, start);
278 0 : if (!start.IsEmpty()){
279 0 : PRInt32 rv = 0;
280 0 : startAttrVal = start.ToInteger(&rv);
281 : //If OL has "start" attribute, first LI element has to start with that value
282 : //Therefore subtracting 1 as all the LI elements are incrementing it before using it;
283 : //In failure of ToInteger(), default StartAttrValue to 0.
284 0 : if (NS_SUCCEEDED(rv))
285 0 : startAttrVal--;
286 : else
287 0 : startAttrVal = 0;
288 : }
289 0 : mOLStateStack.AppendElement(olState(startAttrVal, true));
290 : }
291 :
292 0 : if (mIsCopying && name == nsGkAtoms::li && ns == kNameSpaceID_XHTML) {
293 0 : mIsFirstChildOfOL = IsFirstChildOfOL(aOriginalElement);
294 0 : if (mIsFirstChildOfOL){
295 : // If OL is parent of this LI, serialize attributes in different manner.
296 0 : SerializeLIValueAttribute(aElement, aStr);
297 : }
298 : }
299 :
300 : // Even LI passed above have to go through this
301 : // for serializing attributes other than "value".
302 0 : nsAutoString dummyPrefix;
303 : SerializeHTMLAttributes(content,
304 : aOriginalElement,
305 : dummyPrefix,
306 0 : EmptyString(),
307 : name,
308 : ns,
309 0 : aStr);
310 :
311 0 : AppendToString(kGreaterThan, aStr);
312 :
313 0 : if (ns == kNameSpaceID_XHTML &&
314 : (name == nsGkAtoms::script ||
315 : name == nsGkAtoms::style ||
316 : name == nsGkAtoms::noscript ||
317 : name == nsGkAtoms::noframes)) {
318 0 : ++mDisableEntityEncoding;
319 : }
320 :
321 0 : if ((mDoFormat || forceFormat) && !mPreLevel &&
322 0 : !mDoRaw && LineBreakAfterOpen(ns, name)) {
323 0 : AppendNewLineToString(aStr);
324 : }
325 :
326 0 : AfterElementStart(content, aOriginalElement, aStr);
327 :
328 0 : return NS_OK;
329 : }
330 :
331 : NS_IMETHODIMP
332 0 : nsHTMLContentSerializer::AppendElementEnd(Element* aElement,
333 : nsAString& aStr)
334 : {
335 0 : NS_ENSURE_ARG(aElement);
336 :
337 0 : nsIContent* content = aElement;
338 :
339 0 : nsIAtom *name = content->Tag();
340 0 : PRInt32 ns = content->GetNameSpaceID();
341 :
342 0 : if (ns == kNameSpaceID_XHTML &&
343 : (name == nsGkAtoms::script ||
344 : name == nsGkAtoms::style ||
345 : name == nsGkAtoms::noscript ||
346 : name == nsGkAtoms::noframes)) {
347 0 : --mDisableEntityEncoding;
348 : }
349 :
350 : bool forceFormat = content->HasAttr(kNameSpaceID_None,
351 0 : nsGkAtoms::mozdirty);
352 :
353 0 : if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) {
354 0 : DecrIndentation(name);
355 : }
356 :
357 0 : if (name == nsGkAtoms::script) {
358 0 : nsCOMPtr<nsIScriptElement> script = do_QueryInterface(aElement);
359 :
360 0 : if (script && script->IsMalformed()) {
361 : // We're looking at a malformed script tag. This means that the end tag
362 : // was missing in the source. Imitate that here by not serializing the end
363 : // tag.
364 0 : --mPreLevel;
365 0 : return NS_OK;
366 : }
367 : }
368 0 : else if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML) {
369 0 : NS_ASSERTION((!mOLStateStack.IsEmpty()), "Cannot have an empty OL Stack");
370 : /* Though at this point we must always have an state to be deleted as all
371 : the OL opening tags are supposed to push an olState object to the stack*/
372 0 : if (!mOLStateStack.IsEmpty()) {
373 0 : mOLStateStack.RemoveElementAt(mOLStateStack.Length() -1);
374 : }
375 : }
376 :
377 0 : if (ns == kNameSpaceID_XHTML) {
378 0 : nsIParserService* parserService = nsContentUtils::GetParserService();
379 :
380 0 : if (parserService) {
381 : bool isContainer;
382 :
383 : parserService->
384 0 : IsContainer(parserService->HTMLCaseSensitiveAtomTagToId(name),
385 0 : isContainer);
386 0 : if (!isContainer) {
387 0 : return NS_OK;
388 : }
389 : }
390 : }
391 :
392 0 : if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) {
393 :
394 0 : bool lineBreakBeforeClose = LineBreakBeforeClose(ns, name);
395 :
396 0 : if (mColPos && lineBreakBeforeClose) {
397 0 : AppendNewLineToString(aStr);
398 : }
399 0 : if (!mColPos) {
400 0 : AppendIndentation(aStr);
401 : }
402 0 : else if (mAddSpace) {
403 0 : AppendToString(PRUnichar(' '), aStr);
404 0 : mAddSpace = false;
405 0 : }
406 : }
407 0 : else if (mAddSpace) {
408 0 : AppendToString(PRUnichar(' '), aStr);
409 0 : mAddSpace = false;
410 : }
411 :
412 0 : AppendToString(kEndTag, aStr);
413 0 : AppendToString(nsDependentAtomString(name), aStr);
414 0 : AppendToString(kGreaterThan, aStr);
415 :
416 0 : MaybeLeaveFromPreContent(content);
417 :
418 0 : if ((mDoFormat || forceFormat) && !mPreLevel
419 0 : && !mDoRaw && LineBreakAfterClose(ns, name)) {
420 0 : AppendNewLineToString(aStr);
421 : }
422 : else {
423 0 : MaybeFlagNewlineForRootNode(aElement);
424 : }
425 :
426 0 : if (name == nsGkAtoms::body && ns == kNameSpaceID_XHTML) {
427 0 : --mInBody;
428 : }
429 :
430 0 : return NS_OK;
431 : }
432 :
433 : static const PRUint16 kValNBSP = 160;
434 : static const char* kEntities[] = {
435 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
436 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
437 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
438 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, "&", nsnull,
439 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
440 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
441 : "<", nsnull, ">", nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
442 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
443 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
444 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
445 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
446 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
447 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
448 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
449 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
450 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
451 : " "
452 : };
453 :
454 : static const char* kAttrEntities[] = {
455 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
456 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
457 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
458 : nsnull, nsnull, nsnull, nsnull, """, nsnull, nsnull, nsnull, "&", nsnull,
459 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
460 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
461 : "<", nsnull, ">", nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
462 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
463 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
464 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
465 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
466 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
467 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
468 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
469 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
470 : nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull, nsnull,
471 : " "
472 : };
473 :
474 0 : PRUint32 FindNextBasicEntity(const nsAString& aStr,
475 : const PRUint32 aLen,
476 : PRUint32 aIndex,
477 : const char** aEntityTable,
478 : const char** aEntity)
479 : {
480 0 : for (; aIndex < aLen; ++aIndex) {
481 : // for each character in this chunk, check if it
482 : // needs to be replaced
483 0 : PRUnichar val = aStr[aIndex];
484 0 : if (val <= kValNBSP && aEntityTable[val]) {
485 0 : *aEntity = aEntityTable[val];
486 0 : return aIndex;
487 : }
488 : }
489 0 : return aIndex;
490 : }
491 :
492 : void
493 0 : nsHTMLContentSerializer::AppendAndTranslateEntities(const nsAString& aStr,
494 : nsAString& aOutputStr)
495 : {
496 0 : if (mBodyOnly && !mInBody) {
497 0 : return;
498 : }
499 :
500 0 : if (mDisableEntityEncoding) {
501 0 : aOutputStr.Append(aStr);
502 0 : return;
503 : }
504 :
505 : bool nonBasicEntities =
506 : !!(mFlags & (nsIDocumentEncoder::OutputEncodeLatin1Entities |
507 : nsIDocumentEncoder::OutputEncodeHTMLEntities |
508 0 : nsIDocumentEncoder::OutputEncodeW3CEntities));
509 :
510 0 : if (!nonBasicEntities &&
511 : (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities))) {
512 0 : const char **entityTable = mInAttribute ? kAttrEntities : kEntities;
513 0 : PRUint32 start = 0;
514 0 : const PRUint32 len = aStr.Length();
515 0 : for (PRUint32 i = 0; i < len; ++i) {
516 0 : const char* entity = nsnull;
517 0 : i = FindNextBasicEntity(aStr, len, i, entityTable, &entity);
518 0 : PRUint32 normalTextLen = i - start;
519 0 : if (normalTextLen) {
520 0 : aOutputStr.Append(Substring(aStr, start, normalTextLen));
521 : }
522 0 : if (entity) {
523 0 : aOutputStr.AppendASCII(entity);
524 0 : start = i + 1;
525 : }
526 : }
527 0 : return;
528 0 : } else if (nonBasicEntities) {
529 0 : nsIParserService* parserService = nsContentUtils::GetParserService();
530 :
531 0 : if (!parserService) {
532 0 : NS_ERROR("Can't get parser service");
533 0 : return;
534 : }
535 :
536 0 : nsReadingIterator<PRUnichar> done_reading;
537 0 : aStr.EndReading(done_reading);
538 :
539 : // for each chunk of |aString|...
540 0 : PRUint32 advanceLength = 0;
541 0 : nsReadingIterator<PRUnichar> iter;
542 :
543 0 : const char **entityTable = mInAttribute ? kAttrEntities : kEntities;
544 0 : nsCAutoString entityReplacement;
545 :
546 0 : for (aStr.BeginReading(iter);
547 : iter != done_reading;
548 0 : iter.advance(PRInt32(advanceLength))) {
549 0 : PRUint32 fragmentLength = iter.size_forward();
550 0 : PRUint32 lengthReplaced = 0; // the number of UTF-16 codepoints
551 : // replaced by a particular entity
552 0 : const PRUnichar* c = iter.get();
553 0 : const PRUnichar* fragmentStart = c;
554 0 : const PRUnichar* fragmentEnd = c + fragmentLength;
555 0 : const char* entityText = nsnull;
556 0 : const char* fullConstEntityText = nsnull;
557 0 : char* fullEntityText = nsnull;
558 :
559 0 : advanceLength = 0;
560 : // for each character in this chunk, check if it
561 : // needs to be replaced
562 0 : for (; c < fragmentEnd; c++, advanceLength++) {
563 0 : PRUnichar val = *c;
564 0 : if (val <= kValNBSP && entityTable[val]) {
565 0 : fullConstEntityText = entityTable[val];
566 0 : break;
567 0 : } else if (val > 127 &&
568 : ((val < 256 &&
569 : mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities) ||
570 : mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities)) {
571 0 : entityReplacement.Truncate();
572 0 : parserService->HTMLConvertUnicodeToEntity(val, entityReplacement);
573 :
574 0 : if (!entityReplacement.IsEmpty()) {
575 0 : entityText = entityReplacement.get();
576 0 : break;
577 : }
578 : }
579 0 : else if (val > 127 &&
580 : mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities &&
581 0 : mEntityConverter) {
582 0 : if (NS_IS_HIGH_SURROGATE(val) &&
583 0 : c + 1 < fragmentEnd &&
584 0 : NS_IS_LOW_SURROGATE(*(c + 1))) {
585 0 : PRUint32 valUTF32 = SURROGATE_TO_UCS4(val, *(++c));
586 0 : if (NS_SUCCEEDED(mEntityConverter->ConvertUTF32ToEntity(valUTF32,
587 : nsIEntityConverter::entityW3C, &fullEntityText))) {
588 0 : lengthReplaced = 2;
589 0 : break;
590 : }
591 : else {
592 0 : advanceLength++;
593 0 : }
594 : }
595 0 : else if (NS_SUCCEEDED(mEntityConverter->ConvertToEntity(val,
596 : nsIEntityConverter::entityW3C,
597 : &fullEntityText))) {
598 0 : lengthReplaced = 1;
599 0 : break;
600 : }
601 : }
602 : }
603 :
604 0 : aOutputStr.Append(fragmentStart, advanceLength);
605 0 : if (entityText) {
606 0 : aOutputStr.Append(PRUnichar('&'));
607 0 : AppendASCIItoUTF16(entityText, aOutputStr);
608 0 : aOutputStr.Append(PRUnichar(';'));
609 0 : advanceLength++;
610 : }
611 0 : else if (fullConstEntityText) {
612 0 : aOutputStr.AppendASCII(fullConstEntityText);
613 0 : ++advanceLength;
614 : }
615 : // if it comes from nsIEntityConverter, it already has '&' and ';'
616 0 : else if (fullEntityText) {
617 0 : AppendASCIItoUTF16(fullEntityText, aOutputStr);
618 0 : nsMemory::Free(fullEntityText);
619 0 : advanceLength += lengthReplaced;
620 : }
621 : }
622 : } else {
623 0 : nsXMLContentSerializer::AppendAndTranslateEntities(aStr, aOutputStr);
624 : }
625 : }
|