1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=2 sw=2 et tw=80: */
3 : /* ***** BEGIN LICENSE BLOCK *****
4 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 : *
6 : * The contents of this file are subject to the Mozilla Public License Version
7 : * 1.1 (the "License"); you may not use this file except in compliance with
8 : * the License. You may obtain a copy of the License at
9 : * http://www.mozilla.org/MPL/
10 : *
11 : * Software distributed under the License is distributed on an "AS IS" basis,
12 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 : * for the specific language governing rights and limitations under the
14 : * License.
15 : *
16 : * The Original Code is mozilla.org code.
17 : *
18 : * The Initial Developer of the Original Code is
19 : * Netscape Communications Corporation.
20 : * Portions created by the Initial Developer are Copyright (C) 1998
21 : * the Initial Developer. All Rights Reserved.
22 : *
23 : * Contributor(s):
24 : * Ryan Jones <sciguyryan@gmail.com>
25 : * Laurent Jouanneau <laurent.jouanneau@disruptive-innovations.com>
26 : *
27 : * Alternatively, the contents of this file may be used under the terms of
28 : * either of the GNU General Public License Version 2 or later (the "GPL"),
29 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 : * in which case the provisions of the GPL or the LGPL are applicable instead
31 : * of those above. If you wish to allow use of your version of this file only
32 : * under the terms of either the GPL or the LGPL, and not to allow others to
33 : * use your version of this file under the terms of the MPL, indicate your
34 : * decision by deleting the provisions above and replace them with the notice
35 : * and other provisions required by the GPL or the LGPL. If you do not delete
36 : * the provisions above, a recipient may use your version of this file under
37 : * the terms of any one of the MPL, the GPL or the LGPL.
38 : *
39 : * ***** END LICENSE BLOCK ***** */
40 :
41 : /*
42 : * nsIContentSerializer implementation that can be used with an
43 : * nsIDocumentEncoder to convert an XHTML (not HTML!) DOM to an XHTML
44 : * string that could be parsed into more or less the original DOM.
45 : */
46 :
47 : #include "nsXHTMLContentSerializer.h"
48 :
49 : #include "nsIDOMElement.h"
50 : #include "nsIContent.h"
51 : #include "nsIDocument.h"
52 : #include "nsIDOMDocument.h"
53 : #include "nsINameSpaceManager.h"
54 : #include "nsString.h"
55 : #include "nsUnicharUtils.h"
56 : #include "nsXPIDLString.h"
57 : #include "nsIServiceManager.h"
58 : #include "nsIDocumentEncoder.h"
59 : #include "nsGkAtoms.h"
60 : #include "nsIURI.h"
61 : #include "nsNetUtil.h"
62 : #include "nsEscape.h"
63 : #include "nsITextToSubURI.h"
64 : #include "nsCRT.h"
65 : #include "nsIParserService.h"
66 : #include "nsContentUtils.h"
67 : #include "nsLWBrkCIID.h"
68 : #include "nsIScriptElement.h"
69 : #include "nsAttrName.h"
70 : #include "nsParserConstants.h"
71 :
72 : static const char kMozStr[] = "moz";
73 :
74 : static const PRInt32 kLongLineLen = 128;
75 :
76 : #define kXMLNS "xmlns"
77 :
78 215 : nsresult NS_NewXHTMLContentSerializer(nsIContentSerializer** aSerializer)
79 : {
80 215 : nsXHTMLContentSerializer* it = new nsXHTMLContentSerializer();
81 215 : if (!it) {
82 0 : return NS_ERROR_OUT_OF_MEMORY;
83 : }
84 :
85 215 : return CallQueryInterface(it, aSerializer);
86 : }
87 :
88 215 : nsXHTMLContentSerializer::nsXHTMLContentSerializer()
89 215 : : mIsHTMLSerializer(false)
90 : {
91 215 : }
92 :
93 645 : nsXHTMLContentSerializer::~nsXHTMLContentSerializer()
94 : {
95 215 : NS_ASSERTION(mOLStateStack.IsEmpty(), "Expected OL State stack to be empty");
96 860 : }
97 :
98 : NS_IMETHODIMP
99 215 : nsXHTMLContentSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn,
100 : const char* aCharSet, bool aIsCopying,
101 : bool aRewriteEncodingDeclaration)
102 : {
103 : // The previous version of the HTML serializer did implicit wrapping
104 : // when there is no flags, so we keep wrapping in order to keep
105 : // compatibility with the existing calling code
106 : // XXXLJ perhaps should we remove this default settings later ?
107 215 : if (aFlags & nsIDocumentEncoder::OutputFormatted ) {
108 0 : aFlags = aFlags | nsIDocumentEncoder::OutputWrap;
109 : }
110 :
111 : nsresult rv;
112 215 : rv = nsXMLContentSerializer::Init(aFlags, aWrapColumn, aCharSet, aIsCopying, aRewriteEncodingDeclaration);
113 215 : NS_ENSURE_SUCCESS(rv, rv);
114 :
115 215 : mRewriteEncodingDeclaration = aRewriteEncodingDeclaration;
116 215 : mIsCopying = aIsCopying;
117 215 : mIsFirstChildOfOL = false;
118 215 : mInBody = 0;
119 215 : mDisableEntityEncoding = 0;
120 : mBodyOnly = (mFlags & nsIDocumentEncoder::OutputBodyOnly) ? true
121 215 : : false;
122 :
123 : // set up entity converter if we are going to need it
124 215 : if (mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities) {
125 0 : mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID);
126 : }
127 215 : return NS_OK;
128 : }
129 :
130 :
131 : // See if the string has any lines longer than longLineLen:
132 : // if so, we presume formatting is wonky (e.g. the node has been edited)
133 : // and we'd better rewrap the whole text node.
134 : bool
135 0 : nsXHTMLContentSerializer::HasLongLines(const nsString& text, PRInt32& aLastNewlineOffset)
136 : {
137 0 : PRUint32 start=0;
138 0 : PRUint32 theLen = text.Length();
139 0 : bool rv = false;
140 0 : aLastNewlineOffset = kNotFound;
141 0 : for (start = 0; start < theLen; ) {
142 0 : PRInt32 eol = text.FindChar('\n', start);
143 0 : if (eol < 0) {
144 0 : eol = text.Length();
145 : }
146 : else {
147 0 : aLastNewlineOffset = eol;
148 : }
149 0 : if (PRInt32(eol - start) > kLongLineLen)
150 0 : rv = true;
151 0 : start = eol + 1;
152 : }
153 0 : return rv;
154 : }
155 :
156 : NS_IMETHODIMP
157 13 : nsXHTMLContentSerializer::AppendText(nsIContent* aText,
158 : PRInt32 aStartOffset,
159 : PRInt32 aEndOffset,
160 : nsAString& aStr)
161 : {
162 13 : NS_ENSURE_ARG(aText);
163 :
164 26 : nsAutoString data;
165 : nsresult rv;
166 :
167 13 : rv = AppendTextData(aText, aStartOffset, aEndOffset, data, true);
168 13 : if (NS_FAILED(rv))
169 0 : return NS_ERROR_FAILURE;
170 :
171 13 : if (mPreLevel > 0 || mDoRaw) {
172 13 : AppendToStringConvertLF(data, aStr);
173 : }
174 0 : else if (mDoFormat) {
175 0 : AppendToStringFormatedWrapped(data, aStr);
176 : }
177 0 : else if (mDoWrap) {
178 0 : AppendToStringWrapped(data, aStr);
179 : }
180 : else {
181 0 : PRInt32 lastNewlineOffset = kNotFound;
182 0 : if (HasLongLines(data, lastNewlineOffset)) {
183 : // We have long lines, rewrap
184 0 : mDoWrap = true;
185 0 : AppendToStringWrapped(data, aStr);
186 0 : mDoWrap = false;
187 : }
188 : else {
189 0 : AppendToStringConvertLF(data, aStr);
190 : }
191 : }
192 :
193 13 : return NS_OK;
194 : }
195 :
196 : nsresult
197 0 : nsXHTMLContentSerializer::EscapeURI(nsIContent* aContent, const nsAString& aURI, nsAString& aEscapedURI)
198 : {
199 : // URL escape %xx cannot be used in JS.
200 : // No escaping if the scheme is 'javascript'.
201 0 : if (IsJavaScript(aContent, nsGkAtoms::href, kNameSpaceID_None, aURI)) {
202 0 : aEscapedURI = aURI;
203 0 : return NS_OK;
204 : }
205 :
206 : // nsITextToSubURI does charset convert plus uri escape
207 : // This is needed to convert to a document charset which is needed to support existing browsers.
208 : // But we eventually want to use UTF-8 instead of a document charset, then the code would be much simpler.
209 : // See HTML 4.01 spec, "Appendix B.2.1 Non-ASCII characters in URI attribute values"
210 0 : nsCOMPtr<nsITextToSubURI> textToSubURI;
211 0 : nsAutoString uri(aURI); // in order to use FindCharInSet()
212 0 : nsresult rv = NS_OK;
213 :
214 0 : if (!mCharset.IsEmpty() && !IsASCII(uri)) {
215 0 : textToSubURI = do_GetService(NS_ITEXTTOSUBURI_CONTRACTID, &rv);
216 0 : NS_ENSURE_SUCCESS(rv, rv);
217 : }
218 :
219 0 : PRInt32 start = 0;
220 : PRInt32 end;
221 0 : nsAutoString part;
222 0 : nsXPIDLCString escapedURI;
223 0 : aEscapedURI.Truncate(0);
224 :
225 : // Loop and escape parts by avoiding escaping reserved characters
226 : // (and '%', '#', as well as '[' and ']' for IPv6 address literals).
227 0 : while ((end = uri.FindCharInSet("%#;/?:@&=+$,[]", start)) != -1) {
228 0 : part = Substring(aURI, start, (end-start));
229 0 : if (textToSubURI && !IsASCII(part)) {
230 0 : rv = textToSubURI->ConvertAndEscape(mCharset.get(), part.get(), getter_Copies(escapedURI));
231 0 : NS_ENSURE_SUCCESS(rv, rv);
232 : }
233 : else {
234 0 : escapedURI.Adopt(nsEscape(NS_ConvertUTF16toUTF8(part).get(), url_Path));
235 : }
236 0 : AppendASCIItoUTF16(escapedURI, aEscapedURI);
237 :
238 : // Append a reserved character without escaping.
239 0 : part = Substring(aURI, end, 1);
240 0 : aEscapedURI.Append(part);
241 0 : start = end + 1;
242 : }
243 :
244 0 : if (start < (PRInt32) aURI.Length()) {
245 : // Escape the remaining part.
246 0 : part = Substring(aURI, start, aURI.Length()-start);
247 0 : if (textToSubURI) {
248 0 : rv = textToSubURI->ConvertAndEscape(mCharset.get(), part.get(), getter_Copies(escapedURI));
249 0 : NS_ENSURE_SUCCESS(rv, rv);
250 : }
251 : else {
252 0 : escapedURI.Adopt(nsEscape(NS_ConvertUTF16toUTF8(part).get(), url_Path));
253 : }
254 0 : AppendASCIItoUTF16(escapedURI, aEscapedURI);
255 : }
256 :
257 0 : return rv;
258 : }
259 :
260 : void
261 412 : nsXHTMLContentSerializer::SerializeAttributes(nsIContent* aContent,
262 : nsIContent *aOriginalElement,
263 : nsAString& aTagPrefix,
264 : const nsAString& aTagNamespaceURI,
265 : nsIAtom* aTagName,
266 : nsAString& aStr,
267 : PRUint32 aSkipAttr,
268 : bool aAddNSAttr)
269 : {
270 : nsresult rv;
271 : PRUint32 index, count;
272 824 : nsAutoString prefixStr, uriStr, valueStr;
273 824 : nsAutoString xmlnsStr;
274 412 : xmlnsStr.AssignLiteral(kXMLNS);
275 :
276 412 : PRInt32 contentNamespaceID = aContent->GetNameSpaceID();
277 :
278 : // this method is not called by nsHTMLContentSerializer
279 : // so we don't have to check HTML element, just XHTML
280 :
281 412 : if (mIsCopying && kNameSpaceID_XHTML == contentNamespaceID) {
282 :
283 : // Need to keep track of OL and LI elements in order to get ordinal number
284 : // for the LI.
285 0 : if (aTagName == nsGkAtoms::ol) {
286 : // We are copying and current node is an OL;
287 : // Store its start attribute value in olState->startVal.
288 0 : nsAutoString start;
289 0 : PRInt32 startAttrVal = 0;
290 0 : aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::start, start);
291 0 : if (!start.IsEmpty()) {
292 0 : PRInt32 rv = 0;
293 0 : startAttrVal = start.ToInteger(&rv);
294 : //If OL has "start" attribute, first LI element has to start with that value
295 : //Therefore subtracting 1 as all the LI elements are incrementing it before using it;
296 : //In failure of ToInteger(), default StartAttrValue to 0.
297 0 : if (NS_SUCCEEDED(rv))
298 0 : --startAttrVal;
299 : else
300 0 : startAttrVal = 0;
301 : }
302 0 : olState state (startAttrVal, true);
303 0 : mOLStateStack.AppendElement(state);
304 : }
305 0 : else if (aTagName == nsGkAtoms::li) {
306 0 : mIsFirstChildOfOL = IsFirstChildOfOL(aOriginalElement);
307 0 : if (mIsFirstChildOfOL) {
308 : // If OL is parent of this LI, serialize attributes in different manner.
309 0 : SerializeLIValueAttribute(aContent, aStr);
310 : }
311 : }
312 : }
313 :
314 : // If we had to add a new namespace declaration, serialize
315 : // and push it on the namespace stack
316 412 : if (aAddNSAttr) {
317 23 : if (aTagPrefix.IsEmpty()) {
318 : // Serialize default namespace decl
319 12 : SerializeAttr(EmptyString(), xmlnsStr, aTagNamespaceURI, aStr, true);
320 : } else {
321 : // Serialize namespace decl
322 11 : SerializeAttr(xmlnsStr, aTagPrefix, aTagNamespaceURI, aStr, true);
323 : }
324 23 : PushNameSpaceDecl(aTagPrefix, aTagNamespaceURI, aOriginalElement);
325 : }
326 :
327 824 : NS_NAMED_LITERAL_STRING(_mozStr, "_moz");
328 :
329 412 : count = aContent->GetAttrCount();
330 :
331 : // Now serialize each of the attributes
332 : // XXX Unfortunately we need a namespace manager to get
333 : // attribute URIs.
334 2504 : for (index = 0; index < count; index++) {
335 :
336 2092 : if (aSkipAttr == index) {
337 8 : continue;
338 : }
339 :
340 2084 : const nsAttrName* name = aContent->GetAttrNameAt(index);
341 2084 : PRInt32 namespaceID = name->NamespaceID();
342 2084 : nsIAtom* attrName = name->LocalName();
343 2084 : nsIAtom* attrPrefix = name->GetPrefix();
344 :
345 : // Filter out any attribute starting with [-|_]moz
346 4168 : nsDependentAtomString attrNameStr(attrName);
347 8336 : if (StringBeginsWith(attrNameStr, NS_LITERAL_STRING("_moz")) ||
348 6252 : StringBeginsWith(attrNameStr, NS_LITERAL_STRING("-moz"))) {
349 0 : continue;
350 : }
351 :
352 2084 : if (attrPrefix) {
353 46 : attrPrefix->ToString(prefixStr);
354 : }
355 : else {
356 2038 : prefixStr.Truncate();
357 : }
358 :
359 2084 : bool addNSAttr = false;
360 2084 : if (kNameSpaceID_XMLNS != namespaceID) {
361 1876 : nsContentUtils::NameSpaceManager()->GetNameSpaceURI(namespaceID, uriStr);
362 1876 : addNSAttr = ConfirmPrefix(prefixStr, uriStr, aOriginalElement, true);
363 : }
364 :
365 2084 : aContent->GetAttr(namespaceID, attrName, valueStr);
366 :
367 4168 : nsDependentAtomString nameStr(attrName);
368 2084 : bool isJS = false;
369 :
370 2084 : if (kNameSpaceID_XHTML == contentNamespaceID) {
371 : //
372 : // Filter out special case of <br type="_moz"> or <br _moz*>,
373 : // used by the editor. Bug 16988. Yuck.
374 : //
375 6 : if (namespaceID == kNameSpaceID_None && aTagName == nsGkAtoms::br && attrName == nsGkAtoms::type
376 0 : && StringBeginsWith(valueStr, _mozStr)) {
377 0 : continue;
378 : }
379 :
380 6 : if (mIsCopying && mIsFirstChildOfOL && (aTagName == nsGkAtoms::li)
381 : && (attrName == nsGkAtoms::value)) {
382 : // This is handled separately in SerializeLIValueAttribute()
383 0 : continue;
384 : }
385 :
386 6 : isJS = IsJavaScript(aContent, attrName, namespaceID, valueStr);
387 :
388 6 : if (namespaceID == kNameSpaceID_None &&
389 : ((attrName == nsGkAtoms::href) ||
390 : (attrName == nsGkAtoms::src))) {
391 : // Make all links absolute when converting only the selection:
392 0 : if (mFlags & nsIDocumentEncoder::OutputAbsoluteLinks) {
393 : // Would be nice to handle OBJECT and APPLET tags,
394 : // but that gets more complicated since we have to
395 : // search the tag list for CODEBASE as well.
396 : // For now, just leave them relative.
397 0 : nsCOMPtr<nsIURI> uri = aContent->GetBaseURI();
398 0 : if (uri) {
399 0 : nsAutoString absURI;
400 0 : rv = NS_MakeAbsoluteURI(absURI, valueStr, uri);
401 0 : if (NS_SUCCEEDED(rv)) {
402 0 : valueStr = absURI;
403 : }
404 : }
405 : }
406 : // Need to escape URI.
407 0 : nsAutoString tempURI(valueStr);
408 0 : if (!isJS && NS_FAILED(EscapeURI(aContent, tempURI, valueStr)))
409 0 : valueStr = tempURI;
410 : }
411 :
412 6 : if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta &&
413 : attrName == nsGkAtoms::content) {
414 : // If we're serializing a <meta http-equiv="content-type">,
415 : // use the proper value, rather than what's in the document.
416 0 : nsAutoString header;
417 0 : aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
418 0 : if (header.LowerCaseEqualsLiteral("content-type")) {
419 0 : valueStr = NS_LITERAL_STRING("text/html; charset=") +
420 0 : NS_ConvertASCIItoUTF16(mCharset);
421 : }
422 : }
423 :
424 : // Expand shorthand attribute.
425 6 : if (namespaceID == kNameSpaceID_None && IsShorthandAttr(attrName, aTagName) && valueStr.IsEmpty()) {
426 0 : valueStr = nameStr;
427 : }
428 : }
429 : else {
430 2078 : isJS = IsJavaScript(aContent, attrName, namespaceID, valueStr);
431 : }
432 :
433 2084 : SerializeAttr(prefixStr, nameStr, valueStr, aStr, !isJS);
434 :
435 2084 : if (addNSAttr) {
436 10 : NS_ASSERTION(!prefixStr.IsEmpty(),
437 : "Namespaced attributes must have a prefix");
438 10 : SerializeAttr(xmlnsStr, prefixStr, uriStr, aStr, true);
439 10 : PushNameSpaceDecl(prefixStr, uriStr, aOriginalElement);
440 : }
441 : }
442 412 : }
443 :
444 :
445 : void
446 412 : nsXHTMLContentSerializer::AppendEndOfElementStart(nsIContent *aOriginalElement,
447 : nsIAtom * aName,
448 : PRInt32 aNamespaceID,
449 : nsAString& aStr)
450 : {
451 : // this method is not called by nsHTMLContentSerializer
452 : // so we don't have to check HTML element, just XHTML
453 412 : NS_ASSERTION(!mIsHTMLSerializer, "nsHTMLContentSerializer shouldn't call this method !");
454 :
455 412 : if (kNameSpaceID_XHTML != aNamespaceID) {
456 : nsXMLContentSerializer::AppendEndOfElementStart(aOriginalElement, aName,
457 406 : aNamespaceID, aStr);
458 406 : return;
459 : }
460 :
461 6 : nsIContent* content = aOriginalElement;
462 :
463 : // for non empty elements, even if they are not a container, we always
464 : // serialize their content, because the XHTML element could contain non XHTML
465 : // nodes useful in some context, like in an XSLT stylesheet
466 6 : if (HasNoChildren(content)) {
467 :
468 2 : nsIParserService* parserService = nsContentUtils::GetParserService();
469 :
470 2 : if (parserService) {
471 : bool isContainer;
472 : parserService->
473 2 : IsContainer(parserService->HTMLCaseSensitiveAtomTagToId(aName),
474 4 : isContainer);
475 2 : if (!isContainer) {
476 : // for backward compatibility with HTML 4 user agents
477 : // only non-container HTML elements can be closed immediatly,
478 : // and a space is added before />
479 0 : AppendToString(NS_LITERAL_STRING(" />"), aStr);
480 0 : return;
481 : }
482 : }
483 : }
484 6 : AppendToString(kGreaterThan, aStr);
485 : }
486 :
487 : void
488 412 : nsXHTMLContentSerializer::AfterElementStart(nsIContent * aContent,
489 : nsIContent *aOriginalElement,
490 : nsAString& aStr)
491 : {
492 412 : nsIAtom *name = aContent->Tag();
493 412 : if (aContent->GetNameSpaceID() == kNameSpaceID_XHTML &&
494 : mRewriteEncodingDeclaration &&
495 : name == nsGkAtoms::head) {
496 :
497 : // Check if there already are any content-type meta children.
498 : // If there are, they will be modified to use the correct charset.
499 : // If there aren't, we'll insert one here.
500 0 : bool hasMeta = false;
501 0 : for (nsIContent* child = aContent->GetFirstChild();
502 : child;
503 0 : child = child->GetNextSibling()) {
504 0 : if (child->IsHTML(nsGkAtoms::meta) &&
505 0 : child->HasAttr(kNameSpaceID_None, nsGkAtoms::content)) {
506 0 : nsAutoString header;
507 0 : child->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
508 :
509 0 : if (header.LowerCaseEqualsLiteral("content-type")) {
510 0 : hasMeta = true;
511 : break;
512 : }
513 : }
514 : }
515 :
516 0 : if (!hasMeta) {
517 0 : AppendNewLineToString(aStr);
518 0 : if (mDoFormat) {
519 0 : AppendIndentation(aStr);
520 : }
521 0 : AppendToString(NS_LITERAL_STRING("<meta http-equiv=\"content-type\""),
522 0 : aStr);
523 0 : AppendToString(NS_LITERAL_STRING(" content=\"text/html; charset="), aStr);
524 0 : AppendToString(NS_ConvertASCIItoUTF16(mCharset), aStr);
525 0 : if (mIsHTMLSerializer)
526 0 : AppendToString(NS_LITERAL_STRING("\">"), aStr);
527 : else
528 0 : AppendToString(NS_LITERAL_STRING("\" />"), aStr);
529 : }
530 : }
531 412 : }
532 :
533 : void
534 203 : nsXHTMLContentSerializer::AfterElementEnd(nsIContent * aContent,
535 : nsAString& aStr)
536 : {
537 203 : NS_ASSERTION(!mIsHTMLSerializer, "nsHTMLContentSerializer shouldn't call this method !");
538 :
539 203 : PRInt32 namespaceID = aContent->GetNameSpaceID();
540 203 : nsIAtom *name = aContent->Tag();
541 :
542 : // this method is not called by nsHTMLContentSerializer
543 : // so we don't have to check HTML element, just XHTML
544 203 : if (kNameSpaceID_XHTML == namespaceID && name == nsGkAtoms::body) {
545 0 : --mInBody;
546 : }
547 203 : }
548 :
549 :
550 : NS_IMETHODIMP
551 63 : nsXHTMLContentSerializer::AppendDocumentStart(nsIDocument *aDocument,
552 : nsAString& aStr)
553 : {
554 63 : if (!mBodyOnly)
555 63 : return nsXMLContentSerializer::AppendDocumentStart(aDocument, aStr);
556 :
557 0 : return NS_OK;
558 : }
559 :
560 : bool
561 412 : nsXHTMLContentSerializer::CheckElementStart(nsIContent * aContent,
562 : bool & aForceFormat,
563 : nsAString& aStr)
564 : {
565 : // The _moz_dirty attribute is emitted by the editor to
566 : // indicate that this element should be pretty printed
567 : // even if we're not in pretty printing mode
568 : aForceFormat = aContent->HasAttr(kNameSpaceID_None,
569 412 : nsGkAtoms::mozdirty);
570 :
571 412 : nsIAtom *name = aContent->Tag();
572 412 : PRInt32 namespaceID = aContent->GetNameSpaceID();
573 :
574 412 : if (namespaceID == kNameSpaceID_XHTML) {
575 6 : if (name == nsGkAtoms::br && mPreLevel > 0 &&
576 : (mFlags & nsIDocumentEncoder::OutputNoFormattingInPre)) {
577 0 : AppendNewLineToString(aStr);
578 0 : return false;
579 : }
580 :
581 6 : if (name == nsGkAtoms::body) {
582 0 : ++mInBody;
583 : }
584 : }
585 412 : return true;
586 : }
587 :
588 : bool
589 412 : nsXHTMLContentSerializer::CheckElementEnd(nsIContent * aContent,
590 : bool & aForceFormat,
591 : nsAString& aStr)
592 : {
593 412 : NS_ASSERTION(!mIsHTMLSerializer, "nsHTMLContentSerializer shouldn't call this method !");
594 :
595 : aForceFormat = aContent->HasAttr(kNameSpaceID_None,
596 412 : nsGkAtoms::mozdirty);
597 :
598 412 : nsIAtom *name = aContent->Tag();
599 412 : PRInt32 namespaceID = aContent->GetNameSpaceID();
600 :
601 : // this method is not called by nsHTMLContentSerializer
602 : // so we don't have to check HTML element, just XHTML
603 412 : if (namespaceID == kNameSpaceID_XHTML) {
604 6 : if (mIsCopying && name == nsGkAtoms::ol) {
605 0 : NS_ASSERTION((!mOLStateStack.IsEmpty()), "Cannot have an empty OL Stack");
606 : /* Though at this point we must always have an state to be deleted as all
607 : the OL opening tags are supposed to push an olState object to the stack*/
608 0 : if (!mOLStateStack.IsEmpty()) {
609 0 : mOLStateStack.RemoveElementAt(mOLStateStack.Length() -1);
610 : }
611 : }
612 :
613 6 : if (HasNoChildren(aContent)) {
614 2 : nsIParserService* parserService = nsContentUtils::GetParserService();
615 :
616 2 : if (parserService) {
617 : bool isContainer;
618 :
619 : parserService->
620 2 : IsContainer(parserService->HTMLCaseSensitiveAtomTagToId(name),
621 4 : isContainer);
622 2 : if (!isContainer) {
623 : // non-container HTML elements are already closed,
624 : // see AppendEndOfElementStart
625 0 : return false;
626 : }
627 : }
628 : }
629 : // for backward compatibility with old HTML user agents,
630 : // empty elements should have an ending tag, so we mustn't call
631 : // nsXMLContentSerializer::CheckElementEnd
632 6 : return true;
633 : }
634 :
635 : bool dummyFormat;
636 406 : return nsXMLContentSerializer::CheckElementEnd(aContent, dummyFormat, aStr);
637 : }
638 :
639 : void
640 2130 : nsXHTMLContentSerializer::AppendAndTranslateEntities(const nsAString& aStr,
641 : nsAString& aOutputStr)
642 : {
643 2130 : if (mBodyOnly && !mInBody) {
644 0 : return;
645 : }
646 :
647 2130 : if (mDisableEntityEncoding) {
648 0 : aOutputStr.Append(aStr);
649 0 : return;
650 : }
651 :
652 2130 : nsXMLContentSerializer::AppendAndTranslateEntities(aStr, aOutputStr);
653 : }
654 :
655 : bool
656 0 : nsXHTMLContentSerializer::IsShorthandAttr(const nsIAtom* aAttrName,
657 : const nsIAtom* aElementName)
658 : {
659 : // checked
660 0 : if ((aAttrName == nsGkAtoms::checked) &&
661 : (aElementName == nsGkAtoms::input)) {
662 0 : return true;
663 : }
664 :
665 : // compact
666 0 : if ((aAttrName == nsGkAtoms::compact) &&
667 : (aElementName == nsGkAtoms::dir ||
668 : aElementName == nsGkAtoms::dl ||
669 : aElementName == nsGkAtoms::menu ||
670 : aElementName == nsGkAtoms::ol ||
671 : aElementName == nsGkAtoms::ul)) {
672 0 : return true;
673 : }
674 :
675 : // declare
676 0 : if ((aAttrName == nsGkAtoms::declare) &&
677 : (aElementName == nsGkAtoms::object)) {
678 0 : return true;
679 : }
680 :
681 : // defer
682 0 : if ((aAttrName == nsGkAtoms::defer) &&
683 : (aElementName == nsGkAtoms::script)) {
684 0 : return true;
685 : }
686 :
687 : // disabled
688 0 : if ((aAttrName == nsGkAtoms::disabled) &&
689 : (aElementName == nsGkAtoms::button ||
690 : aElementName == nsGkAtoms::input ||
691 : aElementName == nsGkAtoms::optgroup ||
692 : aElementName == nsGkAtoms::option ||
693 : aElementName == nsGkAtoms::select ||
694 : aElementName == nsGkAtoms::textarea)) {
695 0 : return true;
696 : }
697 :
698 : // ismap
699 0 : if ((aAttrName == nsGkAtoms::ismap) &&
700 : (aElementName == nsGkAtoms::img ||
701 : aElementName == nsGkAtoms::input)) {
702 0 : return true;
703 : }
704 :
705 : // multiple
706 0 : if ((aAttrName == nsGkAtoms::multiple) &&
707 : (aElementName == nsGkAtoms::select)) {
708 0 : return true;
709 : }
710 :
711 : // noresize
712 0 : if ((aAttrName == nsGkAtoms::noresize) &&
713 : (aElementName == nsGkAtoms::frame)) {
714 0 : return true;
715 : }
716 :
717 : // noshade
718 0 : if ((aAttrName == nsGkAtoms::noshade) &&
719 : (aElementName == nsGkAtoms::hr)) {
720 0 : return true;
721 : }
722 :
723 : // nowrap
724 0 : if ((aAttrName == nsGkAtoms::nowrap) &&
725 : (aElementName == nsGkAtoms::td ||
726 : aElementName == nsGkAtoms::th)) {
727 0 : return true;
728 : }
729 :
730 : // readonly
731 0 : if ((aAttrName == nsGkAtoms::readonly) &&
732 : (aElementName == nsGkAtoms::input ||
733 : aElementName == nsGkAtoms::textarea)) {
734 0 : return true;
735 : }
736 :
737 : // selected
738 0 : if ((aAttrName == nsGkAtoms::selected) &&
739 : (aElementName == nsGkAtoms::option)) {
740 0 : return true;
741 : }
742 :
743 : #ifdef MOZ_MEDIA
744 : // autoplay and controls
745 0 : if ((aElementName == nsGkAtoms::video || aElementName == nsGkAtoms::audio) &&
746 : (aAttrName == nsGkAtoms::autoplay || aAttrName == nsGkAtoms::muted ||
747 : aAttrName == nsGkAtoms::controls)) {
748 0 : return true;
749 : }
750 : #endif
751 :
752 0 : return false;
753 : }
754 :
755 : bool
756 412 : nsXHTMLContentSerializer::LineBreakBeforeOpen(PRInt32 aNamespaceID, nsIAtom* aName)
757 : {
758 :
759 412 : if (aNamespaceID != kNameSpaceID_XHTML) {
760 406 : return mAddSpace;
761 : }
762 :
763 6 : if (aName == nsGkAtoms::title ||
764 : aName == nsGkAtoms::meta ||
765 : aName == nsGkAtoms::link ||
766 : aName == nsGkAtoms::style ||
767 : aName == nsGkAtoms::select ||
768 : aName == nsGkAtoms::option ||
769 : aName == nsGkAtoms::script ||
770 : aName == nsGkAtoms::html) {
771 0 : return true;
772 : }
773 : else {
774 6 : nsIParserService* parserService = nsContentUtils::GetParserService();
775 :
776 6 : if (parserService) {
777 : bool res;
778 : parserService->
779 6 : IsBlock(parserService->HTMLCaseSensitiveAtomTagToId(aName), res);
780 6 : return res;
781 : }
782 : }
783 :
784 0 : return mAddSpace;
785 : }
786 :
787 : bool
788 0 : nsXHTMLContentSerializer::LineBreakAfterOpen(PRInt32 aNamespaceID, nsIAtom* aName)
789 : {
790 :
791 0 : if (aNamespaceID != kNameSpaceID_XHTML) {
792 0 : return false;
793 : }
794 :
795 0 : if ((aName == nsGkAtoms::html) ||
796 : (aName == nsGkAtoms::head) ||
797 : (aName == nsGkAtoms::body) ||
798 : (aName == nsGkAtoms::ul) ||
799 : (aName == nsGkAtoms::ol) ||
800 : (aName == nsGkAtoms::dl) ||
801 : (aName == nsGkAtoms::table) ||
802 : (aName == nsGkAtoms::tbody) ||
803 : (aName == nsGkAtoms::tr) ||
804 : (aName == nsGkAtoms::br) ||
805 : (aName == nsGkAtoms::meta) ||
806 : (aName == nsGkAtoms::link) ||
807 : (aName == nsGkAtoms::script) ||
808 : (aName == nsGkAtoms::select) ||
809 : (aName == nsGkAtoms::map) ||
810 : (aName == nsGkAtoms::area) ||
811 : (aName == nsGkAtoms::style)) {
812 0 : return true;
813 : }
814 :
815 0 : return false;
816 : }
817 :
818 : bool
819 0 : nsXHTMLContentSerializer::LineBreakBeforeClose(PRInt32 aNamespaceID, nsIAtom* aName)
820 : {
821 :
822 0 : if (aNamespaceID != kNameSpaceID_XHTML) {
823 0 : return false;
824 : }
825 :
826 0 : if ((aName == nsGkAtoms::html) ||
827 : (aName == nsGkAtoms::head) ||
828 : (aName == nsGkAtoms::body) ||
829 : (aName == nsGkAtoms::ul) ||
830 : (aName == nsGkAtoms::ol) ||
831 : (aName == nsGkAtoms::dl) ||
832 : (aName == nsGkAtoms::select) ||
833 : (aName == nsGkAtoms::table) ||
834 : (aName == nsGkAtoms::tbody)) {
835 0 : return true;
836 : }
837 0 : return false;
838 : }
839 :
840 : bool
841 0 : nsXHTMLContentSerializer::LineBreakAfterClose(PRInt32 aNamespaceID, nsIAtom* aName)
842 : {
843 :
844 0 : if (aNamespaceID != kNameSpaceID_XHTML) {
845 0 : return false;
846 : }
847 :
848 0 : if ((aName == nsGkAtoms::html) ||
849 : (aName == nsGkAtoms::head) ||
850 : (aName == nsGkAtoms::body) ||
851 : (aName == nsGkAtoms::tr) ||
852 : (aName == nsGkAtoms::th) ||
853 : (aName == nsGkAtoms::td) ||
854 : (aName == nsGkAtoms::pre) ||
855 : (aName == nsGkAtoms::title) ||
856 : (aName == nsGkAtoms::li) ||
857 : (aName == nsGkAtoms::dt) ||
858 : (aName == nsGkAtoms::dd) ||
859 : (aName == nsGkAtoms::blockquote) ||
860 : (aName == nsGkAtoms::select) ||
861 : (aName == nsGkAtoms::option) ||
862 : (aName == nsGkAtoms::p) ||
863 : (aName == nsGkAtoms::map) ||
864 : (aName == nsGkAtoms::div)) {
865 0 : return true;
866 : }
867 : else {
868 0 : nsIParserService* parserService = nsContentUtils::GetParserService();
869 :
870 0 : if (parserService) {
871 : bool res;
872 : parserService->
873 0 : IsBlock(parserService->HTMLCaseSensitiveAtomTagToId(aName), res);
874 0 : return res;
875 : }
876 : }
877 :
878 0 : return false;
879 : }
880 :
881 :
882 : void
883 412 : nsXHTMLContentSerializer::MaybeEnterInPreContent(nsIContent* aNode)
884 : {
885 :
886 412 : if (aNode->GetNameSpaceID() != kNameSpaceID_XHTML) {
887 406 : return;
888 : }
889 :
890 6 : nsIAtom *name = aNode->Tag();
891 :
892 6 : if (name == nsGkAtoms::pre ||
893 : name == nsGkAtoms::script ||
894 : name == nsGkAtoms::style ||
895 : name == nsGkAtoms::noscript ||
896 : name == nsGkAtoms::noframes
897 : ) {
898 0 : mPreLevel++;
899 : }
900 : }
901 :
902 : void
903 203 : nsXHTMLContentSerializer::MaybeLeaveFromPreContent(nsIContent* aNode)
904 : {
905 203 : if (aNode->GetNameSpaceID() != kNameSpaceID_XHTML) {
906 197 : return;
907 : }
908 :
909 6 : nsIAtom *name = aNode->Tag();
910 6 : if (name == nsGkAtoms::pre ||
911 : name == nsGkAtoms::script ||
912 : name == nsGkAtoms::style ||
913 : name == nsGkAtoms::noscript ||
914 : name == nsGkAtoms::noframes
915 : ) {
916 0 : --mPreLevel;
917 : }
918 : }
919 :
920 : void
921 0 : nsXHTMLContentSerializer::SerializeLIValueAttribute(nsIContent* aElement,
922 : nsAString& aStr)
923 : {
924 : // We are copying and we are at the "first" LI node of OL in selected range.
925 : // It may not be the first LI child of OL but it's first in the selected range.
926 : // Note that we get into this condition only once per a OL.
927 0 : bool found = false;
928 0 : nsCOMPtr<nsIDOMNode> currNode = do_QueryInterface(aElement);
929 0 : nsAutoString valueStr;
930 :
931 0 : olState state (0, false);
932 :
933 0 : if (!mOLStateStack.IsEmpty()) {
934 0 : state = mOLStateStack[mOLStateStack.Length()-1];
935 : // isFirstListItem should be true only before the serialization of the
936 : // first item in the list.
937 0 : state.isFirstListItem = false;
938 0 : mOLStateStack[mOLStateStack.Length()-1] = state;
939 : }
940 :
941 0 : PRInt32 startVal = state.startVal;
942 0 : PRInt32 offset = 0;
943 :
944 : // Traverse previous siblings until we find one with "value" attribute.
945 : // offset keeps track of how many previous siblings we had tocurrNode traverse.
946 0 : while (currNode && !found) {
947 0 : nsCOMPtr<nsIDOMElement> currElement = do_QueryInterface(currNode);
948 : // currElement may be null if it were a text node.
949 0 : if (currElement) {
950 0 : nsAutoString tagName;
951 0 : currElement->GetTagName(tagName);
952 0 : if (tagName.LowerCaseEqualsLiteral("li")) {
953 0 : currElement->GetAttribute(NS_LITERAL_STRING("value"), valueStr);
954 0 : if (valueStr.IsEmpty())
955 0 : offset++;
956 : else {
957 0 : found = true;
958 0 : PRInt32 rv = 0;
959 0 : startVal = valueStr.ToInteger(&rv);
960 : }
961 : }
962 : }
963 0 : nsCOMPtr<nsIDOMNode> tmp;
964 0 : currNode->GetPreviousSibling(getter_AddRefs(tmp));
965 0 : currNode.swap(tmp);
966 : }
967 : // If LI was not having "value", Set the "value" attribute for it.
968 : // Note that We are at the first LI in the selected range of OL.
969 0 : if (offset == 0 && found) {
970 : // offset = 0 => LI itself has the value attribute and we did not need to traverse back.
971 : // Just serialize value attribute like other tags.
972 0 : SerializeAttr(EmptyString(), NS_LITERAL_STRING("value"), valueStr, aStr, false);
973 : }
974 0 : else if (offset == 1 && !found) {
975 : /*(offset = 1 && !found) means either LI is the first child node of OL
976 : and LI is not having "value" attribute.
977 : In that case we would not like to set "value" attribute to reduce the changes.
978 : */
979 : //do nothing...
980 : }
981 0 : else if (offset > 0) {
982 : // Set value attribute.
983 0 : nsAutoString valueStr;
984 :
985 : //As serializer needs to use this valueAttr we are creating here,
986 0 : valueStr.AppendInt(startVal + offset);
987 0 : SerializeAttr(EmptyString(), NS_LITERAL_STRING("value"), valueStr, aStr, false);
988 : }
989 0 : }
990 :
991 : bool
992 0 : nsXHTMLContentSerializer::IsFirstChildOfOL(nsIContent* aElement)
993 : {
994 0 : nsCOMPtr<nsIDOMNode> node = do_QueryInterface(aElement);
995 0 : nsAutoString parentName;
996 :
997 0 : nsCOMPtr<nsIDOMNode> parentNode;
998 0 : node->GetParentNode(getter_AddRefs(parentNode));
999 0 : if (parentNode)
1000 0 : parentNode->GetNodeName(parentName);
1001 : else
1002 0 : return false;
1003 :
1004 0 : if (parentName.LowerCaseEqualsLiteral("ol")) {
1005 :
1006 0 : if (!mOLStateStack.IsEmpty()) {
1007 0 : olState state = mOLStateStack[mOLStateStack.Length()-1];
1008 0 : if (state.isFirstListItem)
1009 0 : return true;
1010 : }
1011 :
1012 0 : return false;
1013 : }
1014 : else
1015 0 : return false;
1016 : }
1017 :
1018 : bool
1019 12 : nsXHTMLContentSerializer::HasNoChildren(nsIContent * aContent) {
1020 :
1021 12 : for (nsIContent* child = aContent->GetFirstChild();
1022 : child;
1023 0 : child = child->GetNextSibling()) {
1024 :
1025 8 : if (!child->IsNodeOfType(nsINode::eTEXT))
1026 8 : return false;
1027 :
1028 0 : if (child->TextLength())
1029 0 : return false;
1030 : }
1031 :
1032 4 : return true;
1033 : }
|