1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is mozilla.org code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 1998
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : * Daniel Bratell <bratell@lysator.liu.se>
24 : * Ben Bucksch <mozilla@bucksch.org>
25 : *
26 : * Alternatively, the contents of this file may be used under the terms of
27 : * either of the GNU General Public License Version 2 or later (the "GPL"),
28 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 : * in which case the provisions of the GPL or the LGPL are applicable instead
30 : * of those above. If you wish to allow use of your version of this file only
31 : * under the terms of either the GPL or the LGPL, and not to allow others to
32 : * use your version of this file under the terms of the MPL, indicate your
33 : * decision by deleting the provisions above and replace them with the notice
34 : * and other provisions required by the GPL or the LGPL. If you do not delete
35 : * the provisions above, a recipient may use your version of this file under
36 : * the terms of any one of the MPL, the GPL or the LGPL.
37 : *
38 : * ***** END LICENSE BLOCK ***** */
39 :
40 : /*
41 : * nsIContentSerializer implementation that can be used with an
42 : * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
43 : * (eg for copy/paste as plaintext).
44 : */
45 :
46 : #include "nsPlainTextSerializer.h"
47 : #include "nsLWBrkCIID.h"
48 : #include "nsIServiceManager.h"
49 : #include "nsGkAtoms.h"
50 : #include "nsINameSpaceManager.h"
51 : #include "nsTextFragment.h"
52 : #include "nsContentUtils.h"
53 : #include "nsReadableUtils.h"
54 : #include "nsUnicharUtils.h"
55 : #include "nsCRT.h"
56 : #include "mozilla/dom/Element.h"
57 : #include "mozilla/Preferences.h"
58 :
59 : using namespace mozilla;
60 : using namespace mozilla::dom;
61 :
62 : #define PREF_STRUCTS "converter.html2txt.structs"
63 : #define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"
64 :
65 : static const PRInt32 kTabSize=4;
66 : static const PRInt32 kOLNumberWidth = 3;
67 : static const PRInt32 kIndentSizeHeaders = 2; /* Indention of h1, if
68 : mHeaderStrategy = 1 or = 2.
69 : Indention of other headers
70 : is derived from that.
71 : XXX center h1? */
72 : static const PRInt32 kIndentIncrementHeaders = 2; /* If mHeaderStrategy = 1,
73 : indent h(x+1) this many
74 : columns more than h(x) */
75 : static const PRInt32 kIndentSizeList = kTabSize;
76 : // Indention of non-first lines of ul and ol
77 : static const PRInt32 kIndentSizeDD = kTabSize; // Indention of <dd>
78 : static const PRUnichar kNBSP = 160;
79 : static const PRUnichar kSPACE = ' ';
80 :
81 : static PRInt32 HeaderLevel(nsIAtom* aTag);
82 : static PRInt32 GetUnicharWidth(PRUnichar ucs);
83 : static PRInt32 GetUnicharStringWidth(const PRUnichar* pwcs, PRInt32 n);
84 :
85 : // Someday may want to make this non-const:
86 : static const PRUint32 TagStackSize = 500;
87 : static const PRUint32 OLStackSize = 100;
88 :
89 234 : nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer)
90 : {
91 234 : nsPlainTextSerializer* it = new nsPlainTextSerializer();
92 234 : if (!it) {
93 0 : return NS_ERROR_OUT_OF_MEMORY;
94 : }
95 :
96 234 : return CallQueryInterface(it, aSerializer);
97 : }
98 :
99 234 : nsPlainTextSerializer::nsPlainTextSerializer()
100 234 : : kSpace(NS_LITERAL_STRING(" ")) // Init of "constant"
101 : {
102 :
103 234 : mOutputString = nsnull;
104 234 : mHeadLevel = 0;
105 234 : mAtFirstColumn = true;
106 234 : mIndent = 0;
107 234 : mCiteQuoteLevel = 0;
108 234 : mStructs = true; // will be read from prefs later
109 234 : mHeaderStrategy = 1 /*indent increasingly*/; // ditto
110 234 : mDontWrapAnyQuotes = false; // ditto
111 234 : mHasWrittenCiteBlockquote = false;
112 234 : mSpanLevel = 0;
113 1872 : for (PRInt32 i = 0; i <= 6; i++) {
114 1638 : mHeaderCounter[i] = 0;
115 : }
116 :
117 : // Line breaker
118 234 : mWrapColumn = 72; // XXX magic number, we expect someone to reset this
119 234 : mCurrentLineWidth = 0;
120 :
121 : // Flow
122 234 : mEmptyLines = 1; // The start of the document is an "empty line" in itself,
123 234 : mInWhitespace = false;
124 234 : mPreFormatted = false;
125 234 : mStartedOutput = false;
126 :
127 : // initialize the tag stack to zero:
128 : // The stack only ever contains pointers to static atoms, so they don't
129 : // need refcounting.
130 234 : mTagStack = new nsIAtom*[TagStackSize];
131 234 : mTagStackIndex = 0;
132 234 : mIgnoreAboveIndex = (PRUint32)kNotFound;
133 :
134 : // initialize the OL stack, where numbers for ordered lists are kept
135 234 : mOLStack = new PRInt32[OLStackSize];
136 234 : mOLStackIndex = 0;
137 :
138 234 : mULCount = 0;
139 234 : }
140 :
141 702 : nsPlainTextSerializer::~nsPlainTextSerializer()
142 : {
143 234 : delete[] mTagStack;
144 234 : delete[] mOLStack;
145 234 : NS_WARN_IF_FALSE(mHeadLevel == 0, "Wrong head level!");
146 936 : }
147 :
148 2106 : NS_IMPL_ISUPPORTS1(nsPlainTextSerializer,
149 : nsIContentSerializer)
150 :
151 :
152 : NS_IMETHODIMP
153 234 : nsPlainTextSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn,
154 : const char* aCharSet, bool aIsCopying,
155 : bool aIsWholeDocument)
156 : {
157 : #ifdef DEBUG
158 : // Check if the major control flags are set correctly.
159 234 : if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) {
160 2 : NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted,
161 : "If you want format=flowed, you must combine it with "
162 : "nsIDocumentEncoder::OutputFormatted");
163 : }
164 :
165 234 : if (aFlags & nsIDocumentEncoder::OutputFormatted) {
166 2 : NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted),
167 : "Can't do formatted and preformatted output at the same time!");
168 : }
169 : #endif
170 :
171 234 : mFlags = aFlags;
172 234 : mWrapColumn = aWrapColumn;
173 :
174 : // Only create a linebreaker if we will handle wrapping.
175 234 : if (MayWrap()) {
176 2 : mLineBreaker = nsContentUtils::LineBreaker();
177 : }
178 :
179 : // Set the line break character:
180 234 : if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak)
181 : && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) {
182 : // Windows
183 2 : mLineBreak.AssignLiteral("\r\n");
184 : }
185 232 : else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) {
186 : // Mac
187 0 : mLineBreak.Assign(PRUnichar('\r'));
188 : }
189 232 : else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) {
190 : // Unix/DOM
191 0 : mLineBreak.Assign(PRUnichar('\n'));
192 : }
193 : else {
194 : // Platform/default
195 232 : mLineBreak.AssignLiteral(NS_LINEBREAK);
196 : }
197 :
198 234 : mLineBreakDue = false;
199 234 : mFloatingLines = -1;
200 :
201 234 : if (mFlags & nsIDocumentEncoder::OutputFormatted) {
202 : // Get some prefs that controls how we do formatted output
203 2 : mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs);
204 :
205 : mHeaderStrategy =
206 2 : Preferences::GetInt(PREF_HEADER_STRATEGY, mHeaderStrategy);
207 :
208 : // DontWrapAnyQuotes is set according to whether plaintext mail
209 : // is wrapping to window width -- see bug 134439.
210 : // We'll only want this if we're wrapping and formatted.
211 2 : if (mFlags & nsIDocumentEncoder::OutputWrap || mWrapColumn > 0) {
212 : mDontWrapAnyQuotes =
213 : Preferences::GetBool("mail.compose.wrap_to_window_width",
214 2 : mDontWrapAnyQuotes);
215 : }
216 : }
217 :
218 : // XXX We should let the caller pass this in.
219 234 : if (Preferences::GetBool("browser.frames.enabled")) {
220 234 : mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;
221 : }
222 : else {
223 0 : mFlags |= nsIDocumentEncoder::OutputNoFramesContent;
224 : }
225 :
226 234 : return NS_OK;
227 : }
228 :
229 : bool
230 0 : nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack)
231 : {
232 0 : PRUint32 size = aStack.Length();
233 0 : if (size == 0) {
234 0 : return false;
235 : }
236 0 : return aStack.ElementAt(size-1);
237 : }
238 :
239 : void
240 0 : nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue)
241 : {
242 0 : PRUint32 size = aStack.Length();
243 0 : if (size > 0) {
244 0 : aStack.ElementAt(size-1) = aValue;
245 : }
246 : else {
247 0 : NS_ERROR("There is no \"Last\" value");
248 : }
249 0 : }
250 :
251 : void
252 0 : nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue)
253 : {
254 0 : aStack.AppendElement(bool(aValue));
255 0 : }
256 :
257 : bool
258 0 : nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack)
259 : {
260 0 : bool returnValue = false;
261 0 : PRUint32 size = aStack.Length();
262 0 : if (size > 0) {
263 0 : returnValue = aStack.ElementAt(size-1);
264 0 : aStack.RemoveElementAt(size-1);
265 : }
266 0 : return returnValue;
267 : }
268 :
269 : NS_IMETHODIMP
270 286 : nsPlainTextSerializer::AppendText(nsIContent* aText,
271 : PRInt32 aStartOffset,
272 : PRInt32 aEndOffset,
273 : nsAString& aStr)
274 : {
275 286 : if (mIgnoreAboveIndex != (PRUint32)kNotFound) {
276 0 : return NS_OK;
277 : }
278 :
279 286 : NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
280 286 : if ( aStartOffset < 0 )
281 0 : return NS_ERROR_INVALID_ARG;
282 :
283 286 : NS_ENSURE_ARG(aText);
284 :
285 286 : nsresult rv = NS_OK;
286 :
287 286 : nsIContent* content = aText;
288 : const nsTextFragment* frag;
289 286 : if (!content || !(frag = content->GetText())) {
290 0 : return NS_ERROR_FAILURE;
291 : }
292 :
293 286 : PRInt32 endoffset = (aEndOffset == -1) ? frag->GetLength() : aEndOffset;
294 286 : NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!");
295 :
296 286 : PRInt32 length = endoffset - aStartOffset;
297 286 : if (length <= 0) {
298 0 : return NS_OK;
299 : }
300 :
301 572 : nsAutoString textstr;
302 286 : if (frag->Is2b()) {
303 1 : textstr.Assign(frag->Get2b() + aStartOffset, length);
304 : }
305 : else {
306 : // AssignASCII is for 7-bit character only, so don't use it
307 285 : const char *data = frag->Get1b();
308 285 : CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr);
309 : }
310 :
311 286 : mOutputString = &aStr;
312 :
313 : // We have to split the string across newlines
314 : // to match parser behavior
315 286 : PRInt32 start = 0;
316 286 : PRInt32 offset = textstr.FindCharInSet("\n\r");
317 576 : while (offset != kNotFound) {
318 :
319 4 : if (offset>start) {
320 : // Pass in the line
321 : DoAddText(false,
322 0 : Substring(textstr, start, offset-start));
323 : }
324 :
325 : // Pass in a newline
326 4 : DoAddText(true, mLineBreak);
327 :
328 4 : start = offset+1;
329 4 : offset = textstr.FindCharInSet("\n\r", start);
330 : }
331 :
332 : // Consume the last bit of the string if there's any left
333 286 : if (start < length) {
334 285 : if (start) {
335 2 : DoAddText(false, Substring(textstr, start, length - start));
336 : }
337 : else {
338 283 : DoAddText(false, textstr);
339 : }
340 : }
341 :
342 286 : mOutputString = nsnull;
343 :
344 286 : return rv;
345 : }
346 :
347 : NS_IMETHODIMP
348 0 : nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection,
349 : PRInt32 aStartOffset,
350 : PRInt32 aEndOffset,
351 : nsAString& aStr)
352 : {
353 0 : return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr);
354 : }
355 :
356 : NS_IMETHODIMP
357 780 : nsPlainTextSerializer::AppendElementStart(Element* aElement,
358 : Element* aOriginalElement,
359 : nsAString& aStr)
360 : {
361 780 : NS_ENSURE_ARG(aElement);
362 :
363 780 : mElement = aElement;
364 :
365 : nsresult rv;
366 780 : nsIAtom* id = GetIdForContent(mElement);
367 :
368 780 : bool isContainer = !nsContentUtils::IsHTMLVoid(id);
369 :
370 780 : mOutputString = &aStr;
371 :
372 780 : if (isContainer) {
373 737 : rv = DoOpenContainer(id);
374 : }
375 : else {
376 43 : rv = DoAddLeaf(id);
377 : }
378 :
379 780 : mElement = nsnull;
380 780 : mOutputString = nsnull;
381 :
382 780 : if (id == nsGkAtoms::head) {
383 234 : ++mHeadLevel;
384 : }
385 :
386 780 : return rv;
387 : }
388 :
389 : NS_IMETHODIMP
390 780 : nsPlainTextSerializer::AppendElementEnd(Element* aElement,
391 : nsAString& aStr)
392 : {
393 780 : NS_ENSURE_ARG(aElement);
394 :
395 780 : mElement = aElement;
396 :
397 : nsresult rv;
398 780 : nsIAtom* id = GetIdForContent(mElement);
399 :
400 780 : bool isContainer = !nsContentUtils::IsHTMLVoid(id);
401 :
402 780 : mOutputString = &aStr;
403 :
404 780 : rv = NS_OK;
405 780 : if (isContainer) {
406 737 : rv = DoCloseContainer(id);
407 : }
408 :
409 780 : mElement = nsnull;
410 780 : mOutputString = nsnull;
411 :
412 780 : if (id == nsGkAtoms::head) {
413 234 : --mHeadLevel;
414 : NS_ASSERTION(mHeadLevel >= 0, "mHeadLevel < 0");
415 : }
416 :
417 780 : return rv;
418 : }
419 :
420 : NS_IMETHODIMP
421 234 : nsPlainTextSerializer::Flush(nsAString& aStr)
422 : {
423 234 : mOutputString = &aStr;
424 234 : FlushLine();
425 234 : mOutputString = nsnull;
426 234 : return NS_OK;
427 : }
428 :
429 : NS_IMETHODIMP
430 234 : nsPlainTextSerializer::AppendDocumentStart(nsIDocument *aDocument,
431 : nsAString& aStr)
432 : {
433 234 : return NS_OK;
434 : }
435 :
436 : nsresult
437 737 : nsPlainTextSerializer::DoOpenContainer(nsIAtom* aTag)
438 : {
439 737 : if (mFlags & nsIDocumentEncoder::OutputRaw) {
440 : // Raw means raw. Don't even think about doing anything fancy
441 : // here like indenting, adding line breaks or any other
442 : // characters such as list item bullets, quote characters
443 : // around <q>, etc. I mean it! Don't make me smack you!
444 :
445 0 : return NS_OK;
446 : }
447 :
448 737 : if (mTagStackIndex < TagStackSize) {
449 737 : mTagStack[mTagStackIndex++] = aTag;
450 : }
451 :
452 737 : if (mIgnoreAboveIndex != (PRUint32)kNotFound) {
453 0 : return NS_OK;
454 : }
455 :
456 : // Reset this so that <blockquote type=cite> doesn't affect the whitespace
457 : // above random <pre>s below it.
458 : mHasWrittenCiteBlockquote = mHasWrittenCiteBlockquote &&
459 737 : aTag == nsGkAtoms::pre;
460 :
461 737 : bool isInCiteBlockquote = false;
462 :
463 : // XXX special-case <blockquote type=cite> so that we don't add additional
464 : // newlines before the text.
465 737 : if (aTag == nsGkAtoms::blockquote) {
466 0 : nsAutoString value;
467 0 : nsresult rv = GetAttributeValue(nsGkAtoms::type, value);
468 0 : isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
469 : }
470 :
471 737 : if (mLineBreakDue && !isInCiteBlockquote)
472 8 : EnsureVerticalSpace(mFloatingLines);
473 :
474 : // Check if this tag's content that should not be output
475 737 : if ((aTag == nsGkAtoms::noscript &&
476 0 : !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) ||
477 : ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
478 0 : !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) {
479 : // Ignore everything that follows the current tag in
480 : // question until a matching end tag is encountered.
481 0 : mIgnoreAboveIndex = mTagStackIndex - 1;
482 0 : return NS_OK;
483 : }
484 :
485 737 : if (aTag == nsGkAtoms::body) {
486 : // Try to figure out here whether we have a
487 : // preformatted style attribute.
488 : //
489 : // Trigger on the presence of a "pre-wrap" in the
490 : // style attribute. That's a very simplistic way to do
491 : // it, but better than nothing.
492 : // Also set mWrapColumn to the value given there
493 : // (which arguably we should only do if told to do so).
494 468 : nsAutoString style;
495 : PRInt32 whitespace;
496 234 : if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) &&
497 : (kNotFound != (whitespace = style.Find("white-space:")))) {
498 :
499 0 : if (kNotFound != style.Find("pre-wrap", true, whitespace)) {
500 : #ifdef DEBUG_preformatted
501 : printf("Set mPreFormatted based on style pre-wrap\n");
502 : #endif
503 0 : mPreFormatted = true;
504 0 : PRInt32 widthOffset = style.Find("width:");
505 0 : if (widthOffset >= 0) {
506 : // We have to search for the ch before the semicolon,
507 : // not for the semicolon itself, because nsString::ToInteger()
508 : // considers 'c' to be a valid numeric char (even if radix=10)
509 : // but then gets confused if it sees it next to the number
510 : // when the radix specified was 10, and returns an error code.
511 0 : PRInt32 semiOffset = style.Find("ch", false, widthOffset+6);
512 : PRInt32 length = (semiOffset > 0 ? semiOffset - widthOffset - 6
513 0 : : style.Length() - widthOffset);
514 0 : nsAutoString widthstr;
515 0 : style.Mid(widthstr, widthOffset+6, length);
516 : PRInt32 err;
517 0 : PRInt32 col = widthstr.ToInteger(&err);
518 :
519 0 : if (NS_SUCCEEDED(err)) {
520 0 : mWrapColumn = (PRUint32)col;
521 : #ifdef DEBUG_preformatted
522 : printf("Set wrap column to %d based on style\n", mWrapColumn);
523 : #endif
524 : }
525 : }
526 : }
527 0 : else if (kNotFound != style.Find("pre", true, whitespace)) {
528 : #ifdef DEBUG_preformatted
529 : printf("Set mPreFormatted based on style pre\n");
530 : #endif
531 0 : mPreFormatted = true;
532 0 : mWrapColumn = 0;
533 : }
534 : }
535 : else {
536 : /* See comment at end of function. */
537 234 : mInWhitespace = true;
538 234 : mPreFormatted = false;
539 : }
540 :
541 234 : return NS_OK;
542 : }
543 :
544 : // Keep this in sync with DoCloseContainer!
545 503 : if (!DoOutput()) {
546 0 : return NS_OK;
547 : }
548 :
549 503 : if (aTag == nsGkAtoms::p)
550 18 : EnsureVerticalSpace(1);
551 485 : else if (aTag == nsGkAtoms::pre) {
552 0 : if (GetLastBool(mIsInCiteBlockquote))
553 0 : EnsureVerticalSpace(0);
554 0 : else if (mHasWrittenCiteBlockquote) {
555 0 : EnsureVerticalSpace(0);
556 0 : mHasWrittenCiteBlockquote = false;
557 : }
558 : else
559 0 : EnsureVerticalSpace(1);
560 : }
561 485 : else if (aTag == nsGkAtoms::tr) {
562 0 : PushBool(mHasWrittenCellsForRow, false);
563 : }
564 485 : else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
565 : // We must make sure that the content of two table cells get a
566 : // space between them.
567 :
568 : // To make the separation between cells most obvious and
569 : // importable, we use a TAB.
570 0 : if (GetLastBool(mHasWrittenCellsForRow)) {
571 : // Bypass |Write| so that the TAB isn't compressed away.
572 0 : AddToLine(NS_LITERAL_STRING("\t").get(), 1);
573 0 : mInWhitespace = true;
574 : }
575 0 : else if (mHasWrittenCellsForRow.IsEmpty()) {
576 : // We don't always see a <tr> (nor a <table>) before the <td> if we're
577 : // copying part of a table
578 0 : PushBool(mHasWrittenCellsForRow, true); // will never be popped
579 : }
580 : else {
581 0 : SetLastBool(mHasWrittenCellsForRow, true);
582 : }
583 : }
584 485 : else if (aTag == nsGkAtoms::ul) {
585 : // Indent here to support nested lists, which aren't included in li :-(
586 4 : EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
587 : // Must end the current line before we change indention
588 4 : mIndent += kIndentSizeList;
589 4 : mULCount++;
590 : }
591 481 : else if (aTag == nsGkAtoms::ol) {
592 0 : EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
593 0 : if (mFlags & nsIDocumentEncoder::OutputFormatted) {
594 : // Must end the current line before we change indention
595 0 : if (mOLStackIndex < OLStackSize) {
596 0 : nsAutoString startAttr;
597 0 : PRInt32 startVal = 1;
598 0 : if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) {
599 0 : PRInt32 rv = 0;
600 0 : startVal = startAttr.ToInteger(&rv);
601 0 : if (NS_FAILED(rv))
602 0 : startVal = 1;
603 : }
604 0 : mOLStack[mOLStackIndex++] = startVal;
605 : }
606 : } else {
607 0 : mOLStackIndex++;
608 : }
609 0 : mIndent += kIndentSizeList; // see ul
610 : }
611 481 : else if (aTag == nsGkAtoms::li &&
612 : (mFlags & nsIDocumentEncoder::OutputFormatted)) {
613 0 : if (mTagStackIndex > 1 && IsInOL()) {
614 0 : if (mOLStackIndex > 0) {
615 0 : nsAutoString valueAttr;
616 0 : if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) {
617 0 : PRInt32 rv = 0;
618 0 : PRInt32 valueAttrVal = valueAttr.ToInteger(&rv);
619 0 : if (NS_SUCCEEDED(rv))
620 0 : mOLStack[mOLStackIndex-1] = valueAttrVal;
621 : }
622 : // This is what nsBulletFrame does for OLs:
623 0 : mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10);
624 : }
625 : else {
626 0 : mInIndentString.Append(PRUnichar('#'));
627 : }
628 :
629 0 : mInIndentString.Append(PRUnichar('.'));
630 :
631 : }
632 : else {
633 : static char bulletCharArray[] = "*o+#";
634 0 : PRUint32 index = mULCount > 0 ? (mULCount - 1) : 3;
635 0 : char bulletChar = bulletCharArray[index % 4];
636 0 : mInIndentString.Append(PRUnichar(bulletChar));
637 : }
638 :
639 0 : mInIndentString.Append(PRUnichar(' '));
640 : }
641 481 : else if (aTag == nsGkAtoms::dl) {
642 0 : EnsureVerticalSpace(1);
643 : }
644 481 : else if (aTag == nsGkAtoms::dt) {
645 0 : EnsureVerticalSpace(0);
646 : }
647 481 : else if (aTag == nsGkAtoms::dd) {
648 0 : EnsureVerticalSpace(0);
649 0 : mIndent += kIndentSizeDD;
650 : }
651 481 : else if (aTag == nsGkAtoms::span) {
652 1 : ++mSpanLevel;
653 : }
654 480 : else if (aTag == nsGkAtoms::blockquote) {
655 : // Push
656 0 : PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
657 0 : if (isInCiteBlockquote) {
658 0 : EnsureVerticalSpace(0);
659 0 : mCiteQuoteLevel++;
660 : }
661 : else {
662 0 : EnsureVerticalSpace(1);
663 0 : mIndent += kTabSize; // Check for some maximum value?
664 : }
665 : }
666 480 : else if (aTag == nsGkAtoms::q) {
667 0 : Write(NS_LITERAL_STRING("\""));
668 : }
669 :
670 : // Else make sure we'll separate block level tags,
671 : // even if we're about to leave, before doing any other formatting.
672 480 : else if (nsContentUtils::IsHTMLBlock(aTag)) {
673 12 : EnsureVerticalSpace(0);
674 : }
675 :
676 : //////////////////////////////////////////////////////////////
677 503 : if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
678 499 : return NS_OK;
679 : }
680 : //////////////////////////////////////////////////////////////
681 : // The rest of this routine is formatted output stuff,
682 : // which we should skip if we're not formatted:
683 : //////////////////////////////////////////////////////////////
684 :
685 : // Push on stack
686 4 : bool currentNodeIsConverted = IsCurrentNodeConverted();
687 :
688 4 : if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
689 : aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
690 : aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6)
691 : {
692 0 : EnsureVerticalSpace(2);
693 0 : if (mHeaderStrategy == 2) { // numbered
694 0 : mIndent += kIndentSizeHeaders;
695 : // Caching
696 0 : PRInt32 level = HeaderLevel(aTag);
697 : // Increase counter for current level
698 0 : mHeaderCounter[level]++;
699 : // Reset all lower levels
700 : PRInt32 i;
701 :
702 0 : for (i = level + 1; i <= 6; i++) {
703 0 : mHeaderCounter[i] = 0;
704 : }
705 :
706 : // Construct numbers
707 0 : nsAutoString leadup;
708 0 : for (i = 1; i <= level; i++) {
709 0 : leadup.AppendInt(mHeaderCounter[i]);
710 0 : leadup.Append(PRUnichar('.'));
711 : }
712 0 : leadup.Append(PRUnichar(' '));
713 0 : Write(leadup);
714 : }
715 0 : else if (mHeaderStrategy == 1) { // indent increasingly
716 0 : mIndent += kIndentSizeHeaders;
717 0 : for (PRInt32 i = HeaderLevel(aTag); i > 1; i--) {
718 : // for h(x), run x-1 times
719 0 : mIndent += kIndentIncrementHeaders;
720 : }
721 0 : }
722 : }
723 4 : else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) {
724 0 : nsAutoString url;
725 0 : if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url))
726 0 : && !url.IsEmpty()) {
727 0 : mURL = url;
728 0 : }
729 : }
730 4 : else if (aTag == nsGkAtoms::sup && mStructs && !currentNodeIsConverted) {
731 0 : Write(NS_LITERAL_STRING("^"));
732 : }
733 4 : else if (aTag == nsGkAtoms::sub && mStructs && !currentNodeIsConverted) {
734 0 : Write(NS_LITERAL_STRING("_"));
735 : }
736 4 : else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
737 0 : Write(NS_LITERAL_STRING("|"));
738 : }
739 4 : else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
740 0 : && mStructs && !currentNodeIsConverted) {
741 0 : Write(NS_LITERAL_STRING("*"));
742 : }
743 4 : else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
744 0 : && mStructs && !currentNodeIsConverted) {
745 0 : Write(NS_LITERAL_STRING("/"));
746 : }
747 4 : else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
748 0 : Write(NS_LITERAL_STRING("_"));
749 : }
750 :
751 : /* Container elements are always block elements, so we shouldn't
752 : output any whitespace immediately after the container tag even if
753 : there's extra whitespace there because the HTML is pretty-printed
754 : or something. To ensure that happens, tell the serializer we're
755 : already in whitespace so it won't output more. */
756 4 : mInWhitespace = true;
757 :
758 4 : return NS_OK;
759 : }
760 :
761 : nsresult
762 737 : nsPlainTextSerializer::DoCloseContainer(nsIAtom* aTag)
763 : {
764 737 : if (mFlags & nsIDocumentEncoder::OutputRaw) {
765 : // Raw means raw. Don't even think about doing anything fancy
766 : // here like indenting, adding line breaks or any other
767 : // characters such as list item bullets, quote characters
768 : // around <q>, etc. I mean it! Don't make me smack you!
769 :
770 0 : return NS_OK;
771 : }
772 :
773 737 : if (mTagStackIndex > 0) {
774 737 : --mTagStackIndex;
775 : }
776 :
777 737 : if (mTagStackIndex >= mIgnoreAboveIndex) {
778 0 : if (mTagStackIndex == mIgnoreAboveIndex) {
779 : // We're dealing with the close tag whose matching
780 : // open tag had set the mIgnoreAboveIndex value.
781 : // Reset mIgnoreAboveIndex before discarding this tag.
782 0 : mIgnoreAboveIndex = (PRUint32)kNotFound;
783 : }
784 0 : return NS_OK;
785 : }
786 :
787 : // End current line if we're ending a block level tag
788 737 : if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) {
789 : // We want the output to end with a new line,
790 : // but in preformatted areas like text fields,
791 : // we can't emit newlines that weren't there.
792 : // So add the newline only in the case of formatted output.
793 468 : if (mFlags & nsIDocumentEncoder::OutputFormatted) {
794 4 : EnsureVerticalSpace(0);
795 : }
796 : else {
797 464 : FlushLine();
798 : }
799 : // We won't want to do anything with these in formatted mode either,
800 : // so just return now:
801 468 : return NS_OK;
802 : }
803 :
804 : // Keep this in sync with DoOpenContainer!
805 269 : if (!DoOutput()) {
806 234 : return NS_OK;
807 : }
808 :
809 35 : if (aTag == nsGkAtoms::tr) {
810 0 : PopBool(mHasWrittenCellsForRow);
811 : // Should always end a line, but get no more whitespace
812 0 : if (mFloatingLines < 0)
813 0 : mFloatingLines = 0;
814 0 : mLineBreakDue = true;
815 : }
816 35 : else if (((aTag == nsGkAtoms::li) ||
817 : (aTag == nsGkAtoms::dt)) &&
818 : (mFlags & nsIDocumentEncoder::OutputFormatted)) {
819 : // Items that should always end a line, but get no more whitespace
820 0 : if (mFloatingLines < 0)
821 0 : mFloatingLines = 0;
822 0 : mLineBreakDue = true;
823 : }
824 35 : else if (aTag == nsGkAtoms::pre) {
825 0 : mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
826 0 : mLineBreakDue = true;
827 : }
828 35 : else if (aTag == nsGkAtoms::ul) {
829 4 : FlushLine();
830 4 : mIndent -= kIndentSizeList;
831 4 : if (--mULCount + mOLStackIndex == 0) {
832 4 : mFloatingLines = 1;
833 4 : mLineBreakDue = true;
834 : }
835 : }
836 31 : else if (aTag == nsGkAtoms::ol) {
837 0 : FlushLine(); // Doing this after decreasing OLStackIndex would be wrong.
838 0 : mIndent -= kIndentSizeList;
839 0 : NS_ASSERTION(mOLStackIndex, "Wrong OLStack level!");
840 0 : mOLStackIndex--;
841 0 : if (mULCount + mOLStackIndex == 0) {
842 0 : mFloatingLines = 1;
843 0 : mLineBreakDue = true;
844 : }
845 : }
846 31 : else if (aTag == nsGkAtoms::dl) {
847 0 : mFloatingLines = 1;
848 0 : mLineBreakDue = true;
849 : }
850 31 : else if (aTag == nsGkAtoms::dd) {
851 0 : FlushLine();
852 0 : mIndent -= kIndentSizeDD;
853 : }
854 31 : else if (aTag == nsGkAtoms::span) {
855 1 : NS_ASSERTION(mSpanLevel, "Span level will be negative!");
856 1 : --mSpanLevel;
857 : }
858 30 : else if (aTag == nsGkAtoms::div) {
859 0 : if (mFloatingLines < 0)
860 0 : mFloatingLines = 0;
861 0 : mLineBreakDue = true;
862 : }
863 30 : else if (aTag == nsGkAtoms::blockquote) {
864 0 : FlushLine(); // Is this needed?
865 :
866 : // Pop
867 0 : bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);
868 :
869 0 : if (isInCiteBlockquote) {
870 0 : NS_ASSERTION(mCiteQuoteLevel, "CiteQuote level will be negative!");
871 0 : mCiteQuoteLevel--;
872 0 : mFloatingLines = 0;
873 0 : mHasWrittenCiteBlockquote = true;
874 : }
875 : else {
876 0 : mIndent -= kTabSize;
877 0 : mFloatingLines = 1;
878 : }
879 0 : mLineBreakDue = true;
880 : }
881 30 : else if (aTag == nsGkAtoms::q) {
882 0 : Write(NS_LITERAL_STRING("\""));
883 : }
884 30 : else if (nsContentUtils::IsHTMLBlock(aTag)
885 : && aTag != nsGkAtoms::script) {
886 : // All other blocks get 1 vertical space after them
887 : // in formatted mode, otherwise 0.
888 : // This is hard. Sometimes 0 is a better number, but
889 : // how to know?
890 30 : if (mFlags & nsIDocumentEncoder::OutputFormatted)
891 0 : EnsureVerticalSpace(1);
892 : else {
893 30 : if (mFloatingLines < 0)
894 30 : mFloatingLines = 0;
895 30 : mLineBreakDue = true;
896 : }
897 : }
898 :
899 : //////////////////////////////////////////////////////////////
900 35 : if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
901 35 : return NS_OK;
902 : }
903 : //////////////////////////////////////////////////////////////
904 : // The rest of this routine is formatted output stuff,
905 : // which we should skip if we're not formatted:
906 : //////////////////////////////////////////////////////////////
907 :
908 : // Pop the currentConverted stack
909 0 : bool currentNodeIsConverted = IsCurrentNodeConverted();
910 :
911 0 : if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
912 : aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
913 : aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
914 :
915 0 : if (mHeaderStrategy) { /*numbered or indent increasingly*/
916 0 : mIndent -= kIndentSizeHeaders;
917 : }
918 0 : if (mHeaderStrategy == 1 /*indent increasingly*/ ) {
919 0 : for (PRInt32 i = HeaderLevel(aTag); i > 1; i--) {
920 : // for h(x), run x-1 times
921 0 : mIndent -= kIndentIncrementHeaders;
922 : }
923 : }
924 0 : EnsureVerticalSpace(1);
925 : }
926 0 : else if (aTag == nsGkAtoms::a && !currentNodeIsConverted && !mURL.IsEmpty()) {
927 0 : nsAutoString temp;
928 0 : temp.AssignLiteral(" <");
929 0 : temp += mURL;
930 0 : temp.Append(PRUnichar('>'));
931 0 : Write(temp);
932 0 : mURL.Truncate();
933 : }
934 0 : else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub)
935 0 : && mStructs && !currentNodeIsConverted) {
936 0 : Write(kSpace);
937 : }
938 0 : else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
939 0 : Write(NS_LITERAL_STRING("|"));
940 : }
941 0 : else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
942 0 : && mStructs && !currentNodeIsConverted) {
943 0 : Write(NS_LITERAL_STRING("*"));
944 : }
945 0 : else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
946 0 : && mStructs && !currentNodeIsConverted) {
947 0 : Write(NS_LITERAL_STRING("/"));
948 : }
949 0 : else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
950 0 : Write(NS_LITERAL_STRING("_"));
951 : }
952 :
953 0 : return NS_OK;
954 : }
955 :
956 : bool
957 331 : nsPlainTextSerializer::MustSuppressLeaf()
958 : {
959 993 : if ((mTagStackIndex > 1 &&
960 331 : mTagStack[mTagStackIndex-2] == nsGkAtoms::select) ||
961 : (mTagStackIndex > 0 &&
962 331 : mTagStack[mTagStackIndex-1] == nsGkAtoms::select)) {
963 : // Don't output the contents of SELECT elements;
964 : // Might be nice, eventually, to output just the selected element.
965 : // Read more in bug 31994.
966 0 : return true;
967 : }
968 :
969 993 : if (mTagStackIndex > 0 &&
970 331 : (mTagStack[mTagStackIndex-1] == nsGkAtoms::script ||
971 331 : mTagStack[mTagStackIndex-1] == nsGkAtoms::style)) {
972 : // Don't output the contents of <script> or <style> tags;
973 0 : return true;
974 : }
975 :
976 331 : return false;
977 : }
978 :
979 : void
980 289 : nsPlainTextSerializer::DoAddText(bool aIsLineBreak, const nsAString& aText)
981 : {
982 : // If we don't want any output, just return
983 289 : if (!DoOutput()) {
984 0 : return;
985 : }
986 :
987 289 : if (!aIsLineBreak) {
988 : // Make sure to reset this, since it's no longer true.
989 285 : mHasWrittenCiteBlockquote = false;
990 : }
991 :
992 289 : if (mLineBreakDue)
993 0 : EnsureVerticalSpace(mFloatingLines);
994 :
995 289 : if (MustSuppressLeaf()) {
996 0 : return;
997 : }
998 :
999 289 : if (aIsLineBreak) {
1000 : // The only times we want to pass along whitespace from the original
1001 : // html source are if we're forced into preformatted mode via flags,
1002 : // or if we're prettyprinting and we're inside a <pre>.
1003 : // Otherwise, either we're collapsing to minimal text, or we're
1004 : // prettyprinting to mimic the html format, and in neither case
1005 : // does the formatting of the html source help us.
1006 8 : if ((mFlags & nsIDocumentEncoder::OutputPreformatted) ||
1007 0 : (mPreFormatted && !mWrapColumn) ||
1008 4 : IsInPre()) {
1009 0 : EnsureVerticalSpace(mEmptyLines+1);
1010 : }
1011 4 : else if (!mInWhitespace) {
1012 0 : Write(kSpace);
1013 0 : mInWhitespace = true;
1014 : }
1015 4 : return;
1016 : }
1017 :
1018 : /* Check, if we are in a link (symbolized with mURL containing the URL)
1019 : and the text is equal to the URL. In that case we don't want to output
1020 : the URL twice so we scrap the text in mURL. */
1021 285 : if (!mURL.IsEmpty() && mURL.Equals(aText)) {
1022 0 : mURL.Truncate();
1023 : }
1024 285 : Write(aText);
1025 : }
1026 :
1027 : nsresult
1028 43 : nsPlainTextSerializer::DoAddLeaf(nsIAtom* aTag)
1029 : {
1030 : // If we don't want any output, just return
1031 43 : if (!DoOutput()) {
1032 1 : return NS_OK;
1033 : }
1034 :
1035 42 : if (mLineBreakDue)
1036 0 : EnsureVerticalSpace(mFloatingLines);
1037 :
1038 42 : if (MustSuppressLeaf()) {
1039 0 : return NS_OK;
1040 : }
1041 :
1042 42 : if (aTag == nsGkAtoms::br) {
1043 : // Another egregious editor workaround, see bug 38194:
1044 : // ignore the bogus br tags that the editor sticks here and there.
1045 84 : nsAutoString tagAttr;
1046 42 : if (NS_FAILED(GetAttributeValue(nsGkAtoms::type, tagAttr))
1047 0 : || !tagAttr.EqualsLiteral("_moz")) {
1048 42 : EnsureVerticalSpace(mEmptyLines+1);
1049 : }
1050 : }
1051 0 : else if (aTag == nsGkAtoms::hr &&
1052 : (mFlags & nsIDocumentEncoder::OutputFormatted)) {
1053 0 : EnsureVerticalSpace(0);
1054 :
1055 : // Make a line of dashes as wide as the wrap width
1056 : // XXX honoring percentage would be nice
1057 0 : nsAutoString line;
1058 0 : PRUint32 width = (mWrapColumn > 0 ? mWrapColumn : 25);
1059 0 : while (line.Length() < width) {
1060 0 : line.Append(PRUnichar('-'));
1061 : }
1062 0 : Write(line);
1063 :
1064 0 : EnsureVerticalSpace(0);
1065 : }
1066 0 : else if (aTag == nsGkAtoms::img) {
1067 : /* Output (in decreasing order of preference)
1068 : alt, title or nothing */
1069 : // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
1070 0 : nsAutoString imageDescription;
1071 0 : if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::alt,
1072 : imageDescription))) {
1073 : // If the alt attribute has an empty value (|alt=""|), output nothing
1074 : }
1075 0 : else if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::title,
1076 : imageDescription))
1077 0 : && !imageDescription.IsEmpty()) {
1078 0 : imageDescription = NS_LITERAL_STRING(" [") +
1079 0 : imageDescription +
1080 0 : NS_LITERAL_STRING("] ");
1081 : }
1082 :
1083 0 : Write(imageDescription);
1084 : }
1085 :
1086 42 : return NS_OK;
1087 : }
1088 :
1089 : /**
1090 : * Adds as many newline as necessary to get |noOfRows| empty lines
1091 : *
1092 : * noOfRows = -1 : Being in the middle of some line of text
1093 : * noOfRows = 0 : Being at the start of a line
1094 : * noOfRows = n>0 : Having n empty lines before the current line.
1095 : */
1096 : void
1097 88 : nsPlainTextSerializer::EnsureVerticalSpace(PRInt32 noOfRows)
1098 : {
1099 : // If we have something in the indent we probably want to output
1100 : // it and it's not included in the count for empty lines so we don't
1101 : // realize that we should start a new line.
1102 88 : if (noOfRows >= 0 && !mInIndentString.IsEmpty()) {
1103 0 : EndLine(false);
1104 0 : mInWhitespace = true;
1105 : }
1106 :
1107 236 : while(mEmptyLines < noOfRows) {
1108 60 : EndLine(false);
1109 60 : mInWhitespace = true;
1110 : }
1111 88 : mLineBreakDue = false;
1112 88 : mFloatingLines = -1;
1113 88 : }
1114 :
1115 : /**
1116 : * This empties the current line cache without adding a NEWLINE.
1117 : * Should not be used if line wrapping is of importance since
1118 : * this function destroys the cache information.
1119 : *
1120 : * It will also write indentation and quotes if we believe us to be
1121 : * at the start of the line.
1122 : */
1123 : void
1124 702 : nsPlainTextSerializer::FlushLine()
1125 : {
1126 702 : if (!mCurrentLine.IsEmpty()) {
1127 231 : if (mAtFirstColumn) {
1128 231 : OutputQuotesAndIndent(); // XXX: Should we always do this? Bug?
1129 : }
1130 :
1131 231 : Output(mCurrentLine);
1132 231 : mAtFirstColumn = mAtFirstColumn && mCurrentLine.IsEmpty();
1133 231 : mCurrentLine.Truncate();
1134 231 : mCurrentLineWidth = 0;
1135 : }
1136 702 : }
1137 :
1138 : /**
1139 : * Prints the text to output to our current output device (the string mOutputString).
1140 : * The only logic here is to replace non breaking spaces with a normal space since
1141 : * most (all?) receivers of the result won't understand the nbsp and even be
1142 : * confused by it.
1143 : */
1144 : void
1145 301 : nsPlainTextSerializer::Output(nsString& aString)
1146 : {
1147 301 : if (!aString.IsEmpty()) {
1148 301 : mStartedOutput = true;
1149 : }
1150 :
1151 301 : if (!(mFlags & nsIDocumentEncoder::OutputPersistNBSP)) {
1152 : // First, replace all nbsp characters with spaces,
1153 : // which the unicode encoder won't do for us.
1154 301 : aString.ReplaceChar(kNBSP, kSPACE);
1155 : }
1156 301 : mOutputString->Append(aString);
1157 301 : }
1158 :
1159 : static bool
1160 4 : IsSpaceStuffable(const PRUnichar *s)
1161 : {
1162 16 : if (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP ||
1163 12 : nsCRT::strncmp(s, NS_LITERAL_STRING("From ").get(), 5) == 0)
1164 0 : return true;
1165 : else
1166 4 : return false;
1167 : }
1168 :
1169 : /**
1170 : * This function adds a piece of text to the current stored line. If we are
1171 : * wrapping text and the stored line will become too long, a suitable
1172 : * location to wrap will be found and the line that's complete will be
1173 : * output.
1174 : */
1175 : void
1176 2605 : nsPlainTextSerializer::AddToLine(const PRUnichar * aLineFragment,
1177 : PRInt32 aLineFragmentLength)
1178 : {
1179 2605 : PRUint32 prefixwidth = (mCiteQuoteLevel > 0 ? mCiteQuoteLevel + 1:0)+mIndent;
1180 :
1181 2605 : if (mLineBreakDue)
1182 0 : EnsureVerticalSpace(mFloatingLines);
1183 :
1184 2605 : PRInt32 linelength = mCurrentLine.Length();
1185 2605 : if (0 == linelength) {
1186 283 : if (0 == aLineFragmentLength) {
1187 : // Nothing at all. Are you kidding me?
1188 0 : return;
1189 : }
1190 :
1191 283 : if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1192 2 : if (IsSpaceStuffable(aLineFragment)
1193 : && mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway
1194 : )
1195 : {
1196 : // Space stuffing a la RFC 2646 (format=flowed).
1197 0 : mCurrentLine.Append(PRUnichar(' '));
1198 :
1199 0 : if (MayWrap()) {
1200 0 : mCurrentLineWidth += GetUnicharWidth(' ');
1201 : #ifdef DEBUG_wrapping
1202 : NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(),
1203 : mCurrentLine.Length()) ==
1204 : (PRInt32)mCurrentLineWidth,
1205 : "mCurrentLineWidth and reality out of sync!");
1206 : #endif
1207 : }
1208 : }
1209 : }
1210 283 : mEmptyLines=-1;
1211 : }
1212 :
1213 2605 : mCurrentLine.Append(aLineFragment, aLineFragmentLength);
1214 2605 : if (MayWrap()) {
1215 : mCurrentLineWidth += GetUnicharStringWidth(aLineFragment,
1216 24 : aLineFragmentLength);
1217 : #ifdef DEBUG_wrapping
1218 : NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(),
1219 : mCurrentLine.Length()) ==
1220 : (PRInt32)mCurrentLineWidth,
1221 : "mCurrentLineWidth and reality out of sync!");
1222 : #endif
1223 : }
1224 :
1225 2605 : linelength = mCurrentLine.Length();
1226 :
1227 : // Wrap?
1228 2605 : if (MayWrap())
1229 : {
1230 : #ifdef DEBUG_wrapping
1231 : NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(),
1232 : mCurrentLine.Length()) ==
1233 : (PRInt32)mCurrentLineWidth,
1234 : "mCurrentLineWidth and reality out of sync!");
1235 : #endif
1236 : // Yes, wrap!
1237 : // The "+4" is to avoid wrap lines that only would be a couple
1238 : // of letters too long. We give this bonus only if the
1239 : // wrapcolumn is more than 20.
1240 24 : PRUint32 bonuswidth = (mWrapColumn > 20) ? 4 : 0;
1241 :
1242 : // XXX: Should calculate prefixwidth with GetUnicharStringWidth
1243 50 : while(mCurrentLineWidth+prefixwidth > mWrapColumn+bonuswidth) {
1244 : // We go from the end removing one letter at a time until
1245 : // we have a reasonable width
1246 2 : PRInt32 goodSpace = mCurrentLine.Length();
1247 2 : PRUint32 width = mCurrentLineWidth;
1248 15 : while(goodSpace > 0 && (width+prefixwidth > mWrapColumn)) {
1249 11 : goodSpace--;
1250 11 : width -= GetUnicharWidth(mCurrentLine[goodSpace]);
1251 : }
1252 :
1253 2 : goodSpace++;
1254 :
1255 2 : if (mLineBreaker) {
1256 2 : goodSpace = mLineBreaker->Prev(mCurrentLine.get(),
1257 2 : mCurrentLine.Length(), goodSpace);
1258 4 : if (goodSpace != NS_LINEBREAKER_NEED_MORE_TEXT &&
1259 2 : nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace-1))) {
1260 1 : --goodSpace; // adjust the position since line breaker returns a position next to space
1261 : }
1262 : }
1263 : // fallback if the line breaker is unavailable or failed
1264 2 : if (!mLineBreaker) {
1265 0 : goodSpace = mWrapColumn-prefixwidth;
1266 0 : while (goodSpace >= 0 &&
1267 0 : !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1268 0 : goodSpace--;
1269 : }
1270 : }
1271 :
1272 4 : nsAutoString restOfLine;
1273 2 : if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) {
1274 : // If we don't found a good place to break, accept long line and
1275 : // try to find another place to break
1276 0 : goodSpace=(prefixwidth>mWrapColumn+1)?1:mWrapColumn-prefixwidth+1;
1277 0 : if (mLineBreaker) {
1278 0 : if ((PRUint32)goodSpace < mCurrentLine.Length())
1279 0 : goodSpace = mLineBreaker->Next(mCurrentLine.get(),
1280 0 : mCurrentLine.Length(), goodSpace);
1281 0 : if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT)
1282 0 : goodSpace = mCurrentLine.Length();
1283 : }
1284 : // fallback if the line breaker is unavailable or failed
1285 0 : if (!mLineBreaker) {
1286 0 : goodSpace=(prefixwidth>mWrapColumn)?1:mWrapColumn-prefixwidth;
1287 0 : while (goodSpace < linelength &&
1288 0 : !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1289 0 : goodSpace++;
1290 : }
1291 : }
1292 : }
1293 :
1294 2 : if ((goodSpace < linelength) && (goodSpace > 0)) {
1295 : // Found a place to break
1296 :
1297 : // -1 (trim a char at the break position)
1298 : // only if the line break was a space.
1299 2 : if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1300 1 : mCurrentLine.Right(restOfLine, linelength-goodSpace-1);
1301 : }
1302 : else {
1303 1 : mCurrentLine.Right(restOfLine, linelength-goodSpace);
1304 : }
1305 : // if breaker was U+0020, it has to consider for delsp=yes support
1306 2 : bool breakBySpace = mCurrentLine.CharAt(goodSpace) == ' ';
1307 2 : mCurrentLine.Truncate(goodSpace);
1308 2 : EndLine(true, breakBySpace);
1309 2 : mCurrentLine.Truncate();
1310 : // Space stuff new line?
1311 2 : if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1312 2 : if (!restOfLine.IsEmpty() && IsSpaceStuffable(restOfLine.get())
1313 : && mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway
1314 : )
1315 : {
1316 : // Space stuffing a la RFC 2646 (format=flowed).
1317 0 : mCurrentLine.Append(PRUnichar(' '));
1318 : //XXX doesn't seem to work correctly for ' '
1319 : }
1320 : }
1321 2 : mCurrentLine.Append(restOfLine);
1322 : mCurrentLineWidth = GetUnicharStringWidth(mCurrentLine.get(),
1323 2 : mCurrentLine.Length());
1324 2 : linelength = mCurrentLine.Length();
1325 2 : mEmptyLines = -1;
1326 : }
1327 : else {
1328 : // Nothing to do. Hopefully we get more data later
1329 : // to use for a place to break line
1330 : break;
1331 : }
1332 : }
1333 : }
1334 : else {
1335 : // No wrapping.
1336 : }
1337 : }
1338 :
1339 : /**
1340 : * Outputs the contents of mCurrentLine, and resets line specific
1341 : * variables. Also adds an indentation and prefix if there is
1342 : * one specified. Strips ending spaces from the line if it isn't
1343 : * preformatted.
1344 : */
1345 : void
1346 62 : nsPlainTextSerializer::EndLine(bool aSoftlinebreak, bool aBreakBySpace)
1347 : {
1348 62 : PRUint32 currentlinelength = mCurrentLine.Length();
1349 :
1350 62 : if (aSoftlinebreak && 0 == currentlinelength) {
1351 : // No meaning
1352 0 : return;
1353 : }
1354 :
1355 : /* In non-preformatted mode, remove spaces from the end of the line for
1356 : * format=flowed compatibility. Don't do this for these special cases:
1357 : * "-- ", the signature separator (RFC 2646) shouldn't be touched and
1358 : * "- -- ", the OpenPGP dash-escaped signature separator in inline
1359 : * signed messages according to the OpenPGP standard (RFC 2440).
1360 : */
1361 184 : if (!(mFlags & nsIDocumentEncoder::OutputPreformatted) &&
1362 : (aSoftlinebreak ||
1363 120 : !(mCurrentLine.EqualsLiteral("-- ") || mCurrentLine.EqualsLiteral("- -- ")))) {
1364 : // Remove spaces from the end of the line.
1365 178 : while(currentlinelength > 0 &&
1366 54 : mCurrentLine[currentlinelength-1] == ' ') {
1367 0 : --currentlinelength;
1368 : }
1369 62 : mCurrentLine.SetLength(currentlinelength);
1370 : }
1371 :
1372 62 : if (aSoftlinebreak &&
1373 : (mFlags & nsIDocumentEncoder::OutputFormatFlowed) &&
1374 : (mIndent == 0)) {
1375 : // Add the soft part of the soft linebreak (RFC 2646 4.1)
1376 : // We only do this when there is no indentation since format=flowed
1377 : // lines and indentation doesn't work well together.
1378 :
1379 : // If breaker character is ASCII space with RFC 3676 support (delsp=yes),
1380 : // add twice space.
1381 2 : if ((mFlags & nsIDocumentEncoder::OutputFormatDelSp) && aBreakBySpace)
1382 1 : mCurrentLine.Append(NS_LITERAL_STRING(" "));
1383 : else
1384 1 : mCurrentLine.Append(PRUnichar(' '));
1385 : }
1386 :
1387 62 : if (aSoftlinebreak) {
1388 2 : mEmptyLines=0;
1389 : }
1390 : else {
1391 : // Hard break
1392 60 : if (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) {
1393 52 : mEmptyLines=-1;
1394 : }
1395 :
1396 60 : mEmptyLines++;
1397 : }
1398 :
1399 62 : if (mAtFirstColumn) {
1400 : // If we don't have anything "real" to output we have to
1401 : // make sure the indent doesn't end in a space since that
1402 : // would trick a format=flowed-aware receiver.
1403 62 : bool stripTrailingSpaces = mCurrentLine.IsEmpty();
1404 62 : OutputQuotesAndIndent(stripTrailingSpaces);
1405 : }
1406 :
1407 62 : mCurrentLine.Append(mLineBreak);
1408 62 : Output(mCurrentLine);
1409 62 : mCurrentLine.Truncate();
1410 62 : mCurrentLineWidth = 0;
1411 62 : mAtFirstColumn=true;
1412 62 : mInWhitespace=true;
1413 62 : mLineBreakDue = false;
1414 62 : mFloatingLines = -1;
1415 : }
1416 :
1417 :
1418 : /**
1419 : * Outputs the calculated and stored indent and text in the indentation. That is
1420 : * quote chars and numbers for numbered lists and such. It will also reset any
1421 : * stored text to put in the indentation after using it.
1422 : */
1423 : void
1424 293 : nsPlainTextSerializer::OutputQuotesAndIndent(bool stripTrailingSpaces /* = false */)
1425 : {
1426 586 : nsAutoString stringToOutput;
1427 :
1428 : // Put the mail quote "> " chars in, if appropriate:
1429 293 : if (mCiteQuoteLevel > 0) {
1430 0 : nsAutoString quotes;
1431 0 : for(int i=0; i < mCiteQuoteLevel; i++) {
1432 0 : quotes.Append(PRUnichar('>'));
1433 : }
1434 0 : if (!mCurrentLine.IsEmpty()) {
1435 : /* Better don't output a space here, if the line is empty,
1436 : in case a receiving f=f-aware UA thinks, this were a flowed line,
1437 : which it isn't - it's just empty.
1438 : (Flowed lines may be joined with the following one,
1439 : so the empty line may be lost completely.) */
1440 0 : quotes.Append(PRUnichar(' '));
1441 : }
1442 0 : stringToOutput = quotes;
1443 0 : mAtFirstColumn = false;
1444 : }
1445 :
1446 : // Indent if necessary
1447 293 : PRInt32 indentwidth = mIndent - mInIndentString.Length();
1448 301 : if (indentwidth > 0
1449 8 : && (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty())
1450 : // Don't make empty lines look flowed
1451 : ) {
1452 16 : nsAutoString spaces;
1453 40 : for (int i=0; i < indentwidth; ++i)
1454 32 : spaces.Append(PRUnichar(' '));
1455 8 : stringToOutput += spaces;
1456 8 : mAtFirstColumn = false;
1457 : }
1458 :
1459 293 : if (!mInIndentString.IsEmpty()) {
1460 0 : stringToOutput += mInIndentString;
1461 0 : mAtFirstColumn = false;
1462 0 : mInIndentString.Truncate();
1463 : }
1464 :
1465 293 : if (stripTrailingSpaces) {
1466 8 : PRInt32 lineLength = stringToOutput.Length();
1467 16 : while(lineLength > 0 &&
1468 0 : ' ' == stringToOutput[lineLength-1]) {
1469 0 : --lineLength;
1470 : }
1471 8 : stringToOutput.SetLength(lineLength);
1472 : }
1473 :
1474 293 : if (!stringToOutput.IsEmpty()) {
1475 8 : Output(stringToOutput);
1476 : }
1477 :
1478 293 : }
1479 :
1480 : /**
1481 : * Write a string. This is the highlevel function to use to get text output.
1482 : * By using AddToLine, Output, EndLine and other functions it handles quotation,
1483 : * line wrapping, indentation, whitespace compression and other things.
1484 : */
1485 : void
1486 285 : nsPlainTextSerializer::Write(const nsAString& aStr)
1487 : {
1488 : // XXX Copy necessary to use nsString methods and gain
1489 : // access to underlying buffer
1490 570 : nsAutoString str(aStr);
1491 :
1492 : #ifdef DEBUG_wrapping
1493 : printf("Write(%s): wrap col = %d\n",
1494 : NS_ConvertUTF16toUTF8(str).get(), mWrapColumn);
1495 : #endif
1496 :
1497 285 : PRInt32 bol = 0;
1498 : PRInt32 newline;
1499 :
1500 285 : PRInt32 totLen = str.Length();
1501 :
1502 : // If the string is empty, do nothing:
1503 285 : if (totLen <= 0) return;
1504 :
1505 : // For Flowed text change nbsp-ses to spaces at end of lines to allow them
1506 : // to be cut off along with usual spaces if required. (bug #125928)
1507 285 : if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1508 2 : for (PRInt32 i = totLen-1; i >= 0; i--) {
1509 2 : PRUnichar c = str[i];
1510 2 : if ('\n' == c || '\r' == c || ' ' == c || '\t' == c)
1511 0 : continue;
1512 2 : if (kNBSP == c)
1513 0 : str.Replace(i, 1, ' ');
1514 : else
1515 2 : break;
1516 : }
1517 : }
1518 :
1519 : // We have two major codepaths here. One that does preformatted text and one
1520 : // that does normal formatted text. The one for preformatted text calls
1521 : // Output directly while the other code path goes through AddToLine.
1522 285 : if ((mPreFormatted && !mWrapColumn) || IsInPre()
1523 : || ((mSpanLevel > 0 || mDontWrapAnyQuotes)
1524 0 : && mEmptyLines >= 0 && str.First() == PRUnichar('>'))) {
1525 : // No intelligent wrapping.
1526 :
1527 : // This mustn't be mixed with intelligent wrapping without clearing
1528 : // the mCurrentLine buffer before!!!
1529 0 : NS_ASSERTION(mCurrentLine.IsEmpty(),
1530 : "Mixed wrapping data and nonwrapping data on the same line");
1531 0 : if (!mCurrentLine.IsEmpty()) {
1532 0 : FlushLine();
1533 : }
1534 :
1535 : // Put the mail quote "> " chars in, if appropriate.
1536 : // Have to put it in before every line.
1537 0 : while(bol<totLen) {
1538 0 : bool outputQuotes = mAtFirstColumn;
1539 0 : bool atFirstColumn = mAtFirstColumn;
1540 0 : bool outputLineBreak = false;
1541 0 : bool spacesOnly = true;
1542 :
1543 : // Find one of '\n' or '\r' using iterators since nsAString
1544 : // doesn't have the old FindCharInSet function.
1545 0 : nsAString::const_iterator iter; str.BeginReading(iter);
1546 0 : nsAString::const_iterator done_searching; str.EndReading(done_searching);
1547 0 : iter.advance(bol);
1548 0 : PRInt32 new_newline = bol;
1549 0 : newline = kNotFound;
1550 0 : while(iter != done_searching) {
1551 0 : if ('\n' == *iter || '\r' == *iter) {
1552 0 : newline = new_newline;
1553 0 : break;
1554 : }
1555 0 : if (' ' != *iter)
1556 0 : spacesOnly = false;
1557 0 : ++new_newline;
1558 0 : ++iter;
1559 : }
1560 :
1561 : // Done searching
1562 0 : nsAutoString stringpart;
1563 0 : if (newline == kNotFound) {
1564 : // No new lines.
1565 0 : stringpart.Assign(Substring(str, bol, totLen - bol));
1566 0 : if (!stringpart.IsEmpty()) {
1567 0 : PRUnichar lastchar = stringpart[stringpart.Length()-1];
1568 0 : if ((lastchar == '\t') || (lastchar == ' ') ||
1569 : (lastchar == '\r') ||(lastchar == '\n')) {
1570 0 : mInWhitespace = true;
1571 : }
1572 : else {
1573 0 : mInWhitespace = false;
1574 : }
1575 : }
1576 0 : mEmptyLines=-1;
1577 0 : atFirstColumn = mAtFirstColumn && (totLen-bol)==0;
1578 0 : bol = totLen;
1579 : }
1580 : else {
1581 : // There is a newline
1582 0 : stringpart.Assign(Substring(str, bol, newline-bol));
1583 0 : mInWhitespace = true;
1584 0 : outputLineBreak = true;
1585 0 : mEmptyLines=0;
1586 0 : atFirstColumn = true;
1587 0 : bol = newline+1;
1588 0 : if ('\r' == *iter && bol < totLen && '\n' == *++iter) {
1589 : // There was a CRLF in the input. This used to be illegal and
1590 : // stripped by the parser. Apparently not anymore. Let's skip
1591 : // over the LF.
1592 0 : bol++;
1593 : }
1594 : }
1595 :
1596 0 : mCurrentLine.AssignLiteral("");
1597 0 : if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1598 0 : if ((outputLineBreak || !spacesOnly) && // bugs 261467,125928
1599 0 : !stringpart.EqualsLiteral("-- ") &&
1600 0 : !stringpart.EqualsLiteral("- -- "))
1601 0 : stringpart.Trim(" ", false, true, true);
1602 0 : if (IsSpaceStuffable(stringpart.get()) && stringpart[0] != '>')
1603 0 : mCurrentLine.Append(PRUnichar(' '));
1604 : }
1605 0 : mCurrentLine.Append(stringpart);
1606 :
1607 0 : if (outputQuotes) {
1608 : // Note: this call messes with mAtFirstColumn
1609 0 : OutputQuotesAndIndent();
1610 : }
1611 :
1612 0 : Output(mCurrentLine);
1613 0 : if (outputLineBreak) {
1614 0 : Output(mLineBreak);
1615 : }
1616 0 : mAtFirstColumn = atFirstColumn;
1617 : }
1618 :
1619 : // Reset mCurrentLine.
1620 0 : mCurrentLine.Truncate();
1621 :
1622 : #ifdef DEBUG_wrapping
1623 : printf("No wrapping: newline is %d, totLen is %d\n",
1624 : newline, totLen);
1625 : #endif
1626 : return;
1627 : }
1628 :
1629 : // Intelligent handling of text
1630 : // If needed, strip out all "end of lines"
1631 : // and multiple whitespace between words
1632 : PRInt32 nextpos;
1633 285 : const PRUnichar * offsetIntoBuffer = nsnull;
1634 :
1635 2451 : while (bol < totLen) { // Loop over lines
1636 : // Find a place where we may have to do whitespace compression
1637 1881 : nextpos = str.FindCharInSet(" \t\n\r", bol);
1638 : #ifdef DEBUG_wrapping
1639 : nsAutoString remaining;
1640 : str.Right(remaining, totLen - bol);
1641 : foo = ToNewCString(remaining);
1642 : // printf("Next line: bol = %d, newlinepos = %d, totLen = %d, string = '%s'\n",
1643 : // bol, nextpos, totLen, foo);
1644 : nsMemory::Free(foo);
1645 : #endif
1646 :
1647 1881 : if (nextpos == kNotFound) {
1648 : // The rest of the string
1649 285 : offsetIntoBuffer = str.get() + bol;
1650 285 : AddToLine(offsetIntoBuffer, totLen-bol);
1651 285 : bol=totLen;
1652 285 : mInWhitespace=false;
1653 : }
1654 : else {
1655 : // There's still whitespace left in the string
1656 1596 : if (nextpos != 0 && (nextpos + 1) < totLen) {
1657 1576 : offsetIntoBuffer = str.get() + nextpos;
1658 : // skip '\n' if it is between CJ chars
1659 1576 : if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) && IS_CJ_CHAR(offsetIntoBuffer[1])) {
1660 0 : offsetIntoBuffer = str.get() + bol;
1661 0 : AddToLine(offsetIntoBuffer, nextpos-bol);
1662 0 : bol = nextpos + 1;
1663 0 : continue;
1664 : }
1665 : }
1666 : // If we're already in whitespace and not preformatted, just skip it:
1667 2032 : if (mInWhitespace && (nextpos == bol) && !mPreFormatted &&
1668 436 : !(mFlags & nsIDocumentEncoder::OutputPreformatted)) {
1669 : // Skip whitespace
1670 436 : bol++;
1671 436 : continue;
1672 : }
1673 :
1674 1160 : if (nextpos == bol) {
1675 : // Note that we are in whitespace.
1676 0 : mInWhitespace = true;
1677 0 : offsetIntoBuffer = str.get() + nextpos;
1678 0 : AddToLine(offsetIntoBuffer, 1);
1679 0 : bol++;
1680 0 : continue;
1681 : }
1682 :
1683 1160 : mInWhitespace = true;
1684 :
1685 1160 : offsetIntoBuffer = str.get() + bol;
1686 1160 : if (mPreFormatted || (mFlags & nsIDocumentEncoder::OutputPreformatted)) {
1687 : // Preserve the real whitespace character
1688 0 : nextpos++;
1689 0 : AddToLine(offsetIntoBuffer, nextpos-bol);
1690 0 : bol = nextpos;
1691 : }
1692 : else {
1693 : // Replace the whitespace with a space
1694 1160 : AddToLine(offsetIntoBuffer, nextpos-bol);
1695 1160 : AddToLine(kSpace.get(),1);
1696 1160 : bol = nextpos + 1; // Let's eat the whitespace
1697 : }
1698 : }
1699 : } // Continue looping over the string
1700 : }
1701 :
1702 :
1703 : /**
1704 : * Gets the value of an attribute in a string. If the function returns
1705 : * NS_ERROR_NOT_AVAILABLE, there was none such attribute specified.
1706 : */
1707 : nsresult
1708 280 : nsPlainTextSerializer::GetAttributeValue(nsIAtom* aName,
1709 : nsString& aValueRet)
1710 : {
1711 280 : if (mElement) {
1712 280 : if (mElement->GetAttr(kNameSpaceID_None, aName, aValueRet)) {
1713 0 : return NS_OK;
1714 : }
1715 : }
1716 :
1717 280 : return NS_ERROR_NOT_AVAILABLE;
1718 : }
1719 :
1720 : /**
1721 : * Returns true, if the element was inserted by Moz' TXT->HTML converter.
1722 : * In this case, we should ignore it.
1723 : */
1724 : bool
1725 4 : nsPlainTextSerializer::IsCurrentNodeConverted()
1726 : {
1727 8 : nsAutoString value;
1728 4 : nsresult rv = GetAttributeValue(nsGkAtoms::_class, value);
1729 4 : return (NS_SUCCEEDED(rv) &&
1730 0 : (value.EqualsIgnoreCase("moz-txt", 7) ||
1731 4 : value.EqualsIgnoreCase("\"moz-txt", 8)));
1732 : }
1733 :
1734 :
1735 : // static
1736 : nsIAtom*
1737 1560 : nsPlainTextSerializer::GetIdForContent(nsIContent* aContent)
1738 : {
1739 1560 : if (!aContent->IsHTML()) {
1740 0 : return nsnull;
1741 : }
1742 :
1743 1560 : nsIAtom* localName = aContent->Tag();
1744 1560 : return localName->IsStaticAtom() ? localName : nsnull;
1745 : }
1746 :
1747 : /**
1748 : * Returns true if we currently are inside a <pre>. The check is done
1749 : * by traversing the tag stack looking for <pre> until we hit a block
1750 : * level tag which is assumed to override any <pre>:s below it in
1751 : * the stack. To do this correctly to a 100% would require access
1752 : * to style which we don't support in this converter.
1753 : */
1754 : bool
1755 289 : nsPlainTextSerializer::IsInPre()
1756 : {
1757 289 : PRInt32 i = mTagStackIndex;
1758 1097 : while(i > 0) {
1759 549 : if (mTagStack[i - 1] == nsGkAtoms::pre)
1760 0 : return true;
1761 549 : if (nsContentUtils::IsHTMLBlock(mTagStack[i - 1])) {
1762 : // We assume that every other block overrides a <pre>
1763 30 : return false;
1764 : }
1765 519 : --i;
1766 : }
1767 :
1768 : // Not a <pre> in the whole stack
1769 259 : return false;
1770 : }
1771 :
1772 : /**
1773 : * This method is required only to identify LI's inside OL.
1774 : * Returns TRUE if we are inside an OL tag and FALSE otherwise.
1775 : */
1776 : bool
1777 0 : nsPlainTextSerializer::IsInOL()
1778 : {
1779 0 : PRInt32 i = mTagStackIndex;
1780 0 : while(--i >= 0) {
1781 0 : if (mTagStack[i] == nsGkAtoms::ol)
1782 0 : return true;
1783 0 : if (mTagStack[i] == nsGkAtoms::ul) {
1784 : // If a UL is reached first, LI belongs the UL nested in OL.
1785 0 : return false;
1786 : }
1787 : }
1788 : // We may reach here for orphan LI's.
1789 0 : return false;
1790 : }
1791 :
1792 : /*
1793 : @return 0 = no header, 1 = h1, ..., 6 = h6
1794 : */
1795 0 : PRInt32 HeaderLevel(nsIAtom* aTag)
1796 : {
1797 0 : if (aTag == nsGkAtoms::h1) {
1798 0 : return 1;
1799 : }
1800 0 : if (aTag == nsGkAtoms::h2) {
1801 0 : return 2;
1802 : }
1803 0 : if (aTag == nsGkAtoms::h3) {
1804 0 : return 3;
1805 : }
1806 0 : if (aTag == nsGkAtoms::h4) {
1807 0 : return 4;
1808 : }
1809 0 : if (aTag == nsGkAtoms::h5) {
1810 0 : return 5;
1811 : }
1812 0 : if (aTag == nsGkAtoms::h6) {
1813 0 : return 6;
1814 : }
1815 0 : return 0;
1816 : }
1817 :
1818 :
1819 : /*
1820 : * This is an implementation of GetUnicharWidth() and
1821 : * GetUnicharStringWidth() as defined in
1822 : * "The Single UNIX Specification, Version 2, The Open Group, 1997"
1823 : * <http://www.UNIX-systems.org/online.html>
1824 : *
1825 : * Markus Kuhn -- 2000-02-08 -- public domain
1826 : *
1827 : * Minor alterations to fit Mozilla's data types by Daniel Bratell
1828 : */
1829 :
1830 : /* These functions define the column width of an ISO 10646 character
1831 : * as follows:
1832 : *
1833 : * - The null character (U+0000) has a column width of 0.
1834 : *
1835 : * - Other C0/C1 control characters and DEL will lead to a return
1836 : * value of -1.
1837 : *
1838 : * - Non-spacing and enclosing combining characters (general
1839 : * category code Mn or Me in the Unicode database) have a
1840 : * column width of 0.
1841 : *
1842 : * - Spacing characters in the East Asian Wide (W) or East Asian
1843 : * FullWidth (F) category as defined in Unicode Technical
1844 : * Report #11 have a column width of 2.
1845 : *
1846 : * - All remaining characters (including all printable
1847 : * ISO 8859-1 and WGL4 characters, Unicode control characters,
1848 : * etc.) have a column width of 1.
1849 : *
1850 : * This implementation assumes that wchar_t characters are encoded
1851 : * in ISO 10646.
1852 : */
1853 :
1854 157 : PRInt32 GetUnicharWidth(PRUnichar ucs)
1855 : {
1856 : /* sorted list of non-overlapping intervals of non-spacing characters */
1857 : static const struct interval {
1858 : PRUint16 first;
1859 : PRUint16 last;
1860 : } combining[] = {
1861 : { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
1862 : { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
1863 : { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
1864 : { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
1865 : { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
1866 : { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
1867 : { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
1868 : { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
1869 : { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
1870 : { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
1871 : { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
1872 : { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
1873 : { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
1874 : { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
1875 : { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
1876 : { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
1877 : { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
1878 : { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
1879 : { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
1880 : { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
1881 : { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
1882 : { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
1883 : { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
1884 : { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
1885 : { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
1886 : { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
1887 : { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
1888 : { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
1889 : { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
1890 : { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
1891 : { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
1892 : };
1893 157 : PRInt32 min = 0;
1894 157 : PRInt32 max = sizeof(combining) / sizeof(struct interval) - 1;
1895 : PRInt32 mid;
1896 :
1897 : /* test for 8-bit control characters */
1898 157 : if (ucs == 0)
1899 0 : return 0;
1900 157 : if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0))
1901 0 : return -1;
1902 :
1903 : /* first quick check for Latin-1 etc. characters */
1904 157 : if (ucs < combining[0].first)
1905 109 : return 1;
1906 :
1907 : /* binary search in table of non-spacing characters */
1908 384 : while (max >= min) {
1909 288 : mid = (min + max) / 2;
1910 288 : if (combining[mid].last < ucs)
1911 240 : min = mid + 1;
1912 48 : else if (combining[mid].first > ucs)
1913 48 : max = mid - 1;
1914 0 : else if (combining[mid].first <= ucs && combining[mid].last >= ucs)
1915 0 : return 0;
1916 : }
1917 :
1918 : /* if we arrive here, ucs is not a combining or C0/C1 control character */
1919 :
1920 : /* fast test for majority of non-wide scripts */
1921 48 : if (ucs < 0x1100)
1922 0 : return 1;
1923 :
1924 : return 1 +
1925 : ((ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
1926 : (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
1927 : ucs != 0x303f) || /* CJK ... Yi */
1928 : (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
1929 : (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
1930 : (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
1931 : (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
1932 48 : (ucs >= 0xffe0 && ucs <= 0xffe6));
1933 : }
1934 :
1935 :
1936 26 : PRInt32 GetUnicharStringWidth(const PRUnichar* pwcs, PRInt32 n)
1937 : {
1938 26 : PRInt32 w, width = 0;
1939 :
1940 172 : for (;*pwcs && n-- > 0; pwcs++)
1941 146 : if ((w = GetUnicharWidth(*pwcs)) < 0)
1942 0 : ++width; // Taking 1 as the width of non-printable character, for bug# 94475.
1943 : else
1944 146 : width += w;
1945 :
1946 26 : return width;
1947 : }
1948 :
|