LCOV - code coverage report
Current view: directory - content/base/src - nsPlainTextSerializer.cpp (source / functions) Found Hit Coverage
Test: app.info Lines: 821 442 53.8 %
Date: 2012-06-02 Functions: 39 32 82.1 %

       1                 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2                 : /* ***** BEGIN LICENSE BLOCK *****
       3                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       4                 :  *
       5                 :  * The contents of this file are subject to the Mozilla Public License Version
       6                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       7                 :  * the License. You may obtain a copy of the License at
       8                 :  * http://www.mozilla.org/MPL/
       9                 :  *
      10                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      11                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      12                 :  * for the specific language governing rights and limitations under the
      13                 :  * License.
      14                 :  *
      15                 :  * The Original Code is mozilla.org code.
      16                 :  *
      17                 :  * The Initial Developer of the Original Code is
      18                 :  * Netscape Communications Corporation.
      19                 :  * Portions created by the Initial Developer are Copyright (C) 1998
      20                 :  * the Initial Developer. All Rights Reserved.
      21                 :  *
      22                 :  * Contributor(s):
      23                 :  *   Daniel Bratell <bratell@lysator.liu.se>
      24                 :  *   Ben Bucksch <mozilla@bucksch.org>
      25                 :  *
      26                 :  * Alternatively, the contents of this file may be used under the terms of
      27                 :  * either of the GNU General Public License Version 2 or later (the "GPL"),
      28                 :  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      29                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      30                 :  * of those above. If you wish to allow use of your version of this file only
      31                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      32                 :  * use your version of this file under the terms of the MPL, indicate your
      33                 :  * decision by deleting the provisions above and replace them with the notice
      34                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      35                 :  * the provisions above, a recipient may use your version of this file under
      36                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      37                 :  *
      38                 :  * ***** END LICENSE BLOCK ***** */
      39                 : 
      40                 : /*
      41                 :  * nsIContentSerializer implementation that can be used with an
      42                 :  * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
      43                 :  * (eg for copy/paste as plaintext).
      44                 :  */
      45                 : 
      46                 : #include "nsPlainTextSerializer.h"
      47                 : #include "nsLWBrkCIID.h"
      48                 : #include "nsIServiceManager.h"
      49                 : #include "nsGkAtoms.h"
      50                 : #include "nsINameSpaceManager.h"
      51                 : #include "nsTextFragment.h"
      52                 : #include "nsContentUtils.h"
      53                 : #include "nsReadableUtils.h"
      54                 : #include "nsUnicharUtils.h"
      55                 : #include "nsCRT.h"
      56                 : #include "mozilla/dom/Element.h"
      57                 : #include "mozilla/Preferences.h"
      58                 : 
      59                 : using namespace mozilla;
      60                 : using namespace mozilla::dom;
      61                 : 
      62                 : #define PREF_STRUCTS "converter.html2txt.structs"
      63                 : #define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"
      64                 : 
      65                 : static const  PRInt32 kTabSize=4;  // base indent unit; kIndentSizeList and kIndentSizeDD below are defined as this value
      66                 : static const  PRInt32 kOLNumberWidth = 3;  // presumably the width reserved for <ol> item numbers -- not used in this part of the file, confirm
      67                 : static const  PRInt32 kIndentSizeHeaders = 2;  /* Indentation of h1, if
      68                 :                                                 mHeaderStrategy = 1 or = 2.
      69                 :                                                 Indentation of other headers
      70                 :                                                 is derived from that.
      71                 :                                                 XXX center h1? */
      72                 : static const  PRInt32 kIndentIncrementHeaders = 2;  /* If mHeaderStrategy = 1,
      73                 :                                                 indent h(x+1) this many
      74                 :                                                 columns more than h(x) */
      75                 : static const  PRInt32 kIndentSizeList = kTabSize;
      76                 :                                // Indentation of non-first lines of ul and ol
      77                 : static const  PRInt32 kIndentSizeDD = kTabSize;  // Indentation of <dd>
      78                 : static const  PRUnichar  kNBSP = 160;  // 160 == 0xA0, the no-break space code point
      79                 : static const  PRUnichar kSPACE = ' ';
      80                 : // Forward declarations of file-static helpers.
      81                 : static PRInt32 HeaderLevel(nsIAtom* aTag);
      82                 : static PRInt32 GetUnicharWidth(PRUnichar ucs);
      83                 : static PRInt32 GetUnicharStringWidth(const PRUnichar* pwcs, PRInt32 n);
      84                 : 
      85                 : // Someday may want to make this non-const:
      86                 : static const PRUint32 TagStackSize = 500;  // number of slots in the mTagStack array allocated by the constructor
      87                 : static const PRUint32 OLStackSize = 100;  // number of slots in the mOLStack array allocated by the constructor
      88                 : 
      89             234 : nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer)  // Factory: allocates a serializer and hands it out through aSerializer via CallQueryInterface
      90                 : {
      91             234 :   nsPlainTextSerializer* it = new nsPlainTextSerializer();
      92             234 :   if (!it) {  // NOTE(review): only reachable if this build's operator new can return null -- confirm; the hit column shows the branch never ran in this test run
      93               0 :     return NS_ERROR_OUT_OF_MEMORY;
      94                 :   }
      95                 : 
      96             234 :   return CallQueryInterface(it, aSerializer);  // NOTE(review): if the QueryInterface fails, `it` is neither released nor deleted here -- looks like a leak on that path, confirm
      97                 : }
      98                 : 
      99             234 : nsPlainTextSerializer::nsPlainTextSerializer()  // Puts every member into its pre-Init() default state and allocates the two fixed-size stacks
     100             234 :   : kSpace(NS_LITERAL_STRING(" ")) // Init of "constant"
     101                 : {
     102                 : 
     103             234 :   mOutputString = nsnull;  // AppendText/AppendElementStart/AppendElementEnd/Flush point this at their aStr for the duration of the call
     104             234 :   mHeadLevel = 0;  // incremented/decremented around <head> by AppendElementStart/AppendElementEnd
     105             234 :   mAtFirstColumn = true;
     106             234 :   mIndent = 0;
     107             234 :   mCiteQuoteLevel = 0;
     108             234 :   mStructs = true;       // will be read from prefs later
     109             234 :   mHeaderStrategy = 1 /*indent increasingly*/;   // ditto
     110             234 :   mDontWrapAnyQuotes = false;                 // ditto
     111             234 :   mHasWrittenCiteBlockquote = false;
     112             234 :   mSpanLevel = 0;
     113            1872 :   for (PRInt32 i = 0; i <= 6; i++) {  // clears indices 0..6 inclusive; NOTE(review): assumes mHeaderCounter has at least 7 entries -- confirm in the header
     114            1638 :     mHeaderCounter[i] = 0;
     115                 :   }
     116                 : 
     117                 :   // Line breaker
     118             234 :   mWrapColumn = 72;     // XXX magic number, we expect someone to reset this (Init() overwrites it with aWrapColumn)
     119             234 :   mCurrentLineWidth = 0;
     120                 : 
     121                 :   // Flow
     122             234 :   mEmptyLines = 1; // The start of the document is an "empty line" in itself.
     123             234 :   mInWhitespace = false;
     124             234 :   mPreFormatted = false;
     125             234 :   mStartedOutput = false;
     126                 : 
     127                 :   // Allocate the tag stack and mark it empty (the slots themselves are
     128                 :   // not zeroed). The stack only ever contains pointers to static atoms,
     129                 :   // so they don't need refcounting.
     130             234 :   mTagStack = new nsIAtom*[TagStackSize];
     131             234 :   mTagStackIndex = 0;
     132             234 :   mIgnoreAboveIndex = (PRUint32)kNotFound;  // sentinel: AppendText and DoOpenContainer skip their work whenever this differs from kNotFound
     133                 : 
     134                 :   // initialize the OL stack, where numbers for ordered lists are kept
     135             234 :   mOLStack = new PRInt32[OLStackSize];
     136             234 :   mOLStackIndex = 0;
     137                 : 
     138             234 :   mULCount = 0;
     139             234 : }
     140                 : 
     141             702 : nsPlainTextSerializer::~nsPlainTextSerializer()  // Frees the two stack arrays allocated in the constructor
     142                 : {
     143             234 :   delete[] mTagStack;
     144             234 :   delete[] mOLStack;
     145             234 :   NS_WARN_IF_FALSE(mHeadLevel == 0, "Wrong head level!");  // the <head> increments/decrements in AppendElementStart/AppendElementEnd should have balanced by now
     146             936 : }
     147                 : 
     148            2106 : NS_IMPL_ISUPPORTS1(nsPlainTextSerializer,
     149                 :                    nsIContentSerializer)  // presumably expands to the nsISupports plumbing exposing nsIContentSerializer -- macro body not visible here, confirm
     150                 : 
     151                 : 
     152                 : NS_IMETHODIMP 
     153             234 : nsPlainTextSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn,  // Stores the nsIDocumentEncoder output flags and wrap column, picks the line-break string, reads the formatting prefs; always returns NS_OK
     154                 :                             const char* aCharSet, bool aIsCopying,  // aCharSet and aIsCopying are not read anywhere in this method
     155                 :                             bool aIsWholeDocument)  // not read in this method either
     156                 : {
     157                 : #ifdef DEBUG
     158                 :   // Check if the major control flags are set correctly.
     159             234 :   if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) {
     160               2 :     NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted,
     161                 :                  "If you want format=flowed, you must combine it with "
     162                 :                  "nsIDocumentEncoder::OutputFormatted");
     163                 :   }
     164                 : 
     165             234 :   if (aFlags & nsIDocumentEncoder::OutputFormatted) {
     166               2 :     NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted),
     167                 :                  "Can't do formatted and preformatted output at the same time!");
     168                 :   }
     169                 : #endif
     170                 : 
     171             234 :   mFlags = aFlags;
     172             234 :   mWrapColumn = aWrapColumn;  // replaces the constructor's placeholder value of 72
     173                 : 
     174                 :   // Only create a linebreaker if we will handle wrapping.
     175             234 :   if (MayWrap()) {
     176               2 :     mLineBreaker = nsContentUtils::LineBreaker();
     177                 :   }
     178                 : 
     179                 :   // Set the line break character:
     180             234 :   if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak)
     181                 :       && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) {
     182                 :     // Windows (CR followed by LF)
     183               2 :     mLineBreak.AssignLiteral("\r\n");
     184                 :   }
     185             232 :   else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) {
     186                 :     // Mac (CR only)
     187               0 :     mLineBreak.Assign(PRUnichar('\r'));
     188                 :   }
     189             232 :   else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) {
     190                 :     // Unix/DOM (LF only)
     191               0 :     mLineBreak.Assign(PRUnichar('\n'));
     192                 :   }
     193                 :   else {
     194                 :     // Platform/default: neither flag set, use the build's NS_LINEBREAK
     195             232 :     mLineBreak.AssignLiteral(NS_LINEBREAK);
     196                 :   }
     197                 : 
     198             234 :   mLineBreakDue = false;  // no deferred line break pending; DoOpenContainer checks this flag before calling EnsureVerticalSpace(mFloatingLines)
     199             234 :   mFloatingLines = -1;
     200                 : 
     201             234 :   if (mFlags & nsIDocumentEncoder::OutputFormatted) {
     202                 :     // Get some prefs that control how we do formatted output;
     203               2 :     mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs);  // the constructor's value is passed as the fallback
     204                 : 
     205                 :     mHeaderStrategy =
     206               2 :       Preferences::GetInt(PREF_HEADER_STRATEGY, mHeaderStrategy);
     207                 : 
     208                 :     // DontWrapAnyQuotes is set according to whether plaintext mail
     209                 :     // is wrapping to window width -- see bug 134439.
     210                 :     // We'll only want this if we're wrapping and formatted.
     211               2 :     if (mFlags & nsIDocumentEncoder::OutputWrap || mWrapColumn > 0) {  // parses as (mFlags & OutputWrap) || (mWrapColumn > 0)
     212                 :       mDontWrapAnyQuotes =
     213                 :         Preferences::GetBool("mail.compose.wrap_to_window_width",
     214               2 :                              mDontWrapAnyQuotes);
     215                 :     }
     216                 :   }
     217                 : 
     218                 :   // XXX We should let the caller pass this in.
     219             234 :   if (Preferences::GetBool("browser.frames.enabled")) {  // NOTE(review): no explicit fallback given -- confirm what GetBool yields when the pref is unset
     220             234 :     mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;  // either branch overwrites whatever the caller passed for this bit
     221                 :   }
     222                 :   else {
     223               0 :     mFlags |= nsIDocumentEncoder::OutputNoFramesContent;
     224                 :   }
     225                 : 
     226             234 :   return NS_OK;
     227                 : }
     228                 : 
     229                 : bool
     230               0 : nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack)  // Peeks at the last element of aStack without removing it
     231                 : {
     232               0 :   PRUint32 size = aStack.Length();
     233               0 :   if (size == 0) {
     234               0 :     return false;  // an empty stack reads as false
     235                 :   }
     236               0 :   return aStack.ElementAt(size-1);
     237                 : }
     238                 : 
     239                 : void
     240               0 : nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue)  // Overwrites the last element of aStack with aValue; the length is unchanged
     241                 : {
     242               0 :   PRUint32 size = aStack.Length();
     243               0 :   if (size > 0) {
     244               0 :     aStack.ElementAt(size-1) = aValue;
     245                 :   }
     246                 :   else {
     247               0 :     NS_ERROR("There is no \"Last\" value");  // empty stack: nothing is stored, the call is only reported
     248                 :   }
     249               0 : }
     250                 : 
     251                 : void
     252               0 : nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue)  // Appends aValue as the new last element of aStack
     253                 : {
     254               0 :     aStack.AppendElement(bool(aValue));
     255               0 : }
     256                 : 
     257                 : bool
     258               0 : nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack)  // Removes the last element of aStack and returns it
     259                 : {
     260               0 :   bool returnValue = false;  // result when the stack is empty; nothing is removed in that case
     261               0 :   PRUint32 size = aStack.Length();
     262               0 :   if (size > 0) {
     263               0 :     returnValue = aStack.ElementAt(size-1);
     264               0 :     aStack.RemoveElementAt(size-1);
     265                 :   }
     266               0 :   return returnValue;
     267                 : }
     268                 : 
     269                 : NS_IMETHODIMP 
     270             286 : nsPlainTextSerializer::AppendText(nsIContent* aText,  // Copies [aStartOffset, aEndOffset) of aText's text fragment and feeds it to DoAddText piece by piece, with aStr as the output target
     271                 :                                   PRInt32 aStartOffset,  // must be >= 0, otherwise NS_ERROR_INVALID_ARG is returned
     272                 :                                   PRInt32 aEndOffset, // -1 means "up to the fragment's length"
     273                 :                                   nsAString& aStr)
     274                 : {
     275             286 :   if (mIgnoreAboveIndex != (PRUint32)kNotFound) {  // inside a subtree DoOpenContainer marked as ignored: drop the text and report success
     276               0 :     return NS_OK;
     277                 :   }
     278                 :     
     279             286 :   NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
     280             286 :   if ( aStartOffset < 0 )
     281               0 :     return NS_ERROR_INVALID_ARG;
     282                 : 
     283             286 :   NS_ENSURE_ARG(aText);
     284                 : 
     285             286 :   nsresult rv = NS_OK;  // never reassigned below, so the final return is always NS_OK
     286                 : 
     287             286 :   nsIContent* content = aText;
     288                 :   const nsTextFragment* frag;
     289             286 :   if (!content || !(frag = content->GetText())) {  // NOTE(review): the !content half looks redundant after NS_ENSURE_ARG(aText) above -- confirm the macro returns on null
     290               0 :     return NS_ERROR_FAILURE;
     291                 :   }
     292                 :   
     293             286 :   PRInt32 endoffset = (aEndOffset == -1) ? frag->GetLength() : aEndOffset;
     294             286 :   NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!");  // NOTE(review): nothing here checks endoffset against frag->GetLength() -- confirm callers guarantee it
     295                 : 
     296             286 :   PRInt32 length = endoffset - aStartOffset;
     297             286 :   if (length <= 0) {
     298               0 :     return NS_OK;  // empty or inverted range: nothing to emit
     299                 :   }
     300                 : 
     301             572 :   nsAutoString textstr;  // local UTF-16 copy of the requested range, `length` characters long
     302             286 :   if (frag->Is2b()) {
     303               1 :     textstr.Assign(frag->Get2b() + aStartOffset, length);
     304                 :   }
     305                 :   else {
     306                 :     // AssignASCII is for 7-bit character only, so don't use it
     307             285 :     const char *data = frag->Get1b();
     308             285 :     CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr);
     309                 :   }
     310                 : 
     311             286 :   mOutputString = &aStr;  // set for the duration of the DoAddText calls; reset to nsnull before returning
     312                 : 
     313                 :   // We have to split the string across newlines
     314                 :   // to match parser behavior
     315             286 :   PRInt32 start = 0;  // index in textstr where the not-yet-emitted piece begins
     316             286 :   PRInt32 offset = textstr.FindCharInSet("\n\r");
     317             576 :   while (offset != kNotFound) {  // NOTE(review): '\r' and '\n' are each handled separately, so a "\r\n" pair produces two line breaks -- confirm this is intended
     318                 : 
     319               4 :     if (offset>start) {
     320                 :       // Pass in the line
     321                 :       DoAddText(false,
     322               0 :                 Substring(textstr, start, offset-start));
     323                 :     }
     324                 : 
     325                 :     // Pass in a newline
     326               4 :     DoAddText(true, mLineBreak);  // the configured mLineBreak is passed, not the character that was found
     327                 :     
     328               4 :     start = offset+1;
     329               4 :     offset = textstr.FindCharInSet("\n\r", start);
     330                 :   }
     331                 : 
     332                 :   // Consume the last bit of the string if there's any left
     333             286 :   if (start < length) {
     334             285 :     if (start) {
     335               2 :       DoAddText(false, Substring(textstr, start, length - start));
     336                 :     }
     337                 :     else {
     338             283 :       DoAddText(false, textstr);  // no line break was found: pass the whole string without making a Substring
     339                 :     }
     340                 :   }
     341                 :   
     342             286 :   mOutputString = nsnull;
     343                 : 
     344             286 :   return rv;
     345                 : }
     346                 : 
     347                 : NS_IMETHODIMP
     348               0 : nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection,  // CDATA sections are serialized exactly like text nodes
     349                 :                                           PRInt32 aStartOffset,
     350                 :                                           PRInt32 aEndOffset,
     351                 :                                           nsAString& aStr)
     352                 : {
     353               0 :   return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr);  // same arguments, same return value
     354                 : }
     355                 : 
     356                 : NS_IMETHODIMP
     357             780 : nsPlainTextSerializer::AppendElementStart(Element* aElement,  // Handles the start of aElement: DoOpenContainer for non-void tags, DoAddLeaf for void ones, with aStr as the output target
     358                 :                                           Element* aOriginalElement,  // not read in this method
     359                 :                                           nsAString& aStr)
     360                 : {
     361             780 :   NS_ENSURE_ARG(aElement);
     362                 : 
     363             780 :   mElement = aElement;  // kept in a member only for the duration of this call; cleared again below
     364                 : 
     365                 :   nsresult rv;
     366             780 :   nsIAtom* id = GetIdForContent(mElement);
     367                 : 
     368             780 :   bool isContainer = !nsContentUtils::IsHTMLVoid(id);  // anything that is not an HTML void element is treated as a container
     369                 : 
     370             780 :   mOutputString = &aStr;
     371                 : 
     372             780 :   if (isContainer) {
     373             737 :     rv = DoOpenContainer(id);
     374                 :   }
     375                 :   else {
     376              43 :     rv = DoAddLeaf(id);
     377                 :   }
     378                 : 
     379             780 :   mElement = nsnull;
     380             780 :   mOutputString = nsnull;
     381                 : 
     382             780 :   if (id == nsGkAtoms::head) {
     383             234 :     ++mHeadLevel;  // counted after the open call and regardless of rv; AppendElementEnd does the matching decrement
     384                 :   }
     385                 : 
     386             780 :   return rv;
     387                 : } 
     388                 :  
     389                 : NS_IMETHODIMP 
     390             780 : nsPlainTextSerializer::AppendElementEnd(Element* aElement,  // Handles the end of aElement: DoCloseContainer for non-void tags, nothing for void ones
     391                 :                                         nsAString& aStr)
     392                 : {
     393             780 :   NS_ENSURE_ARG(aElement);
     394                 : 
     395             780 :   mElement = aElement;  // kept in a member only for the duration of this call; cleared again below
     396                 : 
     397                 :   nsresult rv;
     398             780 :   nsIAtom* id = GetIdForContent(mElement);
     399                 : 
     400             780 :   bool isContainer = !nsContentUtils::IsHTMLVoid(id);
     401                 : 
     402             780 :   mOutputString = &aStr;
     403                 : 
     404             780 :   rv = NS_OK;  // result for void elements, which have no close handling
     405             780 :   if (isContainer) {
     406             737 :     rv = DoCloseContainer(id);
     407                 :   }
     408                 : 
     409             780 :   mElement = nsnull;
     410             780 :   mOutputString = nsnull;
     411                 : 
     412             780 :   if (id == nsGkAtoms::head) {
     413             234 :     --mHeadLevel;  // undoes the increment made in AppendElementStart
     414                 :     NS_ASSERTION(mHeadLevel >= 0, "mHeadLevel < 0");
     415                 :   }
     416                 : 
     417             780 :   return rv;
     418                 : }
     419                 : 
     420                 : NS_IMETHODIMP 
     421             234 : nsPlainTextSerializer::Flush(nsAString& aStr)  // Runs FlushLine() with aStr installed as the output target; always returns NS_OK
     422                 : {
     423             234 :   mOutputString = &aStr;
     424             234 :   FlushLine();
     425             234 :   mOutputString = nsnull;  // the target is only valid for the duration of the call
     426             234 :   return NS_OK;
     427                 : }
     428                 : 
     429                 : NS_IMETHODIMP
     430             234 : nsPlainTextSerializer::AppendDocumentStart(nsIDocument *aDocument,  // No-op: neither argument is read and nothing is written to aStr
     431                 :                                            nsAString& aStr)
     432                 : {
     433             234 :   return NS_OK;
     434                 : }
     435                 : 
     436                 : nsresult
     437             737 : nsPlainTextSerializer::DoOpenContainer(nsIAtom* aTag)
     438                 : {
     439             737 :   if (mFlags & nsIDocumentEncoder::OutputRaw) {
     440                 :     // Raw means raw.  Don't even think about doing anything fancy
     441                 :     // here like indenting, adding line breaks or any other
     442                 :     // characters such as list item bullets, quote characters
     443                 :     // around <q>, etc.  I mean it!  Don't make me smack you!
     444                 : 
     445               0 :     return NS_OK;
     446                 :   }
     447                 : 
     448             737 :   if (mTagStackIndex < TagStackSize) {
     449             737 :     mTagStack[mTagStackIndex++] = aTag;
     450                 :   }
     451                 : 
     452             737 :   if (mIgnoreAboveIndex != (PRUint32)kNotFound) {
     453               0 :     return NS_OK;
     454                 :   }
     455                 : 
     456                 :   // Reset this so that <blockquote type=cite> doesn't affect the whitespace
     457                 :   // above random <pre>s below it.
     458                 :   mHasWrittenCiteBlockquote = mHasWrittenCiteBlockquote &&
     459             737 :                               aTag == nsGkAtoms::pre;
     460                 : 
     461             737 :   bool isInCiteBlockquote = false;
     462                 : 
     463                 :   // XXX special-case <blockquote type=cite> so that we don't add additional
     464                 :   // newlines before the text.
     465             737 :   if (aTag == nsGkAtoms::blockquote) {
     466               0 :     nsAutoString value;
     467               0 :     nsresult rv = GetAttributeValue(nsGkAtoms::type, value);
     468               0 :     isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
     469                 :   }
     470                 : 
     471             737 :   if (mLineBreakDue && !isInCiteBlockquote)
     472               8 :     EnsureVerticalSpace(mFloatingLines);
     473                 : 
     474                 :   // Check if this tag's content that should not be output
     475             737 :   if ((aTag == nsGkAtoms::noscript &&
     476               0 :        !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) ||
     477                 :       ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
     478               0 :        !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) {
     479                 :     // Ignore everything that follows the current tag in 
     480                 :     // question until a matching end tag is encountered.
     481               0 :     mIgnoreAboveIndex = mTagStackIndex - 1;
     482               0 :     return NS_OK;
     483                 :   }
     484                 : 
     485             737 :   if (aTag == nsGkAtoms::body) {
     486                 :     // Try to figure out here whether we have a
     487                 :     // preformatted style attribute.
     488                 :     //
     489                 :     // Trigger on the presence of a "pre-wrap" in the
     490                 :     // style attribute. That's a very simplistic way to do
     491                 :     // it, but better than nothing.
     492                 :     // Also set mWrapColumn to the value given there
     493                 :     // (which arguably we should only do if told to do so).
     494             468 :     nsAutoString style;
     495                 :     PRInt32 whitespace;
     496             234 :     if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) &&
     497                 :        (kNotFound != (whitespace = style.Find("white-space:")))) {
     498                 : 
     499               0 :       if (kNotFound != style.Find("pre-wrap", true, whitespace)) {
     500                 : #ifdef DEBUG_preformatted
     501                 :         printf("Set mPreFormatted based on style pre-wrap\n");
     502                 : #endif
     503               0 :         mPreFormatted = true;
     504               0 :         PRInt32 widthOffset = style.Find("width:");
     505               0 :         if (widthOffset >= 0) {
     506                 :           // We have to search for the ch before the semicolon,
     507                 :           // not for the semicolon itself, because nsString::ToInteger()
     508                 :           // considers 'c' to be a valid numeric char (even if radix=10)
     509                 :           // but then gets confused if it sees it next to the number
     510                 :           // when the radix specified was 10, and returns an error code.
     511               0 :           PRInt32 semiOffset = style.Find("ch", false, widthOffset+6);
     512                 :           PRInt32 length = (semiOffset > 0 ? semiOffset - widthOffset - 6
     513               0 :                             : style.Length() - widthOffset);
     514               0 :           nsAutoString widthstr;
     515               0 :           style.Mid(widthstr, widthOffset+6, length);
     516                 :           PRInt32 err;
     517               0 :           PRInt32 col = widthstr.ToInteger(&err);
     518                 : 
     519               0 :           if (NS_SUCCEEDED(err)) {
     520               0 :             mWrapColumn = (PRUint32)col;
     521                 : #ifdef DEBUG_preformatted
     522                 :             printf("Set wrap column to %d based on style\n", mWrapColumn);
     523                 : #endif
     524                 :           }
     525                 :         }
     526                 :       }
     527               0 :       else if (kNotFound != style.Find("pre", true, whitespace)) {
     528                 : #ifdef DEBUG_preformatted
     529                 :         printf("Set mPreFormatted based on style pre\n");
     530                 : #endif
     531               0 :         mPreFormatted = true;
     532               0 :         mWrapColumn = 0;
     533                 :       }
     534                 :     } 
     535                 :     else {
     536                 :       /* See comment at end of function. */
     537             234 :       mInWhitespace = true;
     538             234 :       mPreFormatted = false;
     539                 :     }
     540                 : 
     541             234 :     return NS_OK;
     542                 :   }
     543                 : 
     544                 :   // Keep this in sync with DoCloseContainer!
     545             503 :   if (!DoOutput()) {
     546               0 :     return NS_OK;
     547                 :   }
     548                 : 
     549             503 :   if (aTag == nsGkAtoms::p)
     550              18 :     EnsureVerticalSpace(1);
     551             485 :   else if (aTag == nsGkAtoms::pre) {
     552               0 :     if (GetLastBool(mIsInCiteBlockquote))
     553               0 :       EnsureVerticalSpace(0);
     554               0 :     else if (mHasWrittenCiteBlockquote) {
     555               0 :       EnsureVerticalSpace(0);
     556               0 :       mHasWrittenCiteBlockquote = false;
     557                 :     }
     558                 :     else
     559               0 :       EnsureVerticalSpace(1);
     560                 :   }
     561             485 :   else if (aTag == nsGkAtoms::tr) {
     562               0 :     PushBool(mHasWrittenCellsForRow, false);
     563                 :   }
     564             485 :   else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
     565                 :     // We must make sure that the content of two table cells get a
     566                 :     // space between them.
     567                 : 
     568                 :     // To make the separation between cells most obvious and
     569                 :     // importable, we use a TAB.
     570               0 :     if (GetLastBool(mHasWrittenCellsForRow)) {
     571                 :       // Bypass |Write| so that the TAB isn't compressed away.
     572               0 :       AddToLine(NS_LITERAL_STRING("\t").get(), 1);
     573               0 :       mInWhitespace = true;
     574                 :     }
     575               0 :     else if (mHasWrittenCellsForRow.IsEmpty()) {
     576                 :       // We don't always see a <tr> (nor a <table>) before the <td> if we're
     577                 :       // copying part of a table
     578               0 :       PushBool(mHasWrittenCellsForRow, true); // will never be popped
     579                 :     }
     580                 :     else {
     581               0 :       SetLastBool(mHasWrittenCellsForRow, true);
     582                 :     }
     583                 :   }
     584             485 :   else if (aTag == nsGkAtoms::ul) {
     585                 :     // Indent here to support nested lists, which aren't included in li :-(
     586               4 :     EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
     587                 :          // Must end the current line before we change indention
     588               4 :     mIndent += kIndentSizeList;
     589               4 :     mULCount++;
     590                 :   }
     591             481 :   else if (aTag == nsGkAtoms::ol) {
     592               0 :     EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
     593               0 :     if (mFlags & nsIDocumentEncoder::OutputFormatted) {
     594                 :       // Must end the current line before we change indention
     595               0 :       if (mOLStackIndex < OLStackSize) {
     596               0 :         nsAutoString startAttr;
     597               0 :         PRInt32 startVal = 1;
     598               0 :         if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) {
     599               0 :           PRInt32 rv = 0;
     600               0 :           startVal = startAttr.ToInteger(&rv);
     601               0 :           if (NS_FAILED(rv))
     602               0 :             startVal = 1;
     603                 :         }
     604               0 :         mOLStack[mOLStackIndex++] = startVal;
     605                 :       }
     606                 :     } else {
     607               0 :       mOLStackIndex++;
     608                 :     }
     609               0 :     mIndent += kIndentSizeList;  // see ul
     610                 :   }
     611             481 :   else if (aTag == nsGkAtoms::li &&
     612                 :            (mFlags & nsIDocumentEncoder::OutputFormatted)) {
     613               0 :     if (mTagStackIndex > 1 && IsInOL()) {
     614               0 :       if (mOLStackIndex > 0) {
     615               0 :         nsAutoString valueAttr;
     616               0 :         if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) {
     617               0 :           PRInt32 rv = 0;
     618               0 :           PRInt32 valueAttrVal = valueAttr.ToInteger(&rv);
     619               0 :           if (NS_SUCCEEDED(rv))
     620               0 :             mOLStack[mOLStackIndex-1] = valueAttrVal;
     621                 :         }
     622                 :         // This is what nsBulletFrame does for OLs:
     623               0 :         mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10);
     624                 :       }
     625                 :       else {
     626               0 :         mInIndentString.Append(PRUnichar('#'));
     627                 :       }
     628                 : 
     629               0 :       mInIndentString.Append(PRUnichar('.'));
     630                 : 
     631                 :     }
     632                 :     else {
     633                 :       static char bulletCharArray[] = "*o+#";
     634               0 :       PRUint32 index = mULCount > 0 ? (mULCount - 1) : 3;
     635               0 :       char bulletChar = bulletCharArray[index % 4];
     636               0 :       mInIndentString.Append(PRUnichar(bulletChar));
     637                 :     }
     638                 : 
     639               0 :     mInIndentString.Append(PRUnichar(' '));
     640                 :   }
     641             481 :   else if (aTag == nsGkAtoms::dl) {
     642               0 :     EnsureVerticalSpace(1);
     643                 :   }
     644             481 :   else if (aTag == nsGkAtoms::dt) {
     645               0 :     EnsureVerticalSpace(0);
     646                 :   }
     647             481 :   else if (aTag == nsGkAtoms::dd) {
     648               0 :     EnsureVerticalSpace(0);
     649               0 :     mIndent += kIndentSizeDD;
     650                 :   }
     651             481 :   else if (aTag == nsGkAtoms::span) {
     652               1 :     ++mSpanLevel;
     653                 :   }
     654             480 :   else if (aTag == nsGkAtoms::blockquote) {
     655                 :     // Push
     656               0 :     PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
     657               0 :     if (isInCiteBlockquote) {
     658               0 :       EnsureVerticalSpace(0);
     659               0 :       mCiteQuoteLevel++;
     660                 :     }
     661                 :     else {
     662               0 :       EnsureVerticalSpace(1);
     663               0 :       mIndent += kTabSize; // Check for some maximum value?
     664                 :     }
     665                 :   }
     666             480 :   else if (aTag == nsGkAtoms::q) {
     667               0 :     Write(NS_LITERAL_STRING("\""));
     668                 :   }
     669                 : 
     670                 :   // Else make sure we'll separate block level tags,
     671                 :   // even if we're about to leave, before doing any other formatting.
     672             480 :   else if (nsContentUtils::IsHTMLBlock(aTag)) {
     673              12 :     EnsureVerticalSpace(0);
     674                 :   }
     675                 : 
     676                 :   //////////////////////////////////////////////////////////////
     677             503 :   if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
     678             499 :     return NS_OK;
     679                 :   }
     680                 :   //////////////////////////////////////////////////////////////
     681                 :   // The rest of this routine is formatted output stuff,
     682                 :   // which we should skip if we're not formatted:
     683                 :   //////////////////////////////////////////////////////////////
     684                 : 
     685                 :   // Push on stack
     686               4 :   bool currentNodeIsConverted = IsCurrentNodeConverted();
     687                 : 
     688               4 :   if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
     689                 :       aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
     690                 :       aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6)
     691                 :   {
     692               0 :     EnsureVerticalSpace(2);
     693               0 :     if (mHeaderStrategy == 2) {  // numbered
     694               0 :       mIndent += kIndentSizeHeaders;
     695                 :       // Caching
     696               0 :       PRInt32 level = HeaderLevel(aTag);
     697                 :       // Increase counter for current level
     698               0 :       mHeaderCounter[level]++;
     699                 :       // Reset all lower levels
     700                 :       PRInt32 i;
     701                 : 
     702               0 :       for (i = level + 1; i <= 6; i++) {
     703               0 :         mHeaderCounter[i] = 0;
     704                 :       }
     705                 : 
     706                 :       // Construct numbers
     707               0 :       nsAutoString leadup;
     708               0 :       for (i = 1; i <= level; i++) {
     709               0 :         leadup.AppendInt(mHeaderCounter[i]);
     710               0 :         leadup.Append(PRUnichar('.'));
     711                 :       }
     712               0 :       leadup.Append(PRUnichar(' '));
     713               0 :       Write(leadup);
     714                 :     }
     715               0 :     else if (mHeaderStrategy == 1) { // indent increasingly
     716               0 :       mIndent += kIndentSizeHeaders;
     717               0 :       for (PRInt32 i = HeaderLevel(aTag); i > 1; i--) {
     718                 :            // for h(x), run x-1 times
     719               0 :         mIndent += kIndentIncrementHeaders;
     720                 :       }
     721               0 :     }
     722                 :   }
     723               4 :   else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) {
     724               0 :     nsAutoString url;
     725               0 :     if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url))
     726               0 :         && !url.IsEmpty()) {
     727               0 :       mURL = url;
     728               0 :     }
     729                 :   }
     730               4 :   else if (aTag == nsGkAtoms::sup && mStructs && !currentNodeIsConverted) {
     731               0 :     Write(NS_LITERAL_STRING("^"));
     732                 :   }
     733               4 :   else if (aTag == nsGkAtoms::sub && mStructs && !currentNodeIsConverted) {
     734               0 :     Write(NS_LITERAL_STRING("_"));
     735                 :   }
     736               4 :   else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
     737               0 :     Write(NS_LITERAL_STRING("|"));
     738                 :   }
     739               4 :   else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
     740               0 :            && mStructs && !currentNodeIsConverted) {
     741               0 :     Write(NS_LITERAL_STRING("*"));
     742                 :   }
     743               4 :   else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
     744               0 :            && mStructs && !currentNodeIsConverted) {
     745               0 :     Write(NS_LITERAL_STRING("/"));
     746                 :   }
     747               4 :   else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
     748               0 :     Write(NS_LITERAL_STRING("_"));
     749                 :   }
     750                 : 
     751                 :   /* Container elements are always block elements, so we shouldn't
     752                 :      output any whitespace immediately after the container tag even if
     753                 :      there's extra whitespace there because the HTML is pretty-printed
     754                 :      or something. To ensure that happens, tell the serializer we're
     755                 :      already in whitespace so it won't output more. */
     756               4 :   mInWhitespace = true;
     757                 : 
     758               4 :   return NS_OK;
     759                 : }
     760                 : /**
                         :  * Handles the close of the container element |aTag|.
                         :  *
                         :  * Order of operations, as implemented below:
                         :  *  1. In OutputRaw mode nothing is done at all.
                         :  *  2. mTagStackIndex is decremented (never below zero).
                         :  *  3. Tags at or above mIgnoreAboveIndex are discarded; the close tag
                         :  *     sitting exactly at that index resets the marker first.
                         :  *  4. body/html finish the output (EnsureVerticalSpace(0) when
                         :  *     OutputFormatted is set, FlushLine() otherwise) and return.
                         :  *  5. If DoOutput() is false, nothing more happens.
                         :  *  6. Per-tag handling: indentation and the list/span/quote counters
                         :  *     are unwound, and pending line breaks are recorded in
                         :  *     mFloatingLines / mLineBreakDue.
                         :  *  7. Only with OutputFormatted: header indentation is removed, a
                         :  *     remembered link URL is written as " <url>", and the closing
                         :  *     markers for sup/sub/code/strong/b/em/i/u are written.
                         :  *
                         :  * @param aTag atom of the element being closed.
                         :  * @return NS_OK on every path visible here.
                         :  */
     761                 : nsresult
     762             737 : nsPlainTextSerializer::DoCloseContainer(nsIAtom* aTag)
     763                 : {
     764             737 :   if (mFlags & nsIDocumentEncoder::OutputRaw) {
     765                 :     // Raw means raw.  Do not do anything fancy here such as
     766                 :     // indenting, adding line breaks or emitting any other
     767                 :     // characters (list item bullets, quote characters
     768                 :     // around <q>, etc.).
     769                 : 
     770               0 :     return NS_OK;
     771                 :   }
     772                 : 
     773             737 :   if (mTagStackIndex > 0) {  // never step below zero
     774             737 :     --mTagStackIndex;
     775                 :   }
     776                 : 
     777             737 :   if (mTagStackIndex >= mIgnoreAboveIndex) {
     778               0 :     if (mTagStackIndex == mIgnoreAboveIndex) {
     779                 :       // We're dealing with the close tag whose matching
     780                 :       // open tag had set the mIgnoreAboveIndex value.
     781                 :       // Reset mIgnoreAboveIndex before discarding this tag.
     782               0 :       mIgnoreAboveIndex = (PRUint32)kNotFound;
     783                 :     }
     784               0 :     return NS_OK;
     785                 :   }
     786                 : 
     787                 :   // End current line if we're ending a block level tag
     788             737 :   if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) {
     789                 :     // We want the output to end with a new line,
     790                 :     // but in preformatted areas like text fields,
     791                 :     // we can't emit newlines that weren't there.
     792                 :     // So add the newline only in the case of formatted output.
     793             468 :     if (mFlags & nsIDocumentEncoder::OutputFormatted) {
     794               4 :       EnsureVerticalSpace(0);
     795                 :     }
     796                 :     else {
     797             464 :       FlushLine();
     798                 :     }
     799                 :     // We won't want to do anything with these in formatted mode either,
     800                 :     // so just return now:
     801             468 :     return NS_OK;
     802                 :   }
     803                 : 
     804                 :   // Keep this in sync with DoOpenContainer!
     805             269 :   if (!DoOutput()) {
     806             234 :     return NS_OK;
     807                 :   }
     808                 : 
     809              35 :   if (aTag == nsGkAtoms::tr) {
     810               0 :     PopBool(mHasWrittenCellsForRow);
     811                 :     // Should always end a line, but get no more whitespace
     812               0 :     if (mFloatingLines < 0)  // do not lower an already larger request
     813               0 :       mFloatingLines = 0;
     814               0 :     mLineBreakDue = true;
     815                 :   }
     816              35 :   else if (((aTag == nsGkAtoms::li) ||
     817                 :             (aTag == nsGkAtoms::dt)) &&
     818                 :            (mFlags & nsIDocumentEncoder::OutputFormatted)) {
     819                 :     // Items that should always end a line, but get no more whitespace
     820               0 :     if (mFloatingLines < 0)
     821               0 :       mFloatingLines = 0;
     822               0 :     mLineBreakDue = true;
     823                 :   }
     824              35 :   else if (aTag == nsGkAtoms::pre) {
     825               0 :     mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
     826               0 :     mLineBreakDue = true;
     827                 :   }
     828              35 :   else if (aTag == nsGkAtoms::ul) {
     829               4 :     FlushLine();
     830               4 :     mIndent -= kIndentSizeList;
     831               4 :     if (--mULCount + mOLStackIndex == 0) {  // both list counters are back at zero
     832               4 :       mFloatingLines = 1;
     833               4 :       mLineBreakDue = true;
     834                 :     }
     835                 :   }
     836              31 :   else if (aTag == nsGkAtoms::ol) {
     837               0 :     FlushLine(); // Doing this after decreasing OLStackIndex would be wrong.
     838               0 :     mIndent -= kIndentSizeList;
     839               0 :     NS_ASSERTION(mOLStackIndex, "Wrong OLStack level!");
     840               0 :     mOLStackIndex--;  // NOTE(review): guarded only by the assertion above - confirm this cannot run at 0
     841               0 :     if (mULCount + mOLStackIndex == 0) {
     842               0 :       mFloatingLines = 1;
     843               0 :       mLineBreakDue = true;
     844                 :     }
     845                 :   }  
     846              31 :   else if (aTag == nsGkAtoms::dl) {
     847               0 :     mFloatingLines = 1;
     848               0 :     mLineBreakDue = true;
     849                 :   }
     850              31 :   else if (aTag == nsGkAtoms::dd) {
     851               0 :     FlushLine();
     852               0 :     mIndent -= kIndentSizeDD;
     853                 :   }
     854              31 :   else if (aTag == nsGkAtoms::span) {
     855               1 :     NS_ASSERTION(mSpanLevel, "Span level will be negative!");
     856               1 :     --mSpanLevel;  // NOTE(review): guarded only by the assertion above - confirm this cannot run at 0
     857                 :   }
     858              30 :   else if (aTag == nsGkAtoms::div) {
     859               0 :     if (mFloatingLines < 0)
     860               0 :       mFloatingLines = 0;
     861               0 :     mLineBreakDue = true;
     862                 :   }
     863              30 :   else if (aTag == nsGkAtoms::blockquote) {
     864               0 :     FlushLine();    // Is this needed?
     865                 : 
     866                 :     // Pop the entry of mIsInCiteBlockquote for this blockquote
     867               0 :     bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);
     868                 : 
     869               0 :     if (isInCiteBlockquote) {
     870               0 :       NS_ASSERTION(mCiteQuoteLevel, "CiteQuote level will be negative!");
     871               0 :       mCiteQuoteLevel--;
     872               0 :       mFloatingLines = 0;
     873               0 :       mHasWrittenCiteBlockquote = true;
     874                 :     }
     875                 :     else {
     876               0 :       mIndent -= kTabSize;
     877               0 :       mFloatingLines = 1;
     878                 :     }
     879               0 :     mLineBreakDue = true;
     880                 :   }
     881              30 :   else if (aTag == nsGkAtoms::q) {
     882               0 :     Write(NS_LITERAL_STRING("\""));
     883                 :   }
     884              30 :   else if (nsContentUtils::IsHTMLBlock(aTag)
     885                 :            && aTag != nsGkAtoms::script) {
     886                 :     // All other blocks get 1 vertical space after them
     887                 :     // in formatted mode, otherwise 0.
     888                 :     // This is hard. Sometimes 0 is a better number, but
     889                 :     // how to know?
     890              30 :     if (mFlags & nsIDocumentEncoder::OutputFormatted)
     891               0 :       EnsureVerticalSpace(1);
     892                 :     else {
     893              30 :       if (mFloatingLines < 0)
     894              30 :         mFloatingLines = 0;
     895              30 :       mLineBreakDue = true;
     896                 :     }
     897                 :   }
     898                 : 
     899                 :   //////////////////////////////////////////////////////////////
     900              35 :   if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
     901              35 :     return NS_OK;
     902                 :   }
     903                 :   //////////////////////////////////////////////////////////////
     904                 :   // The rest of this routine is formatted output stuff,
     905                 :   // which we should skip if we're not formatted:
     906                 :   //////////////////////////////////////////////////////////////
     907                 : 
     908                 :   // Ask whether the current node is converted.
     909               0 :   bool currentNodeIsConverted = IsCurrentNodeConverted();  // NOTE(review): the old comment said "Pop the currentConverted stack"; no pop is visible here
     910                 :   
     911               0 :   if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
     912                 :       aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
     913                 :       aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
     914                 :     
     915               0 :     if (mHeaderStrategy) {  /*numbered or indent increasingly*/ 
     916               0 :       mIndent -= kIndentSizeHeaders;
     917                 :     }
     918               0 :     if (mHeaderStrategy == 1 /*indent increasingly*/ ) {
     919               0 :       for (PRInt32 i = HeaderLevel(aTag); i > 1; i--) {
     920                 :            // for h(x), run x-1 times
     921               0 :         mIndent -= kIndentIncrementHeaders;
     922                 :       }
     923                 :     }
     924               0 :     EnsureVerticalSpace(1);
     925                 :   }
     926               0 :   else if (aTag == nsGkAtoms::a && !currentNodeIsConverted && !mURL.IsEmpty()) {
     927               0 :     nsAutoString temp; 
     928               0 :     temp.AssignLiteral(" <");
     929               0 :     temp += mURL;
     930               0 :     temp.Append(PRUnichar('>'));
     931               0 :     Write(temp);
     932               0 :     mURL.Truncate();  // the URL has been written; forget it
     933                 :   }
     934               0 :   else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub)
     935               0 :            && mStructs && !currentNodeIsConverted) {
     936               0 :     Write(kSpace);  // sup/sub end with kSpace instead of a closing marker
     937                 :   }
     938               0 :   else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
     939               0 :     Write(NS_LITERAL_STRING("|"));
     940                 :   }
     941               0 :   else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
     942               0 :            && mStructs && !currentNodeIsConverted) {
     943               0 :     Write(NS_LITERAL_STRING("*"));
     944                 :   }
     945               0 :   else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
     946               0 :            && mStructs && !currentNodeIsConverted) {
     947               0 :     Write(NS_LITERAL_STRING("/"));
     948                 :   }
     949               0 :   else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
     950               0 :     Write(NS_LITERAL_STRING("_"));
     951                 :   }
     952                 : 
     953               0 :   return NS_OK;
     954                 : }
     955                 : /**
                         :  * Reports whether leaf content at the current position of the tag stack
                         :  * has to be dropped from the output.
                         :  *
                         :  * @return true if mTagStack holds nsGkAtoms::select at index
                         :  *         mTagStackIndex-1 or mTagStackIndex-2, or holds
                         :  *         nsGkAtoms::script or nsGkAtoms::style at index
                         :  *         mTagStackIndex-1; false otherwise.
                         :  */
     956                 : bool
     957             331 : nsPlainTextSerializer::MustSuppressLeaf()
     958                 : {
     959             993 :   if ((mTagStackIndex > 1 &&
     960             331 :        mTagStack[mTagStackIndex-2] == nsGkAtoms::select) ||
     961                 :       (mTagStackIndex > 0 &&
     962             331 :         mTagStack[mTagStackIndex-1] == nsGkAtoms::select)) {
     963                 :     // Don't output the contents of SELECT elements.
     964                 :     // It might be nice, eventually, to output just the selected element.
     965                 :     // Read more in bug 31994.
     966               0 :     return true;
     967                 :   }
     968                 : 
     969             993 :   if (mTagStackIndex > 0 &&
     970             331 :       (mTagStack[mTagStackIndex-1] == nsGkAtoms::script ||
     971             331 :        mTagStack[mTagStackIndex-1] == nsGkAtoms::style)) {
     972                 :     // Don't output the contents of <script> or <style> tags.
     973               0 :     return true;
     974                 :   }
     975                 : 
     976             331 :   return false;
     977                 : }
     978                 : /**
                         :  * Adds a run of text, or a line break found in the source, to the
                         :  * output.
                         :  *
                         :  * Nothing happens when DoOutput() is false.  A line break scheduled
                         :  * through mLineBreakDue is carried out before the MustSuppressLeaf()
                         :  * check, so it is performed even when the content itself is dropped.
                         :  *
                         :  * @param aIsLineBreak true when the call stands for a line break; in that
                         :  *                     case aText is not read.
                         :  * @param aText        the text handed to Write() when aIsLineBreak is
                         :  *                     false.
                         :  */
     979                 : void
     980             289 : nsPlainTextSerializer::DoAddText(bool aIsLineBreak, const nsAString& aText)
     981                 : {
     982                 :   // If we don't want any output, just return
     983             289 :   if (!DoOutput()) {
     984               0 :     return;
     985                 :   }
     986                 : 
     987             289 :   if (!aIsLineBreak) {
     988                 :     // Make sure to reset this, since it's no longer true.
     989             285 :     mHasWrittenCiteBlockquote = false;
     990                 :   }
     991                 : 
     992             289 :   if (mLineBreakDue)  // carry out a line break that was scheduled earlier
     993               0 :     EnsureVerticalSpace(mFloatingLines);
     994                 : 
     995             289 :   if (MustSuppressLeaf()) {
     996               0 :     return;
     997                 :   }
     998                 : 
     999             289 :   if (aIsLineBreak) {
    1000                 :     // The only times we want to pass along whitespace from the original
    1001                 :     // html source are if we're forced into preformatted mode via flags,
    1002                 :     // or if we're prettyprinting and we're inside a <pre>.
    1003                 :     // Otherwise, either we're collapsing to minimal text, or we're
    1004                 :     // prettyprinting to mimic the html format, and in neither case
    1005                 :     // does the formatting of the html source help us.
    1006               8 :     if ((mFlags & nsIDocumentEncoder::OutputPreformatted) ||
    1007               0 :         (mPreFormatted && !mWrapColumn) ||
    1008               4 :         IsInPre()) {
    1009               0 :       EnsureVerticalSpace(mEmptyLines+1);
    1010                 :     }
    1011               4 :     else if (!mInWhitespace) {
    1012               0 :       Write(kSpace);
    1013               0 :       mInWhitespace = true;
    1014                 :     }
    1015               4 :     return;
    1016                 :   }
    1017                 : 
    1018                 :   /* Check whether we are in a link (indicated by a non-empty mURL)
    1019                 :      and the text is equal to the URL. In that case we don't want to output
    1020                 :      the URL twice, so we discard the text in mURL. */
    1021             285 :   if (!mURL.IsEmpty() && mURL.Equals(aText)) {
    1022               0 :     mURL.Truncate();
    1023                 :   }
    1024             285 :   Write(aText);
    1025                 : }
    1026                 : /**
                         :  * Handles the leaf element |aTag|.
                         :  *
                         :  * Nothing happens when DoOutput() is false.  A line break scheduled
                         :  * through mLineBreakDue is carried out before the MustSuppressLeaf()
                         :  * check.  After that:
                         :  *  - br:  calls EnsureVerticalSpace(mEmptyLines+1), unless the element
                         :  *         has a type attribute equal to "_moz".
                         :  *  - hr:  only with OutputFormatted; writes a line of '-' characters,
                         :  *         mWrapColumn wide (25 when mWrapColumn is 0), placed on a
                         :  *         line of its own.
                         :  *  - img: writes the alt value as is when the alt attribute can be
                         :  *         read; otherwise a non-empty title wrapped as " [title] ".
                         :  * Other tags produce no output here.
                         :  *
                         :  * @param aTag atom of the leaf element.
                         :  * @return NS_OK on every path.
                         :  */
    1027                 : nsresult
    1028              43 : nsPlainTextSerializer::DoAddLeaf(nsIAtom* aTag)
    1029                 : {
    1030                 :   // If we don't want any output, just return
    1031              43 :   if (!DoOutput()) {
    1032               1 :     return NS_OK;
    1033                 :   }
    1034                 : 
    1035              42 :   if (mLineBreakDue)  // carry out a line break that was scheduled earlier
    1036               0 :     EnsureVerticalSpace(mFloatingLines);
    1037                 : 
    1038              42 :   if (MustSuppressLeaf()) {
    1039               0 :     return NS_OK;
    1040                 :   }
    1041                 : 
    1042              42 :   if (aTag == nsGkAtoms::br) {
    1043                 :     // Another egregious editor workaround, see bug 38194:
    1044                 :     // ignore the bogus br tags that the editor sticks here and there.
    1045              84 :     nsAutoString tagAttr;
    1046              42 :     if (NS_FAILED(GetAttributeValue(nsGkAtoms::type, tagAttr))
    1047               0 :         || !tagAttr.EqualsLiteral("_moz")) {
    1048              42 :       EnsureVerticalSpace(mEmptyLines+1);
    1049                 :     }
    1050                 :   }
    1051               0 :   else if (aTag == nsGkAtoms::hr &&
    1052                 :            (mFlags & nsIDocumentEncoder::OutputFormatted)) {
    1053               0 :     EnsureVerticalSpace(0);
    1054                 : 
    1055                 :     // Make a line of dashes as wide as the wrap width
    1056                 :     // XXX honoring percentage would be nice
    1057               0 :     nsAutoString line;
    1058               0 :     PRUint32 width = (mWrapColumn > 0 ? mWrapColumn : 25);  // 25 dashes when no wrap column is set
    1059               0 :     while (line.Length() < width) {
    1060               0 :       line.Append(PRUnichar('-'));
    1061                 :     }
    1062               0 :     Write(line);
    1063                 : 
    1064               0 :     EnsureVerticalSpace(0);
    1065                 :   }
    1066               0 :   else if (aTag == nsGkAtoms::img) {
    1067                 :     /* Output (in decreasing order of preference)
    1068                 :        alt, title or nothing */
    1069                 :     // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
    1070               0 :     nsAutoString imageDescription;
    1071               0 :     if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::alt,
    1072                 :                                        imageDescription))) {
    1073                 :       // If the alt attribute has an empty value (|alt=""|), output nothing
    1074                 :     }
    1075               0 :     else if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::title,
    1076                 :                                             imageDescription))
    1077               0 :              && !imageDescription.IsEmpty()) {
    1078               0 :       imageDescription = NS_LITERAL_STRING(" [") +
    1079               0 :                          imageDescription +
    1080               0 :                          NS_LITERAL_STRING("] ");
    1081                 :     }
    1082                 :    
    1083               0 :     Write(imageDescription);  // NOTE(review): presumably an empty string when neither attribute was read - confirm
    1084                 :   }
    1085                 : 
    1086              42 :   return NS_OK;
    1087                 : }
    1088                 : 
    1089                 : /**
    1090                 :  * Adds as many newlines as necessary to get |noOfRows| empty lines.
    1091                 :  *
    1092                 :  * noOfRows = -1    :   Being in the middle of some line of text
    1093                 :  * noOfRows =  0    :   Being at the start of a line
    1094                 :  * noOfRows =  n>0  :   Having n empty lines before the current line.
                         :  *
                         :  * In every case the pending-break state is cleared afterwards:
                         :  * mLineBreakDue becomes false and mFloatingLines becomes -1.
    1095                 :  */
    1096                 : void
    1097              88 : nsPlainTextSerializer::EnsureVerticalSpace(PRInt32 noOfRows)
    1098                 : {
    1099                 :   // If we have something in the indent we probably want to output
    1100                 :   // it and it's not included in the count for empty lines so we don't
    1101                 :   // realize that we should start a new line.
    1102              88 :   if (noOfRows >= 0 && !mInIndentString.IsEmpty()) {
    1103               0 :     EndLine(false);
    1104               0 :     mInWhitespace = true;
    1105                 :   }
    1106                 : 
    1107             236 :   while(mEmptyLines < noOfRows) {  // presumably EndLine(false) raises mEmptyLines; the loop relies on it - confirm
    1108              60 :     EndLine(false);
    1109              60 :     mInWhitespace = true;
    1110                 :   }
    1111              88 :   mLineBreakDue = false;
    1112              88 :   mFloatingLines = -1;
    1113              88 : }
    1114                 : 
    1115                 : /**
    1116                 :  * This empties the current line cache without adding a NEWLINE.
    1117                 :  * Should not be used if line wrapping is of importance since
    1118                 :  * this function destroys the cache information.
    1119                 :  *
    1120                 :  * It will also write indentation and quotes if we believe ourselves to be
    1121                 :  * at the start of the line. Does nothing when mCurrentLine is empty.
    1122                 :  */
    1123                 : void
    1124             702 : nsPlainTextSerializer::FlushLine()
    1125                 : {
    1126             702 :   if (!mCurrentLine.IsEmpty()) {
    1127             231 :     if (mAtFirstColumn) {
    1128             231 :       OutputQuotesAndIndent(); // XXX: Should we always do this? Bug?
    1129                 :     }
    1130                 :     // Hand the cached text to Output(), which appends it to mOutputString.
    1131             231 :     Output(mCurrentLine);
    1132             231 :     mAtFirstColumn = mAtFirstColumn && mCurrentLine.IsEmpty(); // NOTE(review): line is non-empty in this branch, so this looks like it always stores false - confirm
    1133             231 :     mCurrentLine.Truncate();
    1134             231 :     mCurrentLineWidth = 0; // the cache is empty again, so its width is zero
    1135                 :   }
    1136             702 : }
    1137                 : 
    1138                 : /**
    1139                 :  * Appends |aString| to our current output device (the string mOutputString).
    1140                 :  * Unless OutputPersistNBSP is set, non breaking spaces are first replaced (in
    1141                 :  * place, so the caller's string changes) with normal spaces, since most (all?)
    1142                 :  * receivers of the result won't understand the nbsp and may be confused by it.
    1143                 :  */
    1144                 : void 
    1145             301 : nsPlainTextSerializer::Output(nsString& aString)
    1146                 : {
    1147             301 :   if (!aString.IsEmpty()) {
    1148             301 :     mStartedOutput = true; // remember that something non-empty was written
    1149                 :   }
    1150                 : 
    1151             301 :   if (!(mFlags & nsIDocumentEncoder::OutputPersistNBSP)) {
    1152                 :     // First, replace all nbsp characters with spaces,
    1153                 :     // which the unicode encoder won't do for us.
    1154             301 :     aString.ReplaceChar(kNBSP, kSPACE);
    1155                 :   }
    1156             301 :   mOutputString->Append(aString);
    1157             301 : }
    1158                 : 
    1159                 : static bool // true when line |s| starts with '>', ' ', kNBSP or "From "
    1160               4 : IsSpaceStuffable(const PRUnichar *s) // assumes |s| is null-terminated - TODO confirm
    1161                 : {
    1162              16 :   if (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP ||
    1163              12 :       nsCRT::strncmp(s, NS_LITERAL_STRING("From ").get(), 5) == 0)
    1164               0 :     return true; // callers prepend one ' ' to such lines (space stuffing)
    1165                 :   else
    1166               4 :     return false;
    1167                 : }
    1168                 : 
    1169                 : /**
    1170                 :  * Appends |aLineFragmentLength| code units from |aLineFragment| to the stored
    1171                 :  * line (mCurrentLine). If wrapping is enabled (MayWrap()) and the stored line
    1172                 :  * becomes too wide, a suitable break position is found and each completed
    1173                 :  * line is emitted through EndLine(true, ...); the remainder stays stored.
    1174                 :  */
    1175                 : void
    1176            2605 : nsPlainTextSerializer::AddToLine(const PRUnichar * aLineFragment, 
    1177                 :                                  PRInt32 aLineFragmentLength)
    1178                 : {
    1179            2605 :   PRUint32 prefixwidth = (mCiteQuoteLevel > 0 ? mCiteQuoteLevel + 1:0)+mIndent;
    1180                 :   // (prefixwidth: one column per quote level plus one more, plus the indent)
    1181            2605 :   if (mLineBreakDue)
    1182               0 :     EnsureVerticalSpace(mFloatingLines); // emit the deferred break first
    1183                 : 
    1184            2605 :   PRInt32 linelength = mCurrentLine.Length();
    1185            2605 :   if (0 == linelength) {
    1186             283 :     if (0 == aLineFragmentLength) {
    1187                 :       // Nothing at all. Are you kidding me?
    1188               0 :       return;
    1189                 :     }
    1190                 : 
    1191             283 :     if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
    1192               2 :       if (IsSpaceStuffable(aLineFragment)
    1193                 :          && mCiteQuoteLevel == 0  // We space-stuff quoted lines anyway
    1194                 :          )
    1195                 :         {
    1196                 :           // Space stuffing a la RFC 2646 (format=flowed).
    1197               0 :           mCurrentLine.Append(PRUnichar(' '));
    1198                 :           
    1199               0 :           if (MayWrap()) {
    1200               0 :             mCurrentLineWidth += GetUnicharWidth(' ');
    1201                 : #ifdef DEBUG_wrapping
    1202                 :             NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(),
    1203                 :                                                mCurrentLine.Length()) ==
    1204                 :                          (PRInt32)mCurrentLineWidth,
    1205                 :                          "mCurrentLineWidth and reality out of sync!");
    1206                 : #endif
    1207                 :           }
    1208                 :         }
    1209                 :     }
    1210             283 :     mEmptyLines=-1; // -1: we are now in the middle of a line of text
    1211                 :   }
    1212                 :     
    1213            2605 :   mCurrentLine.Append(aLineFragment, aLineFragmentLength);
    1214            2605 :   if (MayWrap()) {
    1215                 :     mCurrentLineWidth += GetUnicharStringWidth(aLineFragment,
    1216              24 :                                                aLineFragmentLength);
    1217                 : #ifdef DEBUG_wrapping
    1218                 :     NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(),
    1219                 :                                        mCurrentLine.Length()) ==
    1220                 :                  (PRInt32)mCurrentLineWidth,
    1221                 :                  "mCurrentLineWidth and reality out of sync!");
    1222                 : #endif
    1223                 :   }
    1224                 : 
    1225            2605 :   linelength = mCurrentLine.Length();
    1226                 : 
    1227                 :   //  Wrap?
    1228            2605 :   if (MayWrap())
    1229                 :   {
    1230                 : #ifdef DEBUG_wrapping
    1231                 :     NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(),
    1232                 :                                   mCurrentLine.Length()) ==
    1233                 :                  (PRInt32)mCurrentLineWidth,
    1234                 :                  "mCurrentLineWidth and reality out of sync!");
    1235                 : #endif
    1236                 :     // Yes, wrap!
    1237                 :     // The "+4" is to avoid wrap lines that only would be a couple
    1238                 :     // of letters too long. We give this bonus only if the
    1239                 :     // wrapcolumn is more than 20.
    1240              24 :     PRUint32 bonuswidth = (mWrapColumn > 20) ? 4 : 0;
    1241                 : 
    1242                 :     // XXX: Should calculate prefixwidth with GetUnicharStringWidth
    1243              50 :     while(mCurrentLineWidth+prefixwidth > mWrapColumn+bonuswidth) {      
    1244                 :       // We go from the end removing one letter at a time until
    1245                 :       // we have a reasonable width
    1246               2 :       PRInt32 goodSpace = mCurrentLine.Length();
    1247               2 :       PRUint32 width = mCurrentLineWidth;
    1248              15 :       while(goodSpace > 0 && (width+prefixwidth > mWrapColumn)) {
    1249              11 :         goodSpace--;
    1250              11 :         width -= GetUnicharWidth(mCurrentLine[goodSpace]);
    1251                 :       }
    1252                 : 
    1253               2 :       goodSpace++;
    1254                 :       
    1255               2 :       if (mLineBreaker) {
    1256               2 :         goodSpace = mLineBreaker->Prev(mCurrentLine.get(), 
    1257               2 :                                     mCurrentLine.Length(), goodSpace);
    1258               4 :         if (goodSpace != NS_LINEBREAKER_NEED_MORE_TEXT &&
    1259               2 :             nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace-1))) {
    1260               1 :           --goodSpace;    // adjust the position since line breaker returns a position next to space
    1261                 :         }
    1262                 :       }
    1263                 :       // fallback if the line breaker is unavailable: scan back for a space
    1264               2 :       if (!mLineBreaker) {
    1265               0 :         goodSpace = mWrapColumn-prefixwidth;
    1266               0 :         while (goodSpace >= 0 &&
    1267               0 :                !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
    1268               0 :           goodSpace--;
    1269                 :         }
    1270                 :       }
    1271                 :       
    1272               4 :       nsAutoString restOfLine;
    1273               2 :       if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) {
    1274                 :         // If we didn't find a good place to break, accept a long line and
    1275                 :         // try to find another place to break (searching forward this time)
    1276               0 :         goodSpace=(prefixwidth>mWrapColumn+1)?1:mWrapColumn-prefixwidth+1;
    1277               0 :         if (mLineBreaker) {
    1278               0 :           if ((PRUint32)goodSpace < mCurrentLine.Length())
    1279               0 :             goodSpace = mLineBreaker->Next(mCurrentLine.get(), 
    1280               0 :                                            mCurrentLine.Length(), goodSpace);
    1281               0 :           if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT)
    1282               0 :             goodSpace = mCurrentLine.Length();
    1283                 :         }
    1284                 :         // fallback if the line breaker is unavailable: scan forward for a space
    1285               0 :         if (!mLineBreaker) {
    1286               0 :           goodSpace=(prefixwidth>mWrapColumn)?1:mWrapColumn-prefixwidth;
    1287               0 :           while (goodSpace < linelength &&
    1288               0 :                  !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
    1289               0 :             goodSpace++;
    1290                 :           }
    1291                 :         }
    1292                 :       }
    1293                 :       
    1294               2 :       if ((goodSpace < linelength) && (goodSpace > 0)) {
    1295                 :         // Found a place to break
    1296                 : 
    1297                 :         // -1 (trim a char at the break position)
    1298                 :         // only if the line break was a space.
    1299               2 :         if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
    1300               1 :           mCurrentLine.Right(restOfLine, linelength-goodSpace-1);
    1301                 :         }
    1302                 :         else {
    1303               1 :           mCurrentLine.Right(restOfLine, linelength-goodSpace);
    1304                 :         }
    1305                 :         // if breaker was U+0020, it has to consider for delsp=yes support
    1306               2 :         bool breakBySpace = mCurrentLine.CharAt(goodSpace) == ' ';
    1307               2 :         mCurrentLine.Truncate(goodSpace); 
    1308               2 :         EndLine(true, breakBySpace); // soft break: emit the completed part
    1309               2 :         mCurrentLine.Truncate();
    1310                 :         // Space stuff new line?
    1311               2 :         if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
    1312               2 :           if (!restOfLine.IsEmpty() && IsSpaceStuffable(restOfLine.get())
    1313                 :               && mCiteQuoteLevel == 0  // We space-stuff quoted lines anyway
    1314                 :             )
    1315                 :           {
    1316                 :             // Space stuffing a la RFC 2646 (format=flowed).
    1317               0 :             mCurrentLine.Append(PRUnichar(' '));
    1318                 :             //XXX doesn't seem to work correctly for ' '
    1319                 :           }
    1320                 :         }
    1321               2 :         mCurrentLine.Append(restOfLine);
    1322                 :         mCurrentLineWidth = GetUnicharStringWidth(mCurrentLine.get(),
    1323               2 :                                                   mCurrentLine.Length());
    1324               2 :         linelength = mCurrentLine.Length();
    1325               2 :         mEmptyLines = -1; // the carried-over remainder is text on a new line
    1326                 :       } 
    1327                 :       else {
    1328                 :         // Nothing to do. Hopefully we get more data later
    1329                 :         // to use for a place to break line
    1330                 :         break;
    1331                 :       }
    1332                 :     }
    1333                 :   } 
    1334                 :   else {
    1335                 :     // No wrapping.
    1336                 :   }
    1337                 : }
    1338                 : 
    1339                 : /**
    1340                 :  * Outputs mCurrentLine followed by mLineBreak and resets the per-line state.
    1341                 :  * Writes quotes/indent first when at the first column, and strips trailing
    1342                 :  * spaces unless OutputPreformatted is set (hard-break signature lines kept).
    1343                 :  * aSoftlinebreak: break caused by wrapping; aBreakBySpace: break was at a ' '.
    1344                 :  */
    1345                 : void
    1346              62 : nsPlainTextSerializer::EndLine(bool aSoftlinebreak, bool aBreakBySpace)
    1347                 : {
    1348              62 :   PRUint32 currentlinelength = mCurrentLine.Length();
    1349                 : 
    1350              62 :   if (aSoftlinebreak && 0 == currentlinelength) {
    1351                 :     // No meaning
    1352               0 :     return;
    1353                 :   }
    1354                 : 
    1355                 :   /* In non-preformatted mode, remove spaces from the end of the line for
    1356                 :    * format=flowed compatibility. Don't do this for these special cases:
    1357                 :    * "-- ", the signature separator (RFC 2646) shouldn't be touched and
    1358                 :    * "- -- ", the OpenPGP dash-escaped signature separator in inline
    1359                 :    * signed messages according to the OpenPGP standard (RFC 2440).
    1360                 :    */  
    1361             184 :   if (!(mFlags & nsIDocumentEncoder::OutputPreformatted) &&
    1362                 :      (aSoftlinebreak || 
    1363             120 :      !(mCurrentLine.EqualsLiteral("-- ") || mCurrentLine.EqualsLiteral("- -- ")))) {
    1364                 :     // Remove spaces from the end of the line.
    1365             178 :     while(currentlinelength > 0 &&
    1366              54 :           mCurrentLine[currentlinelength-1] == ' ') {
    1367               0 :       --currentlinelength;
    1368                 :     }
    1369              62 :     mCurrentLine.SetLength(currentlinelength);
    1370                 :   }
    1371                 :   
    1372              62 :   if (aSoftlinebreak &&
    1373                 :      (mFlags & nsIDocumentEncoder::OutputFormatFlowed) &&
    1374                 :      (mIndent == 0)) {
    1375                 :     // Add the soft part of the soft linebreak (RFC 2646 4.1)
    1376                 :     // We only do this when there is no indentation since format=flowed
    1377                 :     // lines and indentation doesn't work well together.
    1378                 : 
    1379                 :     // If breaker character is ASCII space with RFC 3676 support (delsp=yes),
    1380                 :     // add two spaces.
    1381               2 :     if ((mFlags & nsIDocumentEncoder::OutputFormatDelSp) && aBreakBySpace)
    1382               1 :       mCurrentLine.Append(NS_LITERAL_STRING("  "));
    1383                 :     else
    1384               1 :       mCurrentLine.Append(PRUnichar(' '));
    1385                 :   }
    1386                 : 
    1387              62 :   if (aSoftlinebreak) {
    1388               2 :     mEmptyLines=0; // a soft break leaves us at the start of a line
    1389                 :   } 
    1390                 :   else {
    1391                 :     // Hard break
    1392              60 :     if (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) {
    1393              52 :       mEmptyLines=-1; // line had content, so the increment below yields 0
    1394                 :     }
    1395                 : 
    1396              60 :     mEmptyLines++;
    1397                 :   }
    1398                 : 
    1399              62 :   if (mAtFirstColumn) {
    1400                 :     // If we don't have anything "real" to output we have to
    1401                 :     // make sure the indent doesn't end in a space since that
    1402                 :     // would trick a format=flowed-aware receiver.
    1403              62 :     bool stripTrailingSpaces = mCurrentLine.IsEmpty();
    1404              62 :     OutputQuotesAndIndent(stripTrailingSpaces);
    1405                 :   }
    1406                 :   // Emit the line plus terminator, then reset all per-line state.
    1407              62 :   mCurrentLine.Append(mLineBreak);
    1408              62 :   Output(mCurrentLine);
    1409              62 :   mCurrentLine.Truncate();
    1410              62 :   mCurrentLineWidth = 0;
    1411              62 :   mAtFirstColumn=true;
    1412              62 :   mInWhitespace=true;
    1413              62 :   mLineBreakDue = false;
    1414              62 :   mFloatingLines = -1;
    1415                 : }
    1416                 : 
    1417                 : 
    1418                 : /**
    1419                 :  * Outputs the calculated and stored indent and text in the indentation. That is
    1420                 :  * quote chars and numbers for numbered lists and such. Stored indent text is
    1421                 :  * reset after use; |stripTrailingSpaces| trims trailing ' ' from the prefix.
    1422                 :  */
    1423                 : void
    1424             293 : nsPlainTextSerializer::OutputQuotesAndIndent(bool stripTrailingSpaces /* = false */)
    1425                 : {
    1426             586 :   nsAutoString stringToOutput;
    1427                 :   
    1428                 :   // Put the mail quote "> " chars in, if appropriate:
    1429             293 :   if (mCiteQuoteLevel > 0) {
    1430               0 :     nsAutoString quotes;
    1431               0 :     for(int i=0; i < mCiteQuoteLevel; i++) {
    1432               0 :       quotes.Append(PRUnichar('>'));
    1433                 :     }
    1434               0 :     if (!mCurrentLine.IsEmpty()) {
    1435                 :       /* Better not to output a space here if the line is empty,
    1436                 :          in case a receiving f=f-aware UA thinks this is a flowed line,
    1437                 :          which it isn't - it's just empty.
    1438                 :          (Flowed lines may be joined with the following one,
    1439                 :          so the empty line may be lost completely.) */
    1440               0 :       quotes.Append(PRUnichar(' '));
    1441                 :     }
    1442               0 :     stringToOutput = quotes;
    1443               0 :     mAtFirstColumn = false;
    1444                 :   }
    1445                 :   
    1446                 :   // Indent if necessary
    1447             293 :   PRInt32 indentwidth = mIndent - mInIndentString.Length(); // stored indent text counts towards mIndent
    1448             301 :   if (indentwidth > 0
    1449               8 :       && (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty())
    1450                 :       // Don't make empty lines look flowed
    1451                 :       ) {
    1452              16 :     nsAutoString spaces;
    1453              40 :     for (int i=0; i < indentwidth; ++i)
    1454              32 :       spaces.Append(PRUnichar(' '));
    1455               8 :     stringToOutput += spaces;
    1456               8 :     mAtFirstColumn = false;
    1457                 :   }
    1458                 :   // Append the stored indent text, then clear it so it is used only once.
    1459             293 :   if (!mInIndentString.IsEmpty()) {
    1460               0 :     stringToOutput += mInIndentString;
    1461               0 :     mAtFirstColumn = false;
    1462               0 :     mInIndentString.Truncate();
    1463                 :   }
    1464                 : 
    1465             293 :   if (stripTrailingSpaces) {
    1466               8 :     PRInt32 lineLength = stringToOutput.Length();
    1467              16 :     while(lineLength > 0 &&
    1468               0 :           ' ' == stringToOutput[lineLength-1]) {
    1469               0 :       --lineLength;
    1470                 :     }
    1471               8 :     stringToOutput.SetLength(lineLength);
    1472                 :   }
    1473                 : 
    1474             293 :   if (!stringToOutput.IsEmpty()) {
    1475               8 :     Output(stringToOutput);
    1476                 :   }
    1477                 :     
    1478             293 : }
    1479                 : 
    1480                 : /**
    1481                 :  * Write a string. This is the highlevel function to use to get text output.
    1482                 :  * By using AddToLine, Output, EndLine and other functions it handles quotation,
    1483                 :  * line wrapping, indentation, whitespace compression and other things.
    1484                 :  */
    1485                 : void
    1486             285 : nsPlainTextSerializer::Write(const nsAString& aStr)
    1487                 : {
    1488                 :   // XXX Copy necessary to use nsString methods and gain
    1489                 :   // access to underlying buffer
    1490             570 :   nsAutoString str(aStr);
    1491                 : 
    1492                 : #ifdef DEBUG_wrapping
    1493                 :   printf("Write(%s): wrap col = %d\n",
    1494                 :          NS_ConvertUTF16toUTF8(str).get(), mWrapColumn);
    1495                 : #endif
    1496                 : 
    1497             285 :   PRInt32 bol = 0;
    1498                 :   PRInt32 newline;
    1499                 :   
    1500             285 :   PRInt32 totLen = str.Length();
    1501                 : 
    1502                 :   // If the string is empty, do nothing:
    1503             285 :   if (totLen <= 0) return;
    1504                 : 
    1505                 :   // For Flowed text change nbsp-ses to spaces at end of lines to allow them
    1506                 :   // to be cut off along with usual spaces if required. (bug #125928)
    1507             285 :   if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
    1508               2 :     for (PRInt32 i = totLen-1; i >= 0; i--) {
    1509               2 :       PRUnichar c = str[i];
    1510               2 :       if ('\n' == c || '\r' == c || ' ' == c || '\t' == c)
    1511               0 :         continue;
    1512               2 :       if (kNBSP == c)
    1513               0 :         str.Replace(i, 1, ' ');
    1514                 :       else
    1515               2 :         break;
    1516                 :     }
    1517                 :   }
    1518                 : 
    1519                 :   // We have two major codepaths here. One that does preformatted text and one
    1520                 :   // that does normal formatted text. The one for preformatted text calls
    1521                 :   // Output directly while the other code path goes through AddToLine.
    1522             285 :   if ((mPreFormatted && !mWrapColumn) || IsInPre()
    1523                 :       || ((mSpanLevel > 0 || mDontWrapAnyQuotes)
    1524               0 :           && mEmptyLines >= 0 && str.First() == PRUnichar('>'))) {
    1525                 :     // No intelligent wrapping.
    1526                 : 
    1527                 :     // This mustn't be mixed with intelligent wrapping without clearing
    1528                 :     // the mCurrentLine buffer before!!!
    1529               0 :     NS_ASSERTION(mCurrentLine.IsEmpty(),
    1530                 :                  "Mixed wrapping data and nonwrapping data on the same line");
    1531               0 :     if (!mCurrentLine.IsEmpty()) {
    1532               0 :       FlushLine();
    1533                 :     }
    1534                 : 
    1535                 :     // Put the mail quote "> " chars in, if appropriate.
    1536                 :     // Have to put it in before every line.
    1537               0 :     while(bol<totLen) {
    1538               0 :       bool outputQuotes = mAtFirstColumn;
    1539               0 :       bool atFirstColumn = mAtFirstColumn;
    1540               0 :       bool outputLineBreak = false;
    1541               0 :       bool spacesOnly = true;
    1542                 : 
    1543                 :       // Find one of '\n' or '\r' using iterators since nsAString
    1544                 :       // doesn't have the old FindCharInSet function.
    1545               0 :       nsAString::const_iterator iter;           str.BeginReading(iter);
    1546               0 :       nsAString::const_iterator done_searching; str.EndReading(done_searching);
    1547               0 :       iter.advance(bol); 
    1548               0 :       PRInt32 new_newline = bol;
    1549               0 :       newline = kNotFound;
    1550               0 :       while(iter != done_searching) {
    1551               0 :         if ('\n' == *iter || '\r' == *iter) {
    1552               0 :           newline = new_newline;
    1553               0 :           break;
    1554                 :         }
    1555               0 :         if (' ' != *iter)
    1556               0 :           spacesOnly = false;
    1557               0 :         ++new_newline;
    1558               0 :         ++iter;
    1559                 :       }
    1560                 : 
    1561                 :       // Done searching
    1562               0 :       nsAutoString stringpart;
    1563               0 :       if (newline == kNotFound) {
    1564                 :         // No new lines.
    1565               0 :         stringpart.Assign(Substring(str, bol, totLen - bol));
    1566               0 :         if (!stringpart.IsEmpty()) {
    1567               0 :           PRUnichar lastchar = stringpart[stringpart.Length()-1];
    1568               0 :           if ((lastchar == '\t') || (lastchar == ' ') ||
    1569                 :              (lastchar == '\r') ||(lastchar == '\n')) {
    1570               0 :             mInWhitespace = true;
    1571                 :           } 
    1572                 :           else {
    1573               0 :             mInWhitespace = false;
    1574                 :           }
    1575                 :         }
    1576               0 :         mEmptyLines=-1;
    1577               0 :         atFirstColumn = mAtFirstColumn && (totLen-bol)==0;
    1578               0 :         bol = totLen;
    1579                 :       } 
    1580                 :       else {
    1581                 :         // There is a newline
    1582               0 :         stringpart.Assign(Substring(str, bol, newline-bol));
    1583               0 :         mInWhitespace = true;
    1584               0 :         outputLineBreak = true;
    1585               0 :         mEmptyLines=0;
    1586               0 :         atFirstColumn = true;
    1587               0 :         bol = newline+1;
    1588               0 :         if ('\r' == *iter && bol < totLen && '\n' == *++iter) {
    1589                 :           // There was a CRLF in the input. This used to be illegal and
    1590                 :           // stripped by the parser. Apparently not anymore. Let's skip
    1591                 :           // over the LF.
    1592               0 :           bol++;
    1593                 :         }
    1594                 :       }
    1595                 : 
    1596               0 :       mCurrentLine.AssignLiteral("");
    1597               0 :       if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
    1598               0 :         if ((outputLineBreak || !spacesOnly) && // bugs 261467,125928
    1599               0 :             !stringpart.EqualsLiteral("-- ") &&
    1600               0 :             !stringpart.EqualsLiteral("- -- "))
    1601               0 :           stringpart.Trim(" ", false, true, true);
    1602               0 :         if (IsSpaceStuffable(stringpart.get()) && stringpart[0] != '>')
    1603               0 :           mCurrentLine.Append(PRUnichar(' '));
    1604                 :       }
    1605               0 :       mCurrentLine.Append(stringpart);
    1606                 : 
    1607               0 :       if (outputQuotes) {
    1608                 :         // Note: this call messes with mAtFirstColumn
    1609               0 :         OutputQuotesAndIndent();
    1610                 :       }
    1611                 : 
    1612               0 :       Output(mCurrentLine);
    1613               0 :       if (outputLineBreak) {
    1614               0 :         Output(mLineBreak);
    1615                 :       }
    1616               0 :       mAtFirstColumn = atFirstColumn;
    1617                 :     }
    1618                 : 
    1619                 :     // Reset mCurrentLine.
    1620               0 :     mCurrentLine.Truncate();
    1621                 : 
    1622                 : #ifdef DEBUG_wrapping
    1623                 :     printf("No wrapping: newline is %d, totLen is %d\n",
    1624                 :            newline, totLen);
    1625                 : #endif
    1626                 :     return;
    1627                 :   }
    1628                 : 
    1629                 :   // Intelligent handling of text
    1630                 :   // If needed, strip out all "end of lines"
    1631                 :   // and multiple whitespace between words
    1632                 :   PRInt32 nextpos;
    1633             285 :   const PRUnichar * offsetIntoBuffer = nsnull;
    1634                 :   
    1635            2451 :   while (bol < totLen) {    // Loop over lines
    1636                 :     // Find a place where we may have to do whitespace compression
    1637            1881 :     nextpos = str.FindCharInSet(" \t\n\r", bol);
    1638                 : #ifdef DEBUG_wrapping
    1639                 :     nsAutoString remaining;
    1640                 :     str.Right(remaining, totLen - bol);
    1641                 :     foo = ToNewCString(remaining);
    1642                 :     //    printf("Next line: bol = %d, newlinepos = %d, totLen = %d, string = '%s'\n",
    1643                 :     //           bol, nextpos, totLen, foo);
    1644                 :     nsMemory::Free(foo);
    1645                 : #endif
    1646                 : 
    1647            1881 :     if (nextpos == kNotFound) {
    1648                 :       // The rest of the string
    1649             285 :       offsetIntoBuffer = str.get() + bol;
    1650             285 :       AddToLine(offsetIntoBuffer, totLen-bol);
    1651             285 :       bol=totLen;
    1652             285 :       mInWhitespace=false;
    1653                 :     } 
    1654                 :     else {
    1655                 :       // There's still whitespace left in the string
    1656            1596 :       if (nextpos != 0 && (nextpos + 1) < totLen) {
    1657            1576 :         offsetIntoBuffer = str.get() + nextpos;
    1658                 :         // skip '\n' if it is between CJ chars
    1659            1576 :         if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) && IS_CJ_CHAR(offsetIntoBuffer[1])) {
    1660               0 :           offsetIntoBuffer = str.get() + bol;
    1661               0 :           AddToLine(offsetIntoBuffer, nextpos-bol);
    1662               0 :           bol = nextpos + 1;
    1663               0 :           continue;
    1664                 :         }
    1665                 :       }
    1666                 :       // If we're already in whitespace and not preformatted, just skip it:
    1667            2032 :       if (mInWhitespace && (nextpos == bol) && !mPreFormatted &&
    1668             436 :           !(mFlags & nsIDocumentEncoder::OutputPreformatted)) {
    1669                 :         // Skip whitespace
    1670             436 :         bol++;
    1671             436 :         continue;
    1672                 :       }
    1673                 : 
    1674            1160 :       if (nextpos == bol) {
    1675                 :         // Note that we are in whitespace.
    1676               0 :         mInWhitespace = true;
    1677               0 :         offsetIntoBuffer = str.get() + nextpos;
    1678               0 :         AddToLine(offsetIntoBuffer, 1);
    1679               0 :         bol++;
    1680               0 :         continue;
    1681                 :       }
    1682                 :       
    1683            1160 :       mInWhitespace = true;
    1684                 :       
    1685            1160 :       offsetIntoBuffer = str.get() + bol;
    1686            1160 :       if (mPreFormatted || (mFlags & nsIDocumentEncoder::OutputPreformatted)) {
    1687                 :         // Preserve the real whitespace character
    1688               0 :         nextpos++;
    1689               0 :         AddToLine(offsetIntoBuffer, nextpos-bol);
    1690               0 :         bol = nextpos;
    1691                 :       } 
    1692                 :       else {
    1693                 :         // Replace the whitespace with a space
    1694            1160 :         AddToLine(offsetIntoBuffer, nextpos-bol);
    1695            1160 :         AddToLine(kSpace.get(),1);
    1696            1160 :         bol = nextpos + 1; // Let's eat the whitespace
    1697                 :       }
    1698                 :     }
    1699                 :   } // Continue looping over the string
    1700                 : }
    1701                 : 
    1702                 : 
    1703                 : /**
    1704                 :  * Looks up attribute aName (namespace kNameSpaceID_None) on mElement via GetAttr(), passing aValueRet to receive the value.
    1705                 :  * Returns NS_OK when GetAttr() reports success; returns NS_ERROR_NOT_AVAILABLE when it does not, or when mElement is null.
    1706                 :  */
    1707                 : nsresult
    1708             280 : nsPlainTextSerializer::GetAttributeValue(nsIAtom* aName,
    1709                 :                                          nsString& aValueRet)
    1710                 : {
    1711             280 :   if (mElement) { // without a current element there is nothing to query
    1712             280 :     if (mElement->GetAttr(kNameSpaceID_None, aName, aValueRet)) {
    1713               0 :       return NS_OK;
    1714                 :     }
    1715                 :   }
    1716                 : 
    1717             280 :   return NS_ERROR_NOT_AVAILABLE; // no element, or GetAttr() returned false
    1718                 : }
    1719                 : 
    1720                 : /**
    1721                 :  * Returns true if the element was inserted by the Mozilla TXT->HTML converter, in which case callers should ignore it.
    1722                 :  * The test: the class attribute fetched by GetAttributeValue() must match moz-txt, or moz-txt preceded by a double quote, under EqualsIgnoreCase with lengths 7 and 8 (presumably a case-insensitive prefix match -- confirm against the string API).
    1723                 :  */
    1724                 : bool 
    1725               4 : nsPlainTextSerializer::IsCurrentNodeConverted()
    1726                 : {
    1727               8 :   nsAutoString value;
    1728               4 :   nsresult rv = GetAttributeValue(nsGkAtoms::_class, value);
    1729               4 :   return (NS_SUCCEEDED(rv) && // a failed lookup means there is no class attribute to inspect
    1730               0 :           (value.EqualsIgnoreCase("moz-txt", 7) ||
    1731               4 :            value.EqualsIgnoreCase("\"moz-txt", 8))); // same marker with a leading double quote
    1732                 : }
    1733                 : 
    1734                 : 
    1735                 : // static -- Returns the tag atom of aContent when aContent is HTML and that atom is a static atom; returns nsnull otherwise. aContent is dereferenced without a null check.
    1736                 : nsIAtom*
    1737            1560 : nsPlainTextSerializer::GetIdForContent(nsIContent* aContent)
    1738                 : {
    1739            1560 :   if (!aContent->IsHTML()) { // non-HTML content never yields an id
    1740               0 :     return nsnull;
    1741                 :   }
    1742                 : 
    1743            1560 :   nsIAtom* localName = aContent->Tag();
    1744            1560 :   return localName->IsStaticAtom() ? localName : nsnull; // non-static atoms are reported as nsnull
    1745                 : }
    1746                 : 
    1747                 : /**
    1748                 :  * Returns true if we are currently inside a <pre>. The check walks
    1749                 :  * the tag stack from the top entry (mTagStackIndex - 1) downwards looking
    1750                 :  * for <pre>, and stops at the first block-level tag, which is assumed to
    1751                 :  * override any <pre> below it in the stack. Doing this fully correctly
    1752                 :  * would require access to style, which this converter does not support.
    1753                 :  */  
    1754                 : bool
    1755             289 : nsPlainTextSerializer::IsInPre()
    1756                 : {
    1757             289 :   PRInt32 i = mTagStackIndex;
    1758            1097 :   while(i > 0) { // i is one past the stack entry being examined
    1759             549 :     if (mTagStack[i - 1] == nsGkAtoms::pre)
    1760               0 :       return true;
    1761             549 :     if (nsContentUtils::IsHTMLBlock(mTagStack[i - 1])) {
    1762                 :       // We assume that any other block-level tag overrides a <pre> below it
    1763              30 :       return false;
    1764                 :     }
    1765             519 :     --i;
    1766                 :   }
    1767                 : 
    1768                 :   // No <pre> anywhere in the stack
    1769             259 :   return false;
    1770                 : }
    1771                 : 
    1772                 : /**
    1773                 :  * Needed only to identify LI elements inside an OL. Walks the tag stack from the top entry downwards and
    1774                 :  * returns true if an OL is found before any UL; returns false if a UL comes first or neither is on the stack.
    1775                 :  */
    1776                 : bool
    1777               0 : nsPlainTextSerializer::IsInOL()
    1778                 : {
    1779               0 :   PRInt32 i = mTagStackIndex;
    1780               0 :   while(--i >= 0) { // pre-decrement: the first entry examined is mTagStackIndex - 1
    1781               0 :     if (mTagStack[i] == nsGkAtoms::ol)
    1782               0 :       return true;
    1783               0 :     if (mTagStack[i] == nsGkAtoms::ul) {
    1784                 :       // If a UL is reached first, the LI belongs to a UL nested in the OL.
    1785               0 :       return false;
    1786                 :     }
    1787                 :   }
    1788                 :   // We may reach here for orphan LI elements (no enclosing OL or UL on the stack).
    1789               0 :   return false;
    1790                 : }
    1791                 : 
    1792                 : /*
    1793                 :   Maps the tag atom aTag to a heading level by comparing it against nsGkAtoms::h1 through h6. @return 0 = no header, 1 = h1, ..., 6 = h6
    1794                 : */
    1795               0 : PRInt32 HeaderLevel(nsIAtom* aTag)
    1796                 : {
    1797               0 :   if (aTag == nsGkAtoms::h1) { // atoms are compared by pointer identity
    1798               0 :     return 1;
    1799                 :   }
    1800               0 :   if (aTag == nsGkAtoms::h2) {
    1801               0 :     return 2;
    1802                 :   }
    1803               0 :   if (aTag == nsGkAtoms::h3) {
    1804               0 :     return 3;
    1805                 :   }
    1806               0 :   if (aTag == nsGkAtoms::h4) {
    1807               0 :     return 4;
    1808                 :   }
    1809               0 :   if (aTag == nsGkAtoms::h5) {
    1810               0 :     return 5;
    1811                 :   }
    1812               0 :   if (aTag == nsGkAtoms::h6) {
    1813               0 :     return 6;
    1814                 :   }
    1815               0 :   return 0; // not one of h1..h6
    1816                 : }
    1817                 : 
    1818                 : 
    1819                 : /*
    1820                 :  * This is an implementation of GetUnicharWidth() and
    1821                 :  * GetUnicharStringWidth() as defined in
    1822                 :  * "The Single UNIX Specification, Version 2, The Open Group, 1997"
    1823                 :  * <http://www.UNIX-systems.org/online.html>
    1824                 :  *
    1825                 :  * Markus Kuhn -- 2000-02-08 -- public domain
    1826                 :  *
    1827                 :  * Minor alterations to fit Mozilla's data types by Daniel Bratell
    1828                 :  */
    1829                 : 
    1830                 : /* These functions define the column width of an ISO 10646 character
    1831                 :  * as follows:
    1832                 :  *
    1833                 :  *    - The null character (U+0000) has a column width of 0.
    1834                 :  *
    1835                 :  *    - Other C0/C1 control characters and DEL will lead to a return
    1836                 :  *      value of -1.
    1837                 :  *
    1838                 :  *    - Non-spacing and enclosing combining characters (general
    1839                 :  *      category code Mn or Me in the Unicode database) have a
    1840                 :  *      column width of 0.
    1841                 :  *
    1842                 :  *    - Spacing characters in the East Asian Wide (W) or East Asian
    1843                 :  *      FullWidth (F) category as defined in Unicode Technical
    1844                 :  *      Report #11 have a column width of 2.
    1845                 :  *
    1846                 :  *    - All remaining characters (including all printable
    1847                 :  *      ISO 8859-1 and WGL4 characters, Unicode control characters,
    1848                 :  *      etc.) have a column width of 1.
    1849                 :  *
    1850                 :  * This implementation assumes that PRUnichar values (wchar_t in the
    1851                 :  * original code) are encoded in ISO 10646.
    1852                 :  */
    1853                 : // Returns the column width of ucs: 0 for U+0000 and for characters inside the combining[] table, -1 for C0/C1 controls and DEL, 2 for the wide/fullwidth ranges tested at the end, and 1 otherwise.
    1854             157 : PRInt32 GetUnicharWidth(PRUnichar ucs)
    1855                 : {
    1856                 :   /* sorted list of non-overlapping, inclusive [first, last] intervals of non-spacing characters; the binary search below relies on this ordering */
    1857                 :   static const struct interval {
    1858                 :     PRUint16 first;
    1859                 :     PRUint16 last;
    1860                 :   } combining[] = {
    1861                 :     { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
    1862                 :     { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
    1863                 :     { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
    1864                 :     { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
    1865                 :     { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
    1866                 :     { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
    1867                 :     { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
    1868                 :     { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
    1869                 :     { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
    1870                 :     { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
    1871                 :     { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
    1872                 :     { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
    1873                 :     { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
    1874                 :     { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
    1875                 :     { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
    1876                 :     { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
    1877                 :     { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
    1878                 :     { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
    1879                 :     { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
    1880                 :     { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
    1881                 :     { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
    1882                 :     { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
    1883                 :     { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
    1884                 :     { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
    1885                 :     { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
    1886                 :     { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
    1887                 :     { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
    1888                 :     { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
    1889                 :     { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
    1890                 :     { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
    1891                 :     { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
    1892                 :   };
    1893             157 :   PRInt32 min = 0; // binary-search bounds are table indices, both inclusive
    1894             157 :   PRInt32 max = sizeof(combining) / sizeof(struct interval) - 1; // index of the last table entry
    1895                 :   PRInt32 mid;
    1896                 : 
    1897                 :   /* test for 8-bit control characters: NUL is width 0, other C0 controls, DEL and C1 controls are -1 */
    1898             157 :   if (ucs == 0)
    1899               0 :     return 0;
    1900             157 :   if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0))
    1901               0 :     return -1;
    1902                 : 
    1903                 :   /* first quick check for Latin-1 etc. characters: anything below the first table entry cannot be combining */
    1904             157 :   if (ucs < combining[0].first)
    1905             109 :     return 1;
    1906                 : 
    1907                 :   /* binary search in table of non-spacing characters */
    1908             384 :   while (max >= min) {
    1909             288 :     mid = (min + max) / 2;
    1910             288 :     if (combining[mid].last < ucs) // ucs lies above this interval
    1911             240 :       min = mid + 1;
    1912              48 :     else if (combining[mid].first > ucs) // ucs lies below this interval
    1913              48 :       max = mid - 1;
    1914               0 :     else if (combining[mid].first <= ucs && combining[mid].last >= ucs) // always true here, since both tests above failed
    1915               0 :       return 0;
    1916                 :   }
    1917                 : 
    1918                 :   /* if we arrive here, ucs is not a combining or C0/C1 control character */
    1919                 : 
    1920                 :   /* fast test for majority of non-wide scripts */
    1921              48 :   if (ucs < 0x1100)
    1922               0 :     return 1;
    1923                 : 
    1924                 :   return 1 + // the parenthesized test below converts to 0 or 1, so the result is 1 or 2
    1925                 :     ((ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
    1926                 :      (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a && /* mask test excludes U+300A, U+300B, U+301A, U+301B */
    1927                 :       ucs != 0x303f) ||                  /* CJK ... Yi */
    1928                 :      (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
    1929                 :      (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
    1930                 :      (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
    1931                 :      (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
    1932              48 :      (ucs >= 0xffe0 && ucs <= 0xffe6));
    1933                 : }
    1934                 : 
    1935                 : // Sums GetUnicharWidth() over at most n characters of pwcs, stopping early at a NUL character; a character reported as -1 (non-printable) is counted as width 1.
    1936              26 : PRInt32 GetUnicharStringWidth(const PRUnichar* pwcs, PRInt32 n)
    1937                 : {
    1938              26 :   PRInt32 w, width = 0;
    1939                 : 
    1940             172 :   for (;*pwcs && n-- > 0; pwcs++) // NOTE(review): *pwcs is tested before n, so pwcs[n] is read once n is exhausted -- confirm callers always pass a buffer readable at that index
    1941             146 :     if ((w = GetUnicharWidth(*pwcs)) < 0)
    1942               0 :       ++width; // Taking 1 as the width of non-printable character, for bug# 94475.
    1943                 :     else
    1944             146 :       width += w;
    1945                 : 
    1946              26 :   return width;
    1947                 : }
    1948                 : 

Generated by: LCOV version 1.7