1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is mozilla.org code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 1998
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : *
24 : * Alternatively, the contents of this file may be used under the terms of
25 : * either of the GNU General Public License Version 2 or later (the "GPL"),
26 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 : * in which case the provisions of the GPL or the LGPL are applicable instead
28 : * of those above. If you wish to allow use of your version of this file only
29 : * under the terms of either the GPL or the LGPL, and not to allow others to
30 : * use your version of this file under the terms of the MPL, indicate your
31 : * decision by deleting the provisions above and replace them with the notice
32 : * and other provisions required by the GPL or the LGPL. If you do not delete
33 : * the provisions above, a recipient may use your version of this file under
34 : * the terms of any one of the MPL, the GPL or the LGPL.
35 : *
36 : * ***** END LICENSE BLOCK ***** */
37 :
38 : /*
39 : * nsIContentSerializer implementation that can be used with an
40 : * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
41 : * (e.g. for copy/paste as plaintext).
42 : */
43 :
44 : #ifndef nsPlainTextSerializer_h__
45 : #define nsPlainTextSerializer_h__
46 :
47 : #include "nsIContentSerializer.h"
48 : #include "nsCOMPtr.h"
49 : #include "nsString.h"
50 : #include "nsILineBreaker.h"
51 : #include "nsIContent.h"
52 : #include "nsIAtom.h"
53 : #include "nsIDocumentEncoder.h"
54 : #include "nsTArray.h"
55 :
// Forward declaration only: this header uses mozilla::dom::Element solely as a
// pointer parameter type and as the nsRefPtr<> target of mElement.
56 : namespace mozilla {
57 : namespace dom {
58 : class Element;
59 : } // namespace dom
60 : } // namespace mozilla
61 :
// Serializes DOM content to plain text. Implements nsIContentSerializer: the
// Append*/Flush methods below receive a node (or document) and an output
// string (aStr); the protected members hold the line-building, wrapping,
// indentation and quoting state.
62 : class nsPlainTextSerializer : public nsIContentSerializer
63 : {
64 : public:
65 : nsPlainTextSerializer();
66 : virtual ~nsPlainTextSerializer();
67 :
68 : NS_DECL_ISUPPORTS
69 :
70 : // nsIContentSerializer
71 : NS_IMETHOD Init(PRUint32 flags, PRUint32 aWrapColumn,
72 : const char* aCharSet, bool aIsCopying,
73 : bool aIsWholeDocument);
74 :
75 : NS_IMETHOD AppendText(nsIContent* aText, PRInt32 aStartOffset,
76 : PRInt32 aEndOffset, nsAString& aStr);
77 : NS_IMETHOD AppendCDATASection(nsIContent* aCDATASection,
78 : PRInt32 aStartOffset, PRInt32 aEndOffset,
79 : nsAString& aStr);
// Processing instructions, comments and doctypes are accepted but ignored:
// these three inline overrides return NS_OK without touching aStr.
80 0 : NS_IMETHOD AppendProcessingInstruction(nsIContent* aPI,
81 : PRInt32 aStartOffset,
82 : PRInt32 aEndOffset,
83 0 : nsAString& aStr) { return NS_OK; }
84 0 : NS_IMETHOD AppendComment(nsIContent* aComment, PRInt32 aStartOffset,
85 0 : PRInt32 aEndOffset, nsAString& aStr) { return NS_OK; }
86 0 : NS_IMETHOD AppendDoctype(nsIContent *aDoctype,
87 0 : nsAString& aStr) { return NS_OK; }
88 : NS_IMETHOD AppendElementStart(mozilla::dom::Element* aElement,
89 : mozilla::dom::Element* aOriginalElement,
90 : nsAString& aStr);
91 : NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement,
92 : nsAString& aStr);
93 : NS_IMETHOD Flush(nsAString& aStr);
94 :
95 : NS_IMETHOD AppendDocumentStart(nsIDocument *aDocument,
96 : nsAString& aStr);
97 :
98 : protected:
// Internal helpers. They are only declared here; their definitions are not
// part of this header.
99 : nsresult GetAttributeValue(nsIAtom* aName, nsString& aValueRet);
100 : void AddToLine(const PRUnichar* aStringToAdd, PRInt32 aLength);
101 : void EndLine(bool softlinebreak, bool aBreakBySpace = false);
102 : void EnsureVerticalSpace(PRInt32 noOfRows);
103 : void FlushLine();
104 : void OutputQuotesAndIndent(bool stripTrailingSpaces=false);
105 : void Output(nsString& aString);
106 : void Write(const nsAString& aString);
107 : bool IsInPre();
108 : bool IsInOL();
109 : bool IsCurrentNodeConverted();
110 : bool MustSuppressLeaf();
111 :
112 : /**
113 : * Returns the local name of the element as an atom if the element is an
114 : * HTML element and the atom is a static atom. Otherwise, nsnull is returned.
115 : */
116 : static nsIAtom* GetIdForContent(nsIContent* aContent);
117 : nsresult DoOpenContainer(nsIAtom* aTag);
118 : nsresult DoCloseContainer(nsIAtom* aTag);
119 : nsresult DoAddLeaf(nsIAtom* aTag);
120 : void DoAddText(bool aIsWhitespace, const nsAString& aText);
121 :
122 : // Inlined functions
// True when wrapping is enabled: mWrapColumn is non-zero and mFlags has
// nsIDocumentEncoder::OutputFormatted or nsIDocumentEncoder::OutputWrap set.
123 5444 : inline bool MayWrap()
124 : {
125 : return mWrapColumn &&
126 : ((mFlags & nsIDocumentEncoder::OutputFormatted) ||
127 5444 : (mFlags & nsIDocumentEncoder::OutputWrap));
128 : }
129 :
// True when mHeadLevel is 0.
// NOTE(review): presumably mHeadLevel counts enclosing <head> elements, so
// that nothing is output while inside one -- confirm in the implementation.
130 1104 : inline bool DoOutput()
131 : {
132 1104 : return mHeadLevel == 0;
133 : }
134 :
135 : // Stack handling functions
// These operate on a caller-supplied nsTArray<bool>; presumably used with
// mHasWrittenCellsForRow and mIsInCiteBlockquote below -- TODO confirm.
136 : bool GetLastBool(const nsTArray<bool>& aStack);
137 : void SetLastBool(nsTArray<bool>& aStack, bool aValue);
138 : void PushBool(nsTArray<bool>& aStack, bool aValue);
139 : bool PopBool(nsTArray<bool>& aStack);
140 :
141 : protected:
142 : nsString mCurrentLine;
// Compared against 0 by DoOutput().
143 : PRUint32 mHeadLevel;
144 : bool mAtFirstColumn;
145 :
146 : // Handling of quoted text (for mail):
147 : // Quotes need to be wrapped differently from non-quoted text,
148 : // because quoted text has a few extra characters (e.g. ">> ")
149 : // which makes the line length longer.
150 : // Mail can represent quotes in different ways:
151 : // Not wrapped in any special tag (if mail.compose.wrap_to_window_width)
152 : // or in a <span>.
153 : bool mDontWrapAnyQuotes; // no special quote markers
154 :
155 : bool mStructs; // Output structs (pref)
156 :
157 : // If we've just written out a cite blockquote, we need to remember it
158 : // so we don't duplicate spaces before a <pre wrap> (which mail uses to quote
159 : // old messages).
160 : bool mHasWrittenCiteBlockquote;
161 :
162 : PRInt32 mIndent;
163 : // mInIndentString keeps a header that has to be written in the indent.
164 : // That could be, for instance, the bullet in a bulleted list.
165 : nsString mInIndentString;
166 : PRInt32 mCiteQuoteLevel;
// Tested against nsIDocumentEncoder::Output* bits in MayWrap().
// NOTE(review): declared signed (PRInt32) although Init() takes its flags
// as PRUint32 -- confirm this mismatch is harmless.
167 : PRInt32 mFlags;
168 : PRInt32 mFloatingLines; // To store the number of lazy line breaks
169 :
170 : // The wrap column is how many standard-sized chars (western languages)
171 : // should be allowed on a line. There could be fewer chars if the chars
172 : // are wider than latin chars, or more if the chars are narrower.
// A value of 0 makes MayWrap() return false.
173 : PRUint32 mWrapColumn;
174 :
175 : // The width of the line as it will appear on the screen (approx.)
176 : PRUint32 mCurrentLineWidth;
177 :
178 : // Treat quoted text as though it's preformatted -- don't wrap it.
179 : // Having it on a pref is a temporary measure, See bug 69638.
// NOTE(review): the two lines above do not obviously describe mSpanLevel
// (a counter, not a pref); they look like a leftover from a removed member.
// Presumably mSpanLevel tracks <span> nesting depth -- TODO confirm.
180 : PRInt32 mSpanLevel;
181 :
182 :
183 : PRInt32 mEmptyLines; // Will be the number of empty lines before
184 : // the current. 0 if we are starting a new
185 : // line and -1 if we are in a line.
186 :
187 : bool mInWhitespace;
188 : bool mPreFormatted;
189 : bool mStartedOutput; // we've produced at least a character
190 :
191 : // While handling a new tag, this variable records whether a line break
192 : // is due because of a closing tag. It is set to true while closing tags,
193 : // so that opening tags are guaranteed to start with appropriate line breaks.
194 : bool mLineBreakDue;
195 :
196 : nsString mURL;
197 : PRInt32 mHeaderStrategy; /* Header strategy (pref)
198 : 0 = no indentation
199 : 1 = indentation, increased with
200 : header level (default)
201 : 2 = numbering and slight indentation */
202 : PRInt32 mHeaderCounter[7]; /* For header-numbering:
203 : Number of previous headers of
204 : the same depth and in the same
205 : section.
206 : mHeaderCounter[1] for <h1> etc. */
// NOTE(review): with 7 slots and [1] used for <h1>, slot [0] is presumably
// unused -- confirm.
207 :
// NOTE(review): presumably the element currently being processed (the
// Append* methods receive Element pointers) -- confirm in the implementation.
208 : nsRefPtr<mozilla::dom::Element> mElement;
209 :
210 : // For handling table rows
211 : nsAutoTArray<bool, 8> mHasWrittenCellsForRow;
212 :
213 : // Values obtained in OpenContainer that are (also) needed in CloseContainer
214 : nsAutoTArray<bool, 8> mIsInCiteBlockquote;
215 :
216 : // The output data
// NOTE(review): raw pointer; presumably refers to the aStr passed into the
// Append*/Flush calls and is not owned by this object -- confirm lifetime.
217 : nsAString* mOutputString;
218 :
219 : // The tag stack: the stack of tags we're operating on, so we can nest.
220 : // The stack only ever points to static atoms, so they don't need to be
221 : // refcounted.
// NOTE(review): the array itself is a raw pointer; its allocation, capacity
// and release are not visible in this header -- check ctor/dtor.
222 : nsIAtom** mTagStack;
223 : PRUint32 mTagStackIndex;
224 :
225 : // Content in the stack above this index should be ignored:
226 : PRUint32 mIgnoreAboveIndex;
227 :
228 : // The stack for ordered lists
// NOTE(review): raw pointer as well; ownership and capacity are not visible
// in this header -- check ctor/dtor.
229 : PRInt32 *mOLStack;
230 : PRUint32 mOLStackIndex;
231 :
232 : PRUint32 mULCount;
233 :
234 : nsString mLineBreak;
235 : nsCOMPtr<nsILineBreaker> mLineBreaker;
236 :
237 : // Convenience constant. It would be nice to have it as a const static
238 : // variable, but that causes issues with OpenBSD and module unloading.
239 : const nsString kSpace;
240 : };
241 :
// Factory entry point for the plain-text serializer; reports status as an
// nsresult and takes an nsIContentSerializer** out-parameter.
// NOTE(review): presumably creates an nsPlainTextSerializer and hands it back
// through aSerializer; the definition is not in this header -- confirm the
// ownership/AddRef convention there.
242 : nsresult
243 : NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer);
244 :
245 : #endif
|