1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is inline spellchecker code.
16 : *
17 : * The Initial Developer of the Original Code is Google Inc.
18 : * Portions created by the Initial Developer are Copyright (C) 2004-2006
19 : * the Initial Developer. All Rights Reserved.
20 : *
21 : * Contributor(s):
22 : * Brett Wilson <brettw@gmail.com> (original author)
23 : *
24 : * Alternatively, the contents of this file may be used under the terms of
25 : * either the GNU General Public License Version 2 or later (the "GPL"), or
26 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 : * in which case the provisions of the GPL or the LGPL are applicable instead
28 : * of those above. If you wish to allow use of your version of this file only
29 : * under the terms of either the GPL or the LGPL, and not to allow others to
30 : * use your version of this file under the terms of the MPL, indicate your
31 : * decision by deleting the provisions above and replace them with the notice
32 : * and other provisions required by the GPL or the LGPL. If you do not delete
33 : * the provisions above, a recipient may use your version of this file under
34 : * the terms of any one of the MPL, the GPL or the LGPL.
35 : *
36 : * ***** END LICENSE BLOCK ***** */
37 :
38 : #include "nsCOMPtr.h"
39 : #include "nsIDOMDocument.h"
40 : #include "nsIDocument.h"
41 : #include "nsString.h"
42 : #include "nsTArray.h"
43 :
44 : //#define DEBUG_SPELLCHECK
45 : // Forward declarations: this header only refers to these types through pointers.
46 : class nsRange;
47 : class nsINode;
48 :
49 : /**
50 : * This class extracts text from the DOM and builds it into a single string.
51 : * The string includes whitespace breaks wherever non-inline elements begin
52 : * and end. This string is broken into "real words", following somewhat
53 : * complex rules; for example substrings that look like URLs or
54 : * email addresses are treated as single words, but otherwise many kinds of
55 : * punctuation are treated as word separators. GetNextWord provides a way
56 : * to iterate over these "real words".
57 : *
58 : * The basic operation is:
59 : *
60 : * 1. Call Init with the weak pointer to the editor that you're using.
61 : * 2. Call SetEnd to set where you want to stop spellchecking. We'll stop
62 : * at the word boundary after that. If SetEnd is not called, we'll stop
63 : * at the end of the document's root element.
64 : * 3. Call SetPosition to initialize the current position inside the
65 : * previously given range.
66 : * 4. Call GetNextWord over and over until it returns false.
67 : */
68 :
69 : class mozInlineSpellWordUtil
70 0 : {
71 : public:
72 : struct NodeOffset { // A node pointer paired with an integer offset.
73 : nsINode* mNode;
74 : PRInt32 mOffset;
75 :
76 0 : NodeOffset(nsINode* aNode, PRInt32 aOffset) :
77 0 : mNode(aNode), mOffset(aOffset) {}
78 : };
79 : // Default state: null root node, null soft begin/end nodes, next word index -1, soft text not valid.
80 0 : mozInlineSpellWordUtil()
81 : : mRootNode(nsnull),
82 : mSoftBegin(nsnull, 0), mSoftEnd(nsnull, 0),
83 0 : mNextWordIndex(-1), mSoftTextValid(false) {}
84 : // Call first, passing the weak pointer to the editor; the cached documents below are set here.
85 : nsresult Init(nsWeakPtr aWeakEditor);
86 : // Sets where to stop spellchecking; we stop at the word boundary after that point.
87 : nsresult SetEnd(nsINode* aEndNode, PRInt32 aEndOffset);
88 :
89 : // Sets the current position; this should be inside the range. If we are in
90 : // the middle of a word, we'll move to its start.
91 : nsresult SetPosition(nsINode* aNode, PRInt32 aOffset);
92 :
93 : // Given a point inside or immediately following a word, this returns the
94 : // DOM range that exactly encloses that word's characters. The current
95 : // position will be at the end of the word. This will find the previous
96 : // word if the current position is space, so if you care that the point is
97 : // inside the word, you should check the range.
98 : //
99 : // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called
100 : // before you actually generate the range you are interested in and iterate
101 : // the words in it.
102 : nsresult GetRangeForWord(nsIDOMNode* aWordNode, PRInt32 aWordOffset,
103 : nsRange** aRange);
104 :
105 : // Moves to the next word in the range, and retrieves its text and range.
106 : // An empty word and a NULL range are returned when we are done checking.
107 : // aSkipChecking will be set if the word is "special" and shouldn't be
108 : // checked (e.g., an email address).
109 : nsresult GetNextWord(nsAString& aText, nsRange** aRange,
110 : bool* aSkipChecking);
111 :
112 : // Call to normalize some punctuation. This function takes a non-const
113 : // nsSubstring reference so we can access characters directly.
114 : static void NormalizeWord(nsSubstring& aWord);
115 : // Accessors for the documents cached by Init and for mRootNode.
116 : nsIDOMDocument* GetDOMDocument() const { return mDOMDocument; }
117 : nsIDocument* GetDocument() const { return mDocument; }
118 0 : nsINode* GetRootNode() { return mRootNode; }
119 :
120 : private:
121 :
122 : // Cached stuff for the editor, set by Init.
123 : nsCOMPtr<nsIDOMDocument> mDOMDocument;
124 : nsCOMPtr<nsIDocument> mDocument;
125 :
126 : // Range to check; see SetPosition and SetEnd.
127 : nsINode* mRootNode;
128 : NodeOffset mSoftBegin;
129 : NodeOffset mSoftEnd;
130 :
131 : // DOM text covering the soft range, with newlines added at block boundaries
132 : nsString mSoftText;
133 : // A list of where we extracted text from, ordered by mSoftTextOffset. A given
134 : // DOM node appears at most once in this list.
135 0 : struct DOMTextMapping {
136 : NodeOffset mNodeOffset;
137 : PRInt32 mSoftTextOffset;
138 : PRInt32 mLength;
139 :
140 0 : DOMTextMapping(NodeOffset aNodeOffset, PRInt32 aSoftTextOffset, PRInt32 aLength)
141 : : mNodeOffset(aNodeOffset), mSoftTextOffset(aSoftTextOffset),
142 0 : mLength(aLength) {}
143 : };
144 : nsTArray<DOMTextMapping> mSoftTextDOMMapping;
145 :
146 : // A list of the "real words" in mSoftText, ordered by mSoftTextOffset
147 0 : struct RealWord {
148 : PRInt32 mSoftTextOffset;
149 : PRInt32 mLength;
150 : bool mCheckableWord;
151 :
152 0 : RealWord(PRInt32 aOffset, PRInt32 aLength, bool aCheckable)
153 0 : : mSoftTextOffset(aOffset), mLength(aLength), mCheckableWord(aCheckable) {}
154 0 : PRInt32 EndOffset() const { return mSoftTextOffset + mLength; }
155 : };
156 : nsTArray<RealWord> mRealWords;
157 : PRInt32 mNextWordIndex;
158 :
159 : bool mSoftTextValid;
160 : // InvalidateWords clears mSoftTextValid; EnsureWords presumably rebuilds the soft text and words when the flag is clear -- TODO confirm against the implementation.
161 0 : void InvalidateWords() { mSoftTextValid = false; }
162 : void EnsureWords();
163 :
164 : PRInt32 MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset);
165 : // Map an offset into mSoftText to a DOM position. Note that two DOM positions
166 : // can map to the same mSoftText offset, e.g. given nodes A=aaaa and B=bbbb
167 : // forming aaaabbbb, (A,4) and (B,0) give the same string offset. So,
168 : // aHint controls which position we return ... if aHint is HINT_END
169 : // then the position indicates the END of a range so we return (A,4). Otherwise
170 : // the position indicates the START of a range so we return (B,0).
171 : enum DOMMapHint { HINT_BEGIN, HINT_END };
172 : NodeOffset MapSoftTextOffsetToDOMPosition(PRInt32 aSoftTextOffset,
173 : DOMMapHint aHint);
174 : // Finds the index of the real word containing aSoftTextOffset, or -1 if none.
175 : // If it's exactly between two words, then if aHint is HINT_BEGIN, return the
176 : // later word (favouring the assumption that it's the BEGINning of a word),
177 : // otherwise return the earlier word (assuming it's the END of a word).
178 : // If aSearchForward is true, then if we don't find a word at the given
179 : // position, search forward until we do find a word and return that (if found).
180 : PRInt32 FindRealWordContaining(PRInt32 aSoftTextOffset, DOMMapHint aHint,
181 : bool aSearchForward);
182 :
183 : // Build mSoftText and mSoftTextDOMMapping
184 : void BuildSoftText();
185 : // Build mRealWords array
186 : void BuildRealWords();
187 :
188 : void SplitDOMWord(PRInt32 aStart, PRInt32 aEnd);
189 :
190 : // Convenience functions, object must be initialized
191 : nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd, nsRange** aRange);
192 : nsresult MakeRangeForWord(const RealWord& aWord, nsRange** aRange);
193 : };
|