LCOV - code coverage report
Current view: directory - extensions/spellcheck/src - mozInlineSpellWordUtil.cpp (source / functions) Found Hit Coverage
Test: app.info Lines: 420 0 0.0 %
Date: 2012-06-02 Functions: 34 0 0.0 %

       1                 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2                 : /* ***** BEGIN LICENSE BLOCK *****
       3                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       4                 :  *
       5                 :  * The contents of this file are subject to the Mozilla Public License Version
       6                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       7                 :  * the License. You may obtain a copy of the License at
       8                 :  * http://www.mozilla.org/MPL/
       9                 :  *
      10                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      11                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      12                 :  * for the specific language governing rights and limitations under the
      13                 :  * License.
      14                 :  *
      15                 :  * The Original Code is inline spellchecker code.
      16                 :  *
      17                 :  * The Initial Developer of the Original Code is Google Inc.
      18                 :  * Portions created by the Initial Developer are Copyright (C) 2004-2006
      19                 :  * the Initial Developer. All Rights Reserved.
      20                 :  *
      21                 :  * Contributor(s):
      22                 :  *   Brett Wilson <brettw@gmail.com> (original author)
      23                 :  *   Robert O'Callahan <rocallahan@novell.com>
      24                 :  *   Ms2ger <ms2ger@gmail.com>
      25                 :  *
      26                 :  * Alternatively, the contents of this file may be used under the terms of
      27                 :  * either the GNU General Public License Version 2 or later (the "GPL"), or
      28                 :  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      29                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      30                 :  * of those above. If you wish to allow use of your version of this file only
      31                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      32                 :  * use your version of this file under the terms of the MPL, indicate your
      33                 :  * decision by deleting the provisions above and replace them with the notice
      34                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      35                 :  * the provisions above, a recipient may use your version of this file under
      36                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      37                 :  *
      38                 :  * ***** END LICENSE BLOCK ***** */
      39                 : 
      40                 : #include "mozInlineSpellWordUtil.h"
      41                 : #include "nsDebug.h"
      42                 : #include "nsIAtom.h"
      43                 : #include "nsComponentManagerUtils.h"
      44                 : #include "nsIDOMCSSStyleDeclaration.h"
      45                 : #include "nsIDOMElement.h"
      46                 : #include "nsIDOMRange.h"
      47                 : #include "nsIEditor.h"
      48                 : #include "nsIDOMNode.h"
      49                 : #include "nsIDOMHTMLBRElement.h"
      50                 : #include "nsUnicharUtilCIID.h"
      51                 : #include "nsUnicodeProperties.h"
      52                 : #include "nsServiceManagerUtils.h"
      53                 : #include "nsIContent.h"
      54                 : #include "nsTextFragment.h"
      55                 : #include "mozilla/dom/Element.h"
      56                 : #include "nsIFrame.h"
      57                 : #include "nsRange.h"
      58                 : #include "nsContentUtils.h"
      59                 : 
      60                 : using namespace mozilla;
      61                 : 
      62                 : // IsIgnorableCharacter
      63                 : //
      64                 : //    These characters are ones that we should ignore in input.
      65                 : //    Returns true only for the three code points tested below; the file-local NormalizeWord() drops them from words.
      66               0 : inline bool IsIgnorableCharacter(PRUnichar ch)
      67                 : {
      68                 :   return (ch == 0x200D || // ZERO-WIDTH JOINER
      69                 :           ch == 0xAD ||   // SOFT HYPHEN
      70               0 :           ch == 0x1806);  // MONGOLIAN TODO SOFT HYPHEN
      71                 : }
      72                 : 
      73                 : // IsConditionalPunctuation
      74                 : //
      75                 : //    Some characters (like apostrophes) require characters on each side to be
      76                 : //    part of a word, and are otherwise punctuation.
      77                 : //    Returns true for the ASCII apostrophe and for U+2019 only.
      78               0 : inline bool IsConditionalPunctuation(PRUnichar ch)
      79                 : {
      80                 :   return (ch == '\'' ||
      81               0 :           ch == 0x2019); // RIGHT SINGLE QUOTATION MARK
      82                 : }
      83                 : 
      84                 : // mozInlineSpellWordUtil::Init
      85                 : //    Resolves aWeakEditor and caches the editor's document (mDOMDocument, mDocument) and root element (mRootNode).
      86                 : nsresult
      87               0 : mozInlineSpellWordUtil::Init(nsWeakPtr aWeakEditor)
      88                 : {
      89                 :   nsresult rv;
      90                 : 
      91                 :   // getting the editor can fail commonly because the editor was detached, so
      92                 :   // don't assert
      93               0 :   nsCOMPtr<nsIEditor> editor = do_QueryReferent(aWeakEditor, &rv);
      94               0 :   if (NS_FAILED(rv))
      95               0 :     return rv;
      96                 :   // A document is mandatory: a failed lookup is propagated and a null document yields NS_ERROR_NULL_POINTER.
      97               0 :   nsCOMPtr<nsIDOMDocument> domDoc;
      98               0 :   rv = editor->GetDocument(getter_AddRefs(domDoc));
      99               0 :   NS_ENSURE_SUCCESS(rv, rv);
     100               0 :   NS_ENSURE_TRUE(domDoc, NS_ERROR_NULL_POINTER);
     101                 : 
     102               0 :   mDOMDocument = domDoc;
     103               0 :   mDocument = do_QueryInterface(domDoc);
     104                 : 
     105                 :   // Find the root node for the editor. For contenteditable we'll need something
     106                 :   // cleverer here.
     107               0 :   nsCOMPtr<nsIDOMElement> rootElt;
     108               0 :   rv = editor->GetRootElement(getter_AddRefs(rootElt));
     109               0 :   NS_ENSURE_SUCCESS(rv, rv);
     110                 :   // The root element is stored as an nsINode; a null result is only asserted on, not turned into an error.
     111               0 :   nsCOMPtr<nsINode> rootNode = do_QueryInterface(rootElt);
     112               0 :   mRootNode = rootNode;
     113               0 :   NS_ASSERTION(mRootNode, "GetRootElement returned null *and* claimed to suceed!");
     114               0 :   return NS_OK;  // reached even when mRootNode ended up null
     115                 : }
     116                 : // Returns whether aNode reports the nsINode::eTEXT node type. aNode is dereferenced without a null check.
     117                 : static inline bool
     118               0 : IsTextNode(nsINode* aNode)
     119                 : {
     120               0 :   return aNode->IsNodeOfType(nsINode::eTEXT);
     121                 : }
     122                 : // Callback type for FindNextNode: receives the node being left plus the caller's aClosure pointer.
     123                 : typedef void (* OnLeaveNodeFunPtr)(nsINode* aNode, void* aClosure);
     124                 : // FindNextNode returns the node that follows aNode, or null when the walk would step outside aRoot.
     125                 : // Find the next node in the DOM tree in preorder.
     126                 : // Calls OnLeaveNodeFunPtr when the traversal leaves a node, which is
     127                 : // why we can't just use GetNextNode here, sadly.
     128                 : static nsINode*
     129               0 : FindNextNode(nsINode* aNode, nsINode* aRoot,
     130                 :              OnLeaveNodeFunPtr aOnLeaveNode, void* aClosure)
     131                 : {
     132               0 :   NS_PRECONDITION(aNode, "Null starting node?");
     133                 :   // Preorder: descend into the first child whenever there is one.
     134               0 :   nsINode* next = aNode->GetFirstChild();
     135               0 :   if (next)
     136               0 :     return next;
     137                 :   
     138                 :   // Don't look at siblings or otherwise outside of aRoot
     139               0 :   if (aNode == aRoot)
     140               0 :     return nsnull;
     141                 :   // Stepping sideways to a sibling does not invoke the callback.
     142               0 :   next = aNode->GetNextSibling();
     143               0 :   if (next)
     144               0 :     return next;
     145                 : 
     146                 :   // Go up. The optional callback fires only here: for the starting node and for each ancestor that has no next sibling.
     147               0 :   for (;;) {
     148               0 :     if (aOnLeaveNode) {
     149               0 :       aOnLeaveNode(aNode, aClosure);
     150                 :     }
     151                 :     
     152               0 :     next = aNode->GetParent();
     153               0 :     if (next == aRoot || ! next)  // reached the root, or ran out of parents
     154               0 :       return nsnull;
     155               0 :     aNode = next;
     156                 :     
     157               0 :     next = aNode->GetNextSibling();
     158               0 :     if (next)
     159               0 :       return next;
     160                 :   }
     161                 : }
     162                 : 
     163                 : // aNode is not a text node. Find the first text node starting at aNode/aOffset
     164                 : // in a preorder DOM traversal. Returns null when the traversal ends without meeting a text node.
     165                 : static nsINode*
     166               0 : FindNextTextNode(nsINode* aNode, PRInt32 aOffset, nsINode* aRoot)
     167                 : {
     168               0 :   NS_PRECONDITION(aNode, "Null starting node?");
     169               0 :   NS_ASSERTION(!IsTextNode(aNode), "FindNextTextNode should start with a non-text node");
     170                 : 
     171                 :   nsINode* checkNode;
     172                 :   // Need to start at the aOffset'th child
     173               0 :   nsIContent* child = aNode->GetChildAt(aOffset);
     174                 : 
     175               0 :   if (child) {
     176               0 :     checkNode = child;
     177                 :   } else {
     178                 :     // aOffset was beyond the end of the child list. 
     179                 :     // goto next node after the last descendant of aNode in
     180                 :     // a preorder DOM traversal.
     181               0 :     checkNode = aNode->GetNextNonChildNode(aRoot);
     182                 :   }
     183                 :   // Advance node by node until a text node turns up or the traversal runs out.
     184               0 :   while (checkNode && !IsTextNode(checkNode)) {
     185               0 :     checkNode = checkNode->GetNextNode(aRoot);
     186                 :   }
     187               0 :   return checkNode;
     188                 : }
     189                 : 
     190                 : // mozInlineSpellWordUtil::SetEnd
     191                 : //
     192                 : //    We have two ranges "hard" and "soft". The hard boundary is simply
     193                 : //    the scope of the root node. The soft boundary is that which is set
     194                 : //    by the caller of this class by calling this function. If this function is
     195                 : //    not called, the soft boundary is the same as the hard boundary.
     196                 : //
     197                 : //    When we reach the soft boundary (mSoftEnd), we keep
     198                 : //    going until we reach the end of a word. This allows the caller to set the
     199                 : //    end of the range to anything, and we will always check whole multiples of
     200                 : //    words. When we reach the hard boundary we stop no matter what.
     201                 : //
     202                 : //    There is no beginning soft boundary. This is because we only go to the
     203                 : //    previous node once, when finding the previous word boundary in
     204                 : //    SetPosition(). You might think of the soft boundary as being this initial
     205                 : //    position.
     206                 : //    This function always returns NS_OK and invalidates the cached words before storing the new end.
     207                 : nsresult
     208               0 : mozInlineSpellWordUtil::SetEnd(nsINode* aEndNode, PRInt32 aEndOffset)
     209                 : {
     210               0 :   NS_PRECONDITION(aEndNode, "Null end node?");
     211                 : 
     212               0 :   NS_ASSERTION(mRootNode, "Not initialized");
     213                 : 
     214               0 :   InvalidateWords();
     215                 : 
     216               0 :   if (!IsTextNode(aEndNode)) {
     217                 :     // End at the start of the first text node after aEndNode/aEndOffset.
     218               0 :     aEndNode = FindNextTextNode(aEndNode, aEndOffset, mRootNode);  // may be null when no text node follows
     219               0 :     aEndOffset = 0;
     220                 :   }
     221               0 :   mSoftEnd = NodeOffset(aEndNode, aEndOffset);
     222               0 :   return NS_OK;
     223                 : }
     224                 : // Stores aNode/aOffset as mSoftBegin, calls EnsureWords() and sets mNextWordIndex for that position. Always returns NS_OK.
     225                 : nsresult
     226               0 : mozInlineSpellWordUtil::SetPosition(nsINode* aNode, PRInt32 aOffset)
     227                 : {
     228               0 :   InvalidateWords();
     229                 : 
     230               0 :   if (!IsTextNode(aNode)) {
     231                 :     // Start at the start of the first text node after aNode/aOffset.
     232               0 :     aNode = FindNextTextNode(aNode, aOffset, mRootNode);
     233               0 :     aOffset = 0;
     234                 :   }
     235               0 :   mSoftBegin = NodeOffset(aNode, aOffset);
     236                 : 
     237               0 :   EnsureWords();
     238                 :   // On a negative offset the function returns early and mNextWordIndex is left as it was.
     239               0 :   PRInt32 textOffset = MapDOMPositionToSoftTextOffset(mSoftBegin);
     240               0 :   if (textOffset < 0)
     241               0 :     return NS_OK;
     242               0 :   mNextWordIndex = FindRealWordContaining(textOffset, HINT_END, true);
     243               0 :   return NS_OK;
     244                 : }
     245                 : // Runs BuildSoftText() and BuildRealWords() and then sets mSoftTextValid; does nothing while that flag is already true.
     246                 : void
     247               0 : mozInlineSpellWordUtil::EnsureWords()
     248                 : {
     249               0 :   if (mSoftTextValid)
     250               0 :     return;
     251               0 :   BuildSoftText();
     252               0 :   BuildRealWords();
     253               0 :   mSoftTextValid = true;
     254                 : }
     255                 : // Maps the start and end soft-text offsets of aWord to DOM positions and returns the result of MakeRange() over them.
     256                 : nsresult
     257               0 : mozInlineSpellWordUtil::MakeRangeForWord(const RealWord& aWord, nsRange** aRange)
     258                 : {
     259               0 :   NodeOffset begin = MapSoftTextOffsetToDOMPosition(aWord.mSoftTextOffset, HINT_BEGIN);
     260               0 :   NodeOffset end = MapSoftTextOffsetToDOMPosition(aWord.EndOffset(), HINT_END);
     261               0 :   return MakeRange(begin, end, aRange);
     262                 : }
     263                 : 
     264                 : // mozInlineSpellWordUtil::GetRangeForWord
     265                 : //    Produces the range of the word found at aWordNode/aWordOffset. Overwrites mSoftBegin and mSoftEnd with that point.
     266                 : nsresult
     267               0 : mozInlineSpellWordUtil::GetRangeForWord(nsIDOMNode* aWordNode,
     268                 :                                         PRInt32 aWordOffset,
     269                 :                                         nsRange** aRange)
     270                 : {
     271                 :   // Set our soft end and start
     272               0 :   nsCOMPtr<nsINode> wordNode = do_QueryInterface(aWordNode);
     273               0 :   NodeOffset pt = NodeOffset(wordNode, aWordOffset);
     274                 :   
     275               0 :   InvalidateWords();
     276               0 :   mSoftBegin = mSoftEnd = pt;
     277               0 :   EnsureWords();
     278                 :   // Both fallbacks below build a range that begins and ends at pt itself.
     279               0 :   PRInt32 offset = MapDOMPositionToSoftTextOffset(pt);
     280               0 :   if (offset < 0)
     281               0 :     return MakeRange(pt, pt, aRange);
     282               0 :   PRInt32 wordIndex = FindRealWordContaining(offset, HINT_BEGIN, false);
     283               0 :   if (wordIndex < 0)
     284               0 :     return MakeRange(pt, pt, aRange);
     285               0 :   return MakeRangeForWord(mRealWords[wordIndex], aRange);
     286                 : }
     287                 : 
     288                 : // This is to fix characters that the spellchecker may not like. Writes aLen characters of aInput, starting at aPos, into aOutput (cleared first).
     289                 : static void
     290               0 : NormalizeWord(const nsSubstring& aInput, PRInt32 aPos, PRInt32 aLen, nsAString& aOutput)
     291                 : {
     292               0 :   aOutput.Truncate();
     293               0 :   for (PRInt32 i = 0; i < aLen; i++) {
     294               0 :     PRUnichar ch = aInput.CharAt(i + aPos);
     295                 : 
     296                 :     // remove ignorable characters from the word
     297               0 :     if (IsIgnorableCharacter(ch))
     298               0 :       continue;
     299                 : 
     300                 :     // the spellchecker doesn't handle curly apostrophes in all languages
     301               0 :     if (ch == 0x2019) { // RIGHT SINGLE QUOTATION MARK
     302               0 :       ch = '\'';
     303                 :     }
     304                 : 
     305               0 :     aOutput.Append(ch);
     306                 :   }
     307               0 : }
     308                 : 
     309                 : // mozInlineSpellWordUtil::GetNextWord
     310                 : //
     311                 : //    FIXME-optimization: we shouldn't have to generate a range every single
     312                 : //    time. It would be better if the inline spellchecker didn't require a
     313                 : //    range unless the word was misspelled. This may or may not be possible.
     314                 : //    When no word is left: *aRange is null, *aSkipChecking is true, aText is untouched and NS_OK is returned.
     315                 : nsresult
     316               0 : mozInlineSpellWordUtil::GetNextWord(nsAString& aText, nsRange** aRange,
     317                 :                                     bool* aSkipChecking)
     318                 : {
     319                 : #ifdef DEBUG_SPELLCHECK
     320                 :   printf("GetNextWord called; mNextWordIndex=%d\n", mNextWordIndex);
     321                 : #endif
     322                 : 
     323               0 :   if (mNextWordIndex < 0 ||
     324               0 :       mNextWordIndex >= PRInt32(mRealWords.Length())) {
     325               0 :     mNextWordIndex = -1;
     326               0 :     *aRange = nsnull;
     327               0 :     *aSkipChecking = true;
     328               0 :     return NS_OK;
     329                 :   }
     330                 :   // The index only advances after the range was built successfully.
     331               0 :   const RealWord& word = mRealWords[mNextWordIndex];
     332               0 :   nsresult rv = MakeRangeForWord(word, aRange);
     333               0 :   NS_ENSURE_SUCCESS(rv, rv);
     334               0 :   ++mNextWordIndex;
     335               0 :   *aSkipChecking = !word.mCheckableWord;
     336               0 :   ::NormalizeWord(mSoftText, word.mSoftTextOffset, word.mLength, aText);
     337                 : 
     338                 : #ifdef DEBUG_SPELLCHECK
     339                 :   printf("GetNextWord returning: %s (skip=%d)\n",
     340                 :          NS_ConvertUTF16toUTF8(aText).get(), *aSkipChecking);
     341                 : #endif
     342                 :   
     343               0 :   return NS_OK;
     344                 : }
     345                 : 
     346                 : // mozInlineSpellWordUtil::MakeRange
     347                 : //
     348                 : //    Convenience function for creating a range over the current document.
     349                 : //    Returns NS_ERROR_NOT_INITIALIZED when mDOMDocument is unset, or the failure from nsRange::Set; on success *aRange receives the new range.
     350                 : nsresult
     351               0 : mozInlineSpellWordUtil::MakeRange(NodeOffset aBegin, NodeOffset aEnd,
     352                 :                                   nsRange** aRange)
     353                 : {
     354               0 :   if (!mDOMDocument)
     355               0 :     return NS_ERROR_NOT_INITIALIZED;
     356                 : 
     357               0 :   nsRefPtr<nsRange> range = new nsRange();
     358                 :   nsresult rv = range->Set(aBegin.mNode, aBegin.mOffset,
     359               0 :                            aEnd.mNode, aEnd.mOffset);
     360               0 :   NS_ENSURE_SUCCESS(rv, rv);
     361               0 :   range.forget(aRange);  // *aRange is written only on this success path
     362                 : 
     363               0 :   return NS_OK;
     364                 : }
     365                 : 
     366                 : /*********** DOM text extraction ************/
     367                 : 
     368                 : // IsDOMWordSeparator
     369                 : //
     370                 : //    Determines if the given character should be considered as a DOM Word
     371                 : //    separator. Basically, this is whitespace, although it could also have
     372                 : //    certain punctuation that we know ALWAYS breaks words. This is important.
     373                 : //    For example, we can't have any punctuation that could appear in a URL
     374                 : //    or email address in this, because those need to always fit into a single
     375                 : //    DOM word.
     376                 : //    Returns true exactly for the characters listed in the two tests below.
     377                 : static bool
     378               0 : IsDOMWordSeparator(PRUnichar ch)
     379                 : {
     380                 :   // simple spaces
     381               0 :   if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r')
     382               0 :     return true;
     383                 : 
     384                 :   // complex spaces - check only if char isn't ASCII (uncommon)
     385               0 :   if (ch >= 0xA0 &&
     386                 :       (ch == 0x00A0 ||  // NO-BREAK SPACE
     387                 :        ch == 0x2002 ||  // EN SPACE
     388                 :        ch == 0x2003 ||  // EM SPACE
     389                 :        ch == 0x2009 ||  // THIN SPACE
     390                 :        ch == 0x200C ||  // ZERO WIDTH NON-JOINER
     391                 :        ch == 0x3000))   // IDEOGRAPHIC SPACE
     392               0 :     return true;
     393                 : 
     394                 :   // otherwise not a space
     395               0 :   return false;
     396                 : }
     397                 : // True when aNode is an element for which IsHTML(nsGkAtoms::br) holds; non-element nodes yield false.
     398                 : static inline bool
     399               0 : IsBRElement(nsINode* aNode)
     400                 : {
     401               0 :   return aNode->IsElement() &&
     402               0 :          aNode->AsElement()->IsHTML(nsGkAtoms::br);
     403                 : }
     404                 : 
     405                 : /**
     406                 :  * Check if there's a DOM word separator before aBeforeOffset in this node.
     407                 :  * Always returns true if it's a BR element.
     408                 :  * aSeparatorOffset is set to the index of the first character in the last
     409                 :  * separator if any is found (0 for BR elements).
     410                 :  * Nodes that are neither BR elements nor text nodes yield false. Text is scanned backwards from min(aBeforeOffset, text length) - 1.
     411                 :  * This function does not modify aSeparatorOffset when it returns false.
     412                 :  */
     413                 : static bool
     414               0 : ContainsDOMWordSeparator(nsINode* aNode, PRInt32 aBeforeOffset,
     415                 :                          PRInt32* aSeparatorOffset)
     416                 : {
     417               0 :   if (IsBRElement(aNode)) {
     418               0 :     *aSeparatorOffset = 0;
     419               0 :     return true;
     420                 :   }
     421                 :   
     422               0 :   if (!IsTextNode(aNode))
     423               0 :     return false;
     424                 : 
     425                 :   // aNode is actually an nsIContent, since it's eTEXT
     426               0 :   nsIContent* content = static_cast<nsIContent*>(aNode);
     427               0 :   const nsTextFragment* textFragment = content->GetText();
     428               0 :   NS_ASSERTION(textFragment, "Where is our text?");
     429               0 :   for (PRInt32 i = NS_MIN(aBeforeOffset, PRInt32(textFragment->GetLength())) - 1; i >= 0; --i) {
     430               0 :     if (IsDOMWordSeparator(textFragment->CharAt(i))) {
     431                 :       // Be greedy, find as many separators as we can
     432               0 :       for (PRInt32 j = i - 1; j >= 0; --j) {
     433               0 :         if (IsDOMWordSeparator(textFragment->CharAt(j))) {
     434               0 :           i = j;
     435                 :         } else {
     436               0 :           break;
     437                 :         }
     438                 :       }
     439               0 :       *aSeparatorOffset = i;  // i now indexes the first character of the separator run
     440               0 :       return true;
     441                 :     }
     442                 :   }
     443               0 :   return false;
     444                 : }
     445                 : // True for br elements and for elements whose primary frame has a display value other than NS_STYLE_DISPLAY_INLINE.
     446                 : static bool
     447               0 : IsBreakElement(nsINode* aNode)
     448                 : {
     449               0 :   if (!aNode->IsElement()) {
     450               0 :     return false;
     451                 :   }
     452                 : 
     453               0 :   dom::Element *element = aNode->AsElement();
     454                 :     
     455               0 :   if (element->IsHTML(nsGkAtoms::br))  // br counts as a break before any frame check
     456               0 :     return true;
     457                 : 
     458                 :   // If we don't have a frame, we don't consider ourselves a break
     459                 :   // element.  In particular, words can span us.
     460               0 :   if (!element->GetPrimaryFrame())
     461               0 :     return false;
     462                 : 
     463                 :   // Anything that's not an inline element is a break element.
     464                 :   // XXXbz should replaced inlines be break elements, though?
     465               0 :   return element->GetPrimaryFrame()->GetStyleDisplay()->mDisplay !=
     466               0 :     NS_STYLE_DISPLAY_INLINE;
     467                 : }
     468                 : // Closure handed to CheckLeavingBreakElement through its void* argument.
     469                 : struct CheckLeavingBreakElementClosure {
     470                 :   bool          mLeftBreakElement;  // set to true by the callback when a break element is left; the callback never clears it
     471                 : };
     472                 : // Callback matching OnLeaveNodeFunPtr: sets mLeftBreakElement in the closure when aNode is a break element.
     473                 : static void
     474               0 : CheckLeavingBreakElement(nsINode* aNode, void* aClosure)
     475                 : {
     476                 :   CheckLeavingBreakElementClosure* cl =
     477               0 :     static_cast<CheckLeavingBreakElementClosure*>(aClosure);
     478               0 :   if (!cl->mLeftBreakElement && IsBreakElement(aNode)) {  // flag is tested first, so IsBreakElement is skipped once it is set
     479               0 :     cl->mLeftBreakElement = true;
     480                 :   }
     481               0 : }
     482                 : // Normalizes aWord in place: runs the file-local ::NormalizeWord over its whole length and assigns the result back.
     483                 : void
     484               0 : mozInlineSpellWordUtil::NormalizeWord(nsSubstring& aWord)
     485                 : {
     486               0 :   nsAutoString result;
     487               0 :   ::NormalizeWord(aWord, 0, aWord.Length(), result);
     488               0 :   aWord = result;
     489               0 : }
     490                 : 
     491                 : void
     492               0 : mozInlineSpellWordUtil::BuildSoftText()
     493                 : {
     494                 :   // First we have to work backwards from mSoftBegin to find a text node
     495                 :   // containing a DOM word separator, a non-inline-element
     496                 :   // boundary, or the hard start node. That's where we'll start building the
     497                 :   // soft string from.
     498               0 :   nsINode* node = mSoftBegin.mNode;
     499               0 :   PRInt32 firstOffsetInNode = 0;
     500               0 :   PRInt32 checkBeforeOffset = mSoftBegin.mOffset;
     501               0 :   while (node) {
     502               0 :     if (ContainsDOMWordSeparator(node, checkBeforeOffset, &firstOffsetInNode)) {
     503               0 :       if (node == mSoftBegin.mNode) {
     504                 :         // If we find a word separator on the first node, look at the preceding
     505                 :         // word on the text node as well.
     506               0 :         PRInt32 newOffset = 0;
     507               0 :         if (firstOffsetInNode > 0) {
     508                 :           // Try to find the previous word boundary.  We ignore the return value
     509                 :           // of ContainsDOMWordSeparator here because there might be no preceding
     510                 :           // word separator (such as when we're at the end of the first word in
     511                 :           // the text node), in which case we just set the found offsets to 0.
     512                 :           // Otherwise, ContainsDOMWordSeparator finds us the correct word
     513                 :           // boundary so that we can avoid looking at too many words.
     514               0 :           ContainsDOMWordSeparator(node, firstOffsetInNode - 1, &newOffset);
     515                 :         }
     516               0 :         firstOffsetInNode = newOffset;
     517               0 :         mSoftBegin.mOffset = newOffset;
     518                 :       }
     519               0 :       break;
     520                 :     }
     521               0 :     checkBeforeOffset = PR_INT32_MAX;
     522               0 :     if (IsBreakElement(node)) {
     523                 :       // Since GetPreviousContent follows tree *preorder*, we're about to traverse
     524                 :       // up out of 'node'. Since node induces breaks (e.g., it's a block),
     525                 :       // don't bother trying to look outside it, just stop now.
     526               0 :       break;
     527                 :     }
     528                 :     // GetPreviousContent below expects mRootNode to be an ancestor of node.
     529               0 :     if (!nsContentUtils::ContentIsDescendantOf(node, mRootNode)) {
     530               0 :       break;
     531                 :     }
     532               0 :     node = node->GetPreviousContent(mRootNode);
     533                 :   }
     534                 : 
     535                 :   // Now build up the string moving forward through the DOM until we reach
     536                 :   // the soft end and *then* see a DOM word separator, a non-inline-element
     537                 :   // boundary, or the hard end node.
     538               0 :   mSoftText.Truncate();
     539               0 :   mSoftTextDOMMapping.Clear();
     540               0 :   bool seenSoftEnd = false;
     541                 :   // Leave this outside the loop so large heap string allocations can be reused
     542                 :   // across iterations
     543               0 :   while (node) {
     544               0 :     if (node == mSoftEnd.mNode) {
     545               0 :       seenSoftEnd = true;
     546                 :     }
     547                 : 
     548               0 :     bool exit = false;
     549               0 :     if (IsTextNode(node)) {
     550               0 :       nsIContent* content = static_cast<nsIContent*>(node);
     551               0 :       NS_ASSERTION(content, "Where is our content?");
     552               0 :       const nsTextFragment* textFragment = content->GetText();
     553               0 :       NS_ASSERTION(textFragment, "Where is our text?");
     554               0 :       PRInt32 lastOffsetInNode = textFragment->GetLength();
     555                 : 
     556               0 :       if (seenSoftEnd) {
     557                 :         // check whether we can stop after this
     558               0 :         for (PRInt32 i = node == mSoftEnd.mNode ? mSoftEnd.mOffset : 0;
     559               0 :              i < PRInt32(textFragment->GetLength()); ++i) {
     560               0 :           if (IsDOMWordSeparator(textFragment->CharAt(i))) {
     561               0 :             exit = true;
     562                 :             // stop at the first separator after the soft end point
     563               0 :             lastOffsetInNode = i;
     564               0 :             break;
     565                 :           }
     566                 :         }
     567                 :       }
     568                 :       
     569               0 :       if (firstOffsetInNode < lastOffsetInNode) {
     570               0 :         PRInt32 len = lastOffsetInNode - firstOffsetInNode;
     571                 :         mSoftTextDOMMapping.AppendElement(
     572               0 :           DOMTextMapping(NodeOffset(node, firstOffsetInNode), mSoftText.Length(), len));
     573               0 :         textFragment->AppendTo(mSoftText, firstOffsetInNode, len);
     574                 :       }
     575                 :       
     576               0 :       firstOffsetInNode = 0;
     577                 :     }
     578                 : 
     579               0 :     if (exit)
     580               0 :       break;
     581                 : 
     582               0 :     CheckLeavingBreakElementClosure closure = { false };
     583               0 :     node = FindNextNode(node, mRootNode, CheckLeavingBreakElement, &closure);
     584               0 :     if (closure.mLeftBreakElement || (node && IsBreakElement(node))) {
     585                 :       // We left, or are entering, a break element (e.g., block). Maybe we can
     586                 :       // stop now.
     587               0 :       if (seenSoftEnd)
     588               0 :         break;
     589                 :       // Record the break
     590               0 :       mSoftText.Append(' ');
     591                 :     }
     592                 :   }
     593                 :   
     594                 : #ifdef DEBUG_SPELLCHECK
     595                 :   printf("Got DOM string: %s\n", NS_ConvertUTF16toUTF8(mSoftText).get());
     596                 : #endif
     597               0 : }
     598                 : 
     599                 : void
     600               0 : mozInlineSpellWordUtil::BuildRealWords()
     601                 : {
     602                 :   // This is pretty simple. We just have to walk mSoftText, tokenizing it
     603                 :   // into "real words".
     604                 :   // We do an outer traversal of words delimited by IsDOMWordSeparator, calling
     605                 :   // SplitDOMWord on each of those DOM words
     606               0 :   PRInt32 wordStart = -1;
     607               0 :   mRealWords.Clear();
     608               0 :   for (PRInt32 i = 0; i < PRInt32(mSoftText.Length()); ++i) {
     609               0 :     if (IsDOMWordSeparator(mSoftText.CharAt(i))) {
     610               0 :       if (wordStart >= 0) {
     611               0 :         SplitDOMWord(wordStart, i);
     612               0 :         wordStart = -1;
     613                 :       }
     614                 :     } else {
     615               0 :       if (wordStart < 0) {
     616               0 :         wordStart = i;
     617                 :       }
     618                 :     }
     619                 :   }
     620               0 :   if (wordStart >= 0) {
     621               0 :     SplitDOMWord(wordStart, mSoftText.Length());
     622                 :   }
     623               0 : }
     624                 : 
     625                 : /*********** DOM/realwords<->mSoftText mapping functions ************/
     626                 : 
     627                 : PRInt32
     628               0 : mozInlineSpellWordUtil::MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset)
     629                 : {
     630               0 :   if (!mSoftTextValid) {
     631               0 :     NS_ERROR("Soft text must be valid if we're to map into it");
     632               0 :     return -1;
     633                 :   }
     634                 :   
     635               0 :   for (PRInt32 i = 0; i < PRInt32(mSoftTextDOMMapping.Length()); ++i) {
     636               0 :     const DOMTextMapping& map = mSoftTextDOMMapping[i];
     637               0 :     if (map.mNodeOffset.mNode == aNodeOffset.mNode) {
     638                 :       // Allow offsets at either end of the string, in particular, allow the
     639                 :       // offset that's at the end of the contributed string
     640                 :       PRInt32 offsetInContributedString =
     641               0 :         aNodeOffset.mOffset - map.mNodeOffset.mOffset;
     642               0 :       if (offsetInContributedString >= 0 &&
     643                 :           offsetInContributedString <= map.mLength)
     644               0 :         return map.mSoftTextOffset + offsetInContributedString;
     645               0 :       return -1;
     646                 :     }
     647                 :   }
     648               0 :   return -1;
     649                 : }
     650                 : 
     651                 : mozInlineSpellWordUtil::NodeOffset
     652               0 : mozInlineSpellWordUtil::MapSoftTextOffsetToDOMPosition(PRInt32 aSoftTextOffset,
     653                 :                                                        DOMMapHint aHint)
     654                 : {
     655               0 :   NS_ASSERTION(mSoftTextValid, "Soft text must be valid if we're to map out of it");
     656               0 :   if (!mSoftTextValid)
     657               0 :     return NodeOffset(nsnull, -1);
     658                 :   
     659                 :   // The invariant is that the range start..end includes the last mapping,
     660                 :   // if any, such that mSoftTextOffset <= aSoftTextOffset
     661               0 :   PRInt32 start = 0;
     662               0 :   PRInt32 end = mSoftTextDOMMapping.Length();
     663               0 :   while (end - start >= 2) {
     664               0 :     PRInt32 mid = (start + end)/2;
     665               0 :     const DOMTextMapping& map = mSoftTextDOMMapping[mid];
     666               0 :     if (map.mSoftTextOffset > aSoftTextOffset) {
     667               0 :       end = mid;
     668                 :     } else {
     669               0 :       start = mid;
     670                 :     }
     671                 :   }
     672                 :   
     673               0 :   if (start >= end)
     674               0 :     return NodeOffset(nsnull, -1);
     675                 : 
     676                 :   // 'start' is now the last mapping, if any, such that
     677                 :   // mSoftTextOffset <= aSoftTextOffset.
     678                 :   // If we're doing HINT_END, then we may want to return the end of
     679                 :   // the previous mapping instead of the start of this mapping
     680               0 :   if (aHint == HINT_END && start > 0) {
     681               0 :     const DOMTextMapping& map = mSoftTextDOMMapping[start - 1];
     682               0 :     if (map.mSoftTextOffset + map.mLength == aSoftTextOffset)
     683               0 :       return NodeOffset(map.mNodeOffset.mNode, map.mNodeOffset.mOffset + map.mLength);
     684                 :   }
     685                 :   
     686                 :   // We allow ourselves to return the end of this mapping even if we're
     687                 :   // doing HINT_START. This will only happen if there is no mapping which this
     688                 :   // point is the start of. I'm not 100% sure this is OK...
     689               0 :   const DOMTextMapping& map = mSoftTextDOMMapping[start];
     690               0 :   PRInt32 offset = aSoftTextOffset - map.mSoftTextOffset;
     691               0 :   if (offset >= 0 && offset <= map.mLength)
     692               0 :     return NodeOffset(map.mNodeOffset.mNode, map.mNodeOffset.mOffset + offset);
     693                 :     
     694               0 :   return NodeOffset(nsnull, -1);
     695                 : }
     696                 : 
     697                 : PRInt32
     698               0 : mozInlineSpellWordUtil::FindRealWordContaining(PRInt32 aSoftTextOffset,
     699                 :     DOMMapHint aHint, bool aSearchForward)
     700                 : {
     701               0 :   NS_ASSERTION(mSoftTextValid, "Soft text must be valid if we're to map out of it");
     702               0 :   if (!mSoftTextValid)
     703               0 :     return -1;
     704                 : 
     705                 :   // The invariant is that the range start..end includes the last word,
     706                 :   // if any, such that mSoftTextOffset <= aSoftTextOffset
     707               0 :   PRInt32 start = 0;
     708               0 :   PRInt32 end = mRealWords.Length();
     709               0 :   while (end - start >= 2) {
     710               0 :     PRInt32 mid = (start + end)/2;
     711               0 :     const RealWord& word = mRealWords[mid];
     712               0 :     if (word.mSoftTextOffset > aSoftTextOffset) {
     713               0 :       end = mid;
     714                 :     } else {
     715               0 :       start = mid;
     716                 :     }
     717                 :   }
     718                 :   
     719               0 :   if (start >= end)
     720               0 :     return -1;
     721                 : 
     722                 :   // 'start' is now the last word, if any, such that
     723                 :   // mSoftTextOffset <= aSoftTextOffset.
     724                 :   // If we're doing HINT_END, then we may want to return the end of
     725                 :   // the previous word instead of the start of this word
     726               0 :   if (aHint == HINT_END && start > 0) {
     727               0 :     const RealWord& word = mRealWords[start - 1];
     728               0 :     if (word.mSoftTextOffset + word.mLength == aSoftTextOffset)
     729               0 :       return start - 1;
     730                 :   }
     731                 :   
     732                 :   // We allow ourselves to return the end of this word even if we're
     733                 :   // doing HINT_START. This will only happen if there is no word which this
     734                 :   // point is the start of. I'm not 100% sure this is OK...
     735               0 :   const RealWord& word = mRealWords[start];
     736               0 :   PRInt32 offset = aSoftTextOffset - word.mSoftTextOffset;
     737               0 :   if (offset >= 0 && offset <= word.mLength)
     738               0 :     return start;
     739                 : 
     740               0 :   if (aSearchForward) {
     741               0 :     if (mRealWords[0].mSoftTextOffset > aSoftTextOffset) {
     742                 :       // All words have mSoftTextOffset > aSoftTextOffset
     743               0 :       return 0;
     744                 :     }
     745                 :     // 'start' is the last word such that mSoftTextOffset <= aSoftTextOffset.
     746                 :     // Word start+1, if it exists, will be the first with
     747                 :     // mSoftTextOffset > aSoftTextOffset.
     748               0 :     if (start + 1 < PRInt32(mRealWords.Length()))
     749               0 :       return start + 1;
     750                 :   }
     751                 : 
     752               0 :   return -1;
     753                 : }
     754                 : 
     755                 : /*********** Word Splitting ************/
     756                 : 
     757                 : // classifies a given character in the DOM word
     758                 : enum CharClass {
     759                 :   CHAR_CLASS_WORD,
     760                 :   CHAR_CLASS_SEPARATOR,
     761                 :   CHAR_CLASS_END_OF_INPUT };
     762                 : 
     763                 : // Encapsulates DOM-word to real-word splitting
     764                 : struct WordSplitState
     765               0 : {
     766                 :   mozInlineSpellWordUtil*    mWordUtil;
     767                 :   const nsDependentSubstring mDOMWordText;
     768                 :   PRInt32                    mDOMWordOffset;
     769                 :   CharClass                  mCurCharClass;
     770                 : 
     771               0 :   WordSplitState(mozInlineSpellWordUtil* aWordUtil,
     772                 :                  const nsString& aString, PRInt32 aStart, PRInt32 aLen)
     773                 :     : mWordUtil(aWordUtil), mDOMWordText(aString, aStart, aLen),
     774               0 :       mDOMWordOffset(0), mCurCharClass(CHAR_CLASS_END_OF_INPUT) {}
     775                 : 
     776                 :   CharClass ClassifyCharacter(PRInt32 aIndex, bool aRecurse) const;
     777                 :   void Advance();
     778                 :   void AdvanceThroughSeparators();
     779                 :   void AdvanceThroughWord();
     780                 : 
     781                 :   // Finds special words like email addresses and URLs that may start at the
     782                 :   // current position, and returns their length, or -1 if not found. This allows
     783                 :   // arbitrary word breaking rules to be used for these special entities, as
     784                 :   // long as they cannot contain whitespace.
     785                 :   PRInt32 FindSpecialWord();
     786                 : 
     787                 :   // Similar to FindSpecialWord except that this takes a split word as
     788                 :   // input. This checks for things that do not require special word-breaking
     789                 :   // rules.
     790                 :   bool ShouldSkipWord(PRInt32 aStart, PRInt32 aLength);
     791                 : };
     792                 : 
     793                 : // WordSplitState::ClassifyCharacter
     794                 : 
     795                 : CharClass
     796               0 : WordSplitState::ClassifyCharacter(PRInt32 aIndex, bool aRecurse) const
     797                 : {
     798               0 :   NS_ASSERTION(aIndex >= 0 && aIndex <= PRInt32(mDOMWordText.Length()),
     799                 :                "Index out of range");
     800               0 :   if (aIndex == PRInt32(mDOMWordText.Length()))
     801               0 :     return CHAR_CLASS_SEPARATOR;
     802                 : 
     803                 :   // this will classify the character, we want to treat "ignorable" characters
     804                 :   // such as soft hyphens as word characters.
     805                 :   nsIUGenCategory::nsUGenCategory
     806               0 :     charCategory = mozilla::unicode::GetGenCategory(mDOMWordText[aIndex]);
     807               0 :   if (charCategory == nsIUGenCategory::kLetter ||
     808               0 :       IsIgnorableCharacter(mDOMWordText[aIndex]))
     809               0 :     return CHAR_CLASS_WORD;
     810                 : 
     811                 :   // If conditional punctuation is surrounded immediately on both sides by word
     812                 :   // characters it also counts as a word character.
     813               0 :   if (IsConditionalPunctuation(mDOMWordText[aIndex])) {
     814               0 :     if (!aRecurse) {
     815                 :       // not allowed to look around, this punctuation counts like a separator
     816               0 :       return CHAR_CLASS_SEPARATOR;
     817                 :     }
     818                 : 
     819                 :     // check the left-hand character
     820               0 :     if (aIndex == 0)
     821               0 :       return CHAR_CLASS_SEPARATOR;
     822               0 :     if (ClassifyCharacter(aIndex - 1, false) != CHAR_CLASS_WORD)
     823               0 :       return CHAR_CLASS_SEPARATOR;
     824                 :     // If the previous character is a word-char, make sure that it's not a
     825                 :     // special dot character.
     826               0 :     if (mDOMWordText[aIndex - 1] == '.')
     827               0 :       return CHAR_CLASS_SEPARATOR;
     828                 : 
     829                 :     // now we know left char is a word-char, check the right-hand character
     830               0 :     if (aIndex == PRInt32(mDOMWordText.Length()) - 1)
     831               0 :       return CHAR_CLASS_SEPARATOR;
     832               0 :     if (ClassifyCharacter(aIndex + 1, false) != CHAR_CLASS_WORD)
     833               0 :       return CHAR_CLASS_SEPARATOR;
     834                 :     // If the next character is a word-char, make sure that it's not a
     835                 :     // special dot character.
     836               0 :     if (mDOMWordText[aIndex + 1] == '.')
     837               0 :       return CHAR_CLASS_SEPARATOR;
     838                 : 
     839                 :     // char on either side is a word, this counts as a word
     840               0 :     return CHAR_CLASS_WORD;
     841                 :   }
     842                 : 
     843                 :   // The dot character, if appearing at the end of a word, should
     844                 :   // be considered part of that word.  Example: "etc.", or
     845                 :   // abbreviations
     846               0 :   if (aIndex > 0 &&
     847               0 :       mDOMWordText[aIndex] == '.' &&
     848               0 :       mDOMWordText[aIndex - 1] != '.' &&
     849               0 :       ClassifyCharacter(aIndex - 1, false) != CHAR_CLASS_WORD) {
     850               0 :     return CHAR_CLASS_WORD;
     851                 :   }
     852                 : 
     853                 :   // all other punctuation
     854               0 :   if (charCategory == nsIUGenCategory::kSeparator ||
     855                 :       charCategory == nsIUGenCategory::kOther ||
     856                 :       charCategory == nsIUGenCategory::kPunctuation ||
     857                 :       charCategory == nsIUGenCategory::kSymbol) {
     858                 :     // Don't break on hyphens, as hunspell handles them on its own.
     859               0 :     if (aIndex > 0 &&
     860               0 :         mDOMWordText[aIndex] == '-' &&
     861               0 :         mDOMWordText[aIndex - 1] != '-' &&
     862               0 :         ClassifyCharacter(aIndex - 1, false) == CHAR_CLASS_WORD) {
     863                 :       // A hyphen is only meaningful as a separator inside a word
     864                 :       // if the previous and next characters are a word character.
     865               0 :       if (aIndex == PRInt32(mDOMWordText.Length()) - 1)
     866               0 :         return CHAR_CLASS_SEPARATOR;
     867               0 :       if (mDOMWordText[aIndex + 1] != '.' &&
     868               0 :           ClassifyCharacter(aIndex + 1, false) == CHAR_CLASS_WORD)
     869               0 :         return CHAR_CLASS_WORD;
     870                 :     }
     871               0 :     return CHAR_CLASS_SEPARATOR;
     872                 :   }
     873                 : 
     874                 :   // any other character counts as a word
     875               0 :   return CHAR_CLASS_WORD;
     876                 : }
     877                 : 
     878                 : 
     879                 : // WordSplitState::Advance
     880                 : 
     881                 : void
     882               0 : WordSplitState::Advance()
     883                 : {
     884               0 :   NS_ASSERTION(mDOMWordOffset >= 0, "Negative word index");
     885               0 :   NS_ASSERTION(mDOMWordOffset < (PRInt32)mDOMWordText.Length(),
     886                 :                "Length beyond end");
     887                 : 
     888               0 :   mDOMWordOffset ++;
     889               0 :   if (mDOMWordOffset >= (PRInt32)mDOMWordText.Length())
     890               0 :     mCurCharClass = CHAR_CLASS_END_OF_INPUT;
     891                 :   else
     892               0 :     mCurCharClass = ClassifyCharacter(mDOMWordOffset, true);
     893               0 : }
     894                 : 
     895                 : 
     896                 : // WordSplitState::AdvanceThroughSeparators
     897                 : 
     898                 : void
     899               0 : WordSplitState::AdvanceThroughSeparators()
     900                 : {
     901               0 :   while (mCurCharClass == CHAR_CLASS_SEPARATOR)
     902               0 :     Advance();
     903               0 : }
     904                 : 
     905                 : // WordSplitState::AdvanceThroughWord
     906                 : 
     907                 : void
     908               0 : WordSplitState::AdvanceThroughWord()
     909                 : {
     910               0 :   while (mCurCharClass == CHAR_CLASS_WORD)
     911               0 :     Advance();
     912               0 : }
     913                 : 
     914                 : 
     915                 : // WordSplitState::FindSpecialWord
     916                 : 
     917                 : PRInt32
     918               0 : WordSplitState::FindSpecialWord()
     919                 : {
     920                 :   PRInt32 i;
     921                 : 
     922                 :   // Search for email addresses. We simply define these as any sequence of
     923                 :   // characters with an '@' character in the middle. The DOM word is already
     924                 :   // split on whitespace, so we know that everything to the end is the address
     925                 :   //
     926                 :   // Also look for periods, this tells us if we want to run the URL finder.
     927               0 :   bool foundDot = false;
     928               0 :   PRInt32 firstColon = -1;
     929               0 :   for (i = mDOMWordOffset;
     930               0 :        i < PRInt32(mDOMWordText.Length()); i ++) {
     931               0 :     if (mDOMWordText[i] == '@') {
     932                 :       // only accept this if there are unambiguous word characters (don't bother
     933                 :       // recursing to disambiguate apostrophes) on each side. This prevents
     934                 :       // classifying, e.g. "@home" as an email address
     935                 : 
     936                 :       // Use this condition to only accept words with '@' in the middle of
     937                 :       // them. It works, but the inline spellchecker doesn't like this. The problem
     938                 :       // is that when you type "fhsgfh@", that's a misspelled word followed by a
     939                 :       // symbol, but when you type another letter ("fhsgfh@g"), that first word
     940                 :       // needs to be unmarked as misspelled. It doesn't do this; it only checks the
     941                 :       // current position for potentially removing a spelling range.
     942               0 :       if (i > 0 && ClassifyCharacter(i - 1, false) == CHAR_CLASS_WORD &&
     943               0 :           i < (PRInt32)mDOMWordText.Length() - 1 &&
     944               0 :           ClassifyCharacter(i + 1, false) == CHAR_CLASS_WORD)
     945                 : 
     946               0 :       return mDOMWordText.Length() - mDOMWordOffset;
     947               0 :     } else if (mDOMWordText[i] == '.' && ! foundDot &&
     948               0 :         i > 0 && i < (PRInt32)mDOMWordText.Length() - 1) {
     949                 :       // we found a period not at the end, we should check harder for URLs
     950               0 :       foundDot = true;
     951               0 :     } else if (mDOMWordText[i] == ':' && firstColon < 0) {
     952               0 :       firstColon = i;
     953                 :     }
     954                 :   }
     955                 : 
     956                 :   // If the first colon is followed by a slash, consider it a URL
     957                 :   // This will catch things like asdf://foo.com
     958               0 :   if (firstColon >= 0 && firstColon < (PRInt32)mDOMWordText.Length() - 1 &&
     959               0 :       mDOMWordText[firstColon + 1] == '/') {
     960               0 :     return mDOMWordText.Length() - mDOMWordOffset;
     961                 :   }
     962                 : 
     963                 :   // Check the text before the first colon against some known protocols. It
     964                 :   // is impossible to check against all protocols, especially since you can
     965                 :   // plug in new protocols. We also don't want to waste time here checking
     966                 :   // against a lot of obscure protocols.
     967               0 :   if (firstColon > mDOMWordOffset) {
     968                 :     nsString protocol(Substring(mDOMWordText, mDOMWordOffset,
     969               0 :                       firstColon - mDOMWordOffset));
     970               0 :     if (protocol.EqualsIgnoreCase("http") ||
     971               0 :         protocol.EqualsIgnoreCase("https") ||
     972               0 :         protocol.EqualsIgnoreCase("news") ||
     973               0 :         protocol.EqualsIgnoreCase("file") ||
     974               0 :         protocol.EqualsIgnoreCase("javascript") ||
     975               0 :         protocol.EqualsIgnoreCase("ftp")) {
     976               0 :       return mDOMWordText.Length() - mDOMWordOffset;
     977                 :     }
     978                 :   }
     979                 : 
     980                 :   // not anything special
     981               0 :   return -1;
     982                 : }
     983                 : 
     984                 : // WordSplitState::ShouldSkipWord
     985                 : 
     986                 : bool
     987               0 : WordSplitState::ShouldSkipWord(PRInt32 aStart, PRInt32 aLength)
     988                 : {
     989               0 :   PRInt32 last = aStart + aLength;
     990                 : 
     991                 :   // check to see if the word contains a digit
     992               0 :   for (PRInt32 i = aStart; i < last; i ++) {
     993               0 :     PRUnichar ch = mDOMWordText[i];
     994                 :     // XXX Shouldn't this be something a lot more complex, Unicode-based?
     995               0 :     if (ch >= '0' && ch <= '9')
     996               0 :       return true;
     997                 :   }
     998                 : 
     999                 :   // not special
    1000               0 :   return false;
    1001                 : }
    1002                 : 
    1003                 : // mozInlineSpellWordUtil::SplitDOMWord
    1004                 : 
    1005                 : void
    1006               0 : mozInlineSpellWordUtil::SplitDOMWord(PRInt32 aStart, PRInt32 aEnd)
    1007                 : {
    1008               0 :   WordSplitState state(this, mSoftText, aStart, aEnd - aStart);
    1009               0 :   state.mCurCharClass = state.ClassifyCharacter(0, true);
    1010                 : 
    1011               0 :   while (state.mCurCharClass != CHAR_CLASS_END_OF_INPUT) {
    1012               0 :     state.AdvanceThroughSeparators();
    1013               0 :     if (state.mCurCharClass == CHAR_CLASS_END_OF_INPUT)
    1014               0 :       break;
    1015                 : 
    1016               0 :     PRInt32 specialWordLength = state.FindSpecialWord();
    1017               0 :     if (specialWordLength > 0) {
    1018                 :       mRealWords.AppendElement(
    1019               0 :         RealWord(aStart + state.mDOMWordOffset, specialWordLength, false));
    1020                 : 
    1021                 :       // skip the special word
    1022               0 :       state.mDOMWordOffset += specialWordLength;
    1023               0 :       if (state.mDOMWordOffset + aStart >= aEnd)
    1024               0 :         state.mCurCharClass = CHAR_CLASS_END_OF_INPUT;
    1025                 :       else
    1026               0 :         state.mCurCharClass = state.ClassifyCharacter(state.mDOMWordOffset, true);
    1027               0 :       continue;
    1028                 :     }
    1029                 : 
    1030                 :     // save the beginning of the word
    1031               0 :     PRInt32 wordOffset = state.mDOMWordOffset;
    1032                 : 
    1033                 :     // find the end of the word
    1034               0 :     state.AdvanceThroughWord();
    1035               0 :     PRInt32 wordLen = state.mDOMWordOffset - wordOffset;
    1036                 :     mRealWords.AppendElement(
    1037                 :       RealWord(aStart + wordOffset, wordLen,
    1038               0 :                !state.ShouldSkipWord(wordOffset, wordLen)));
    1039                 :   }
    1040               0 : }

Generated by: LCOV version 1.7