LCOV - code coverage report
Current view: directory - extensions/spellcheck/src - mozEnglishWordUtils.cpp (source / functions) Found Hit Coverage
Test: app.info Lines: 155 2 1.3 %
Date: 2012-06-02 Functions: 17 2 11.8 %

       1                 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2                 : /* ***** BEGIN LICENSE BLOCK *****
       3                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       4                 :  *
       5                 :  * The contents of this file are subject to the Mozilla Public License Version
       6                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       7                 :  * the License. You may obtain a copy of the License at
       8                 :  * http://www.mozilla.org/MPL/
       9                 :  *
      10                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      11                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      12                 :  * for the specific language governing rights and limitations under the
      13                 :  * License.
      14                 :  *
      15                 :  * The Original Code is Mozilla Spellchecker Component.
      16                 :  *
      17                 :  * The Initial Developer of the Original Code is
      18                 :  * David Einstein.
      19                 :  * Portions created by the Initial Developer are Copyright (C) 2001
      20                 :  * the Initial Developer. All Rights Reserved.
      21                 :  *
      22                 :  * Contributor(s): David Einstein Deinst@world.std.com
      23                 :  *
      24                 :  * Alternatively, the contents of this file may be used under the terms of
      25                 :  * either the GNU General Public License Version 2 or later (the "GPL"), or
      26                 :  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      27                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      28                 :  * of those above. If you wish to allow use of your version of this file only
      29                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      30                 :  * use your version of this file under the terms of the MPL, indicate your
      31                 :  * decision by deleting the provisions above and replace them with the notice
      32                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      33                 :  * the provisions above, a recipient may use your version of this file under
      34                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      35                 :  *
      36                 :  * ***** END LICENSE BLOCK ***** */
      37                 : 
      38                 : #include "mozEnglishWordUtils.h"
      39                 : #include "nsReadableUtils.h"
      40                 : #include "nsIServiceManager.h"
      41                 : #include "nsUnicharUtils.h"
      42                 : #include "nsUnicharUtilCIID.h"
      43                 : #include "nsUnicodeProperties.h"
      44                 : #include "nsCRT.h"
      45                 : 
      46               0 : NS_IMPL_CYCLE_COLLECTING_ADDREF(mozEnglishWordUtils)
      47               0 : NS_IMPL_CYCLE_COLLECTING_RELEASE(mozEnglishWordUtils)
      48                 : 
      49               0 : NS_INTERFACE_MAP_BEGIN(mozEnglishWordUtils)
      50               0 :   NS_INTERFACE_MAP_ENTRY(mozISpellI18NUtil)
      51               0 :   NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, mozISpellI18NUtil)
      52               0 :   NS_INTERFACE_MAP_ENTRIES_CYCLE_COLLECTION(mozEnglishWordUtils)
      53               0 : NS_INTERFACE_MAP_END
      54                 : 
      55            1464 : NS_IMPL_CYCLE_COLLECTION_1(mozEnglishWordUtils,
      56                 :                            mURLDetector)
      57                 : 
      58               0 : mozEnglishWordUtils::mozEnglishWordUtils()
      59                 : {
      60               0 :   mLanguage.AssignLiteral("en");
      61                 : 
      62                 :   nsresult rv;
      63               0 :   mURLDetector = do_CreateInstance(MOZ_TXTTOHTMLCONV_CONTRACTID, &rv);
      64               0 : }
      65                 : 
      66               0 : mozEnglishWordUtils::~mozEnglishWordUtils()
      67                 : {
      68               0 : }
      69                 : 
      70                 : /* attribute wstring language; */
      71               0 : NS_IMETHODIMP mozEnglishWordUtils::GetLanguage(PRUnichar * *aLanguage)
      72                 : {
      73               0 :   nsresult rv = NS_OK;
      74               0 :   NS_ENSURE_ARG_POINTER(aLanguage);
      75                 : 
      76               0 :   *aLanguage = ToNewUnicode(mLanguage);
      77               0 :   if(!aLanguage) rv = NS_ERROR_OUT_OF_MEMORY;
      78               0 :   return rv;
      79                 :  }
      80                 : 
      81                 : /* void GetRootForm (in wstring aWord, in PRUint32 type, [array, size_is (count)] out wstring words, out PRUint32 count); */
      82                 : // return the possible root forms of aWord.
      83               0 : NS_IMETHODIMP mozEnglishWordUtils::GetRootForm(const PRUnichar *aWord, PRUint32 type, PRUnichar ***words, PRUint32 *count)
      84                 : {
      85               0 :   nsAutoString word(aWord);
      86                 :   PRUnichar **tmpPtr;
      87               0 :   PRInt32 length = word.Length();
      88                 : 
      89               0 :   *count = 0;
      90                 : 
      91               0 :   mozEnglishWordUtils::myspCapitalization ct = captype(word);
      92               0 :   switch (ct)
      93                 :     {
      94                 :     case HuhCap:
      95                 :     case NoCap: 
      96               0 :       tmpPtr = (PRUnichar **)nsMemory::Alloc(sizeof(PRUnichar *));
      97               0 :       if (!tmpPtr)
      98               0 :         return NS_ERROR_OUT_OF_MEMORY;
      99               0 :       tmpPtr[0] = ToNewUnicode(word);
     100               0 :       if (!tmpPtr[0]) {
     101               0 :         NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr);
     102               0 :         return NS_ERROR_OUT_OF_MEMORY;
     103                 :       }
     104               0 :       *words = tmpPtr;
     105               0 :       *count = 1;
     106               0 :       break;
     107                 :     
     108                 : 
     109                 :     case AllCap:
     110               0 :       tmpPtr = (PRUnichar **)nsMemory::Alloc(sizeof(PRUnichar *) * 3);
     111               0 :       if (!tmpPtr)
     112               0 :         return NS_ERROR_OUT_OF_MEMORY;
     113               0 :       tmpPtr[0] = ToNewUnicode(word);
     114               0 :       if (!tmpPtr[0]) {
     115               0 :         NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr);
     116               0 :         return NS_ERROR_OUT_OF_MEMORY;
     117                 :       }
     118               0 :       ToLowerCase(tmpPtr[0], tmpPtr[0], length);
     119                 : 
     120               0 :       tmpPtr[1] = ToNewUnicode(word);
     121               0 :       if (!tmpPtr[1]) {
     122               0 :         NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(1, tmpPtr);
     123               0 :         return NS_ERROR_OUT_OF_MEMORY;
     124                 :       }
     125               0 :       ToLowerCase(tmpPtr[1], tmpPtr[1], length);
     126               0 :       ToUpperCase(tmpPtr[1], tmpPtr[1], 1);
     127                 : 
     128               0 :       tmpPtr[2] = ToNewUnicode(word);
     129               0 :       if (!tmpPtr[2]) {
     130               0 :         NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(2, tmpPtr);
     131               0 :         return NS_ERROR_OUT_OF_MEMORY;
     132                 :       }
     133                 : 
     134               0 :       *words = tmpPtr;
     135               0 :       *count = 3;
     136               0 :       break;
     137                 :  
     138                 :     case InitCap:  
     139               0 :       tmpPtr = (PRUnichar **)nsMemory::Alloc(sizeof(PRUnichar *) * 2);
     140               0 :       if (!tmpPtr)
     141               0 :         return NS_ERROR_OUT_OF_MEMORY;
     142                 : 
     143               0 :       tmpPtr[0] = ToNewUnicode(word);
     144               0 :       if (!tmpPtr[0]) {
     145               0 :         NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr);
     146               0 :         return NS_ERROR_OUT_OF_MEMORY;
     147                 :       }
     148               0 :       ToLowerCase(tmpPtr[0], tmpPtr[0], length);
     149                 : 
     150               0 :       tmpPtr[1] = ToNewUnicode(word);
     151               0 :       if (!tmpPtr[1]) {
     152               0 :         NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(1, tmpPtr);
     153               0 :         return NS_ERROR_OUT_OF_MEMORY;
     154                 :       }
     155                 : 
     156               0 :       *words = tmpPtr;
     157               0 :       *count = 2;
     158               0 :       break;
     159                 :     default:
     160               0 :       return NS_ERROR_FAILURE; // should never get here;
     161                 :     }
     162               0 :   return NS_OK;
     163                 : }
     164                 : 
     165                 : // This needs vast improvement
     166               0 : bool mozEnglishWordUtils::ucIsAlpha(PRUnichar aChar)
     167                 : {
     168                 :   // XXX we have to fix callers to handle the full Unicode range
     169               0 :   return nsIUGenCategory::kLetter == mozilla::unicode::GetGenCategory(aChar);
     170                 : }
     171                 : 
     172                 : /* void FindNextWord (in wstring word, in PRUint32 length, in PRUint32 offset, out PRUint32 begin, out PRUint32 end); */
     173               0 : NS_IMETHODIMP mozEnglishWordUtils::FindNextWord(const PRUnichar *word, PRUint32 length, PRUint32 offset, PRInt32 *begin, PRInt32 *end)
     174                 : {
     175               0 :   const PRUnichar *p = word + offset;
     176               0 :   const PRUnichar *endbuf = word + length;
     177               0 :   const PRUnichar *startWord=p;
     178               0 :   if(p<endbuf){
     179                 :     // XXX These loops should be modified to handle non-BMP characters.
     180                 :     // if previous character is a word character, need to advance out of the word
     181               0 :     if (offset > 0 && ucIsAlpha(*(p-1))) {
     182               0 :       while (p < endbuf && ucIsAlpha(*p))
     183               0 :         p++;
     184                 :     }
     185               0 :     while((p < endbuf) && (!ucIsAlpha(*p)))
     186                 :       {
     187               0 :         p++;
     188                 :       }
     189               0 :     startWord=p;
     190               0 :     while((p < endbuf) && ((ucIsAlpha(*p))||(*p=='\'')))
     191                 :       { 
     192               0 :         p++;
     193                 :       }
     194                 :     
     195                 :     // we could be trying to break down a url, we don't want to break a url into parts,
     196                 :     // instead we want to find out if it really is a url and if so, skip it, advancing startWord 
     197                 :     // to a point after the url.
     198                 : 
     199                 :     // before we spend more time looking to see if the word is a url, look for a url identifer
     200                 :     // and make sure that identifer isn't the last character in the word fragment.
     201               0 :     if ( (*p == ':' || *p == '@' || *p == '.') &&  p < endbuf - 1) {
     202                 : 
     203                 :         // ok, we have a possible url...do more research to find out if we really have one
     204                 :         // and determine the length of the url so we can skip over it.
     205                 :        
     206               0 :         if (mURLDetector)
     207                 :         {
     208               0 :           PRInt32 startPos = -1;
     209               0 :           PRInt32 endPos = -1;        
     210                 : 
     211               0 :           mURLDetector->FindURLInPlaintext(startWord, endbuf - startWord, p - startWord, &startPos, &endPos);
     212                 : 
     213                 :           // ok, if we got a url, adjust the array bounds, skip the current url text and find the next word again
     214               0 :           if (startPos != -1 && endPos != -1) { 
     215               0 :             startWord = p + endPos + 1; // skip over the url
     216               0 :             p = startWord; // reset p
     217                 : 
     218                 :             // now recursively call FindNextWord to search for the next word now that we have skipped the url
     219               0 :             return FindNextWord(word, length, startWord - word, begin, end);
     220                 :           }
     221                 :         }
     222                 :     }
     223                 : 
     224               0 :     while((p > startWord)&&(*(p-1) == '\'')){  // trim trailing apostrophes
     225               0 :       p--;
     226                 :     }
     227                 :   }
     228                 :   else{
     229               0 :     startWord = endbuf;
     230                 :   }
     231               0 :   if(startWord == endbuf){
     232               0 :     *begin = -1;
     233               0 :     *end = -1;
     234                 :   }
     235                 :   else{
     236               0 :     *begin = startWord-word;
     237               0 :     *end = p-word;
     238                 :   }
     239               0 :   return NS_OK;
     240                 : }
     241                 : 
     242                 : mozEnglishWordUtils::myspCapitalization 
     243               0 : mozEnglishWordUtils::captype(const nsString &word)
     244                 : {
     245               0 :   PRUnichar* lword=ToNewUnicode(word);  
     246               0 :   ToUpperCase(lword,lword,word.Length());
     247               0 :   if(word.Equals(lword)){
     248               0 :     nsMemory::Free(lword);
     249               0 :     return AllCap;
     250                 :   }
     251                 : 
     252               0 :   ToLowerCase(lword,lword,word.Length());
     253               0 :   if(word.Equals(lword)){
     254               0 :     nsMemory::Free(lword);
     255               0 :     return NoCap;
     256                 :   }
     257               0 :   PRInt32 length=word.Length();
     258               0 :   if(Substring(word,1,length-1).Equals(lword+1)){
     259               0 :     nsMemory::Free(lword);
     260               0 :     return InitCap;
     261                 :   }
     262               0 :   nsMemory::Free(lword);
     263               0 :   return HuhCap;
     264                 : }
     265                 : 
     266                 : // Convert the list of words in iwords to the same capitalization aWord and 
     267                 : // return them in owords.
     268               0 : NS_IMETHODIMP mozEnglishWordUtils::FromRootForm(const PRUnichar *aWord, const PRUnichar **iwords, PRUint32 icount, PRUnichar ***owords, PRUint32 *ocount)
     269                 : {
     270               0 :   nsAutoString word(aWord);
     271               0 :   nsresult rv = NS_OK;
     272                 : 
     273                 :   PRInt32 length;
     274               0 :   PRUnichar **tmpPtr  = (PRUnichar **)nsMemory::Alloc(sizeof(PRUnichar *)*icount);
     275               0 :   if (!tmpPtr)
     276               0 :     return NS_ERROR_OUT_OF_MEMORY;
     277                 : 
     278               0 :   mozEnglishWordUtils::myspCapitalization ct = captype(word);
     279               0 :   for(PRUint32 i = 0; i < icount; ++i) {
     280               0 :     length = nsCRT::strlen(iwords[i]);
     281               0 :     tmpPtr[i] = (PRUnichar *) nsMemory::Alloc(sizeof(PRUnichar) * (length + 1));
     282               0 :     if (NS_UNLIKELY(!tmpPtr[i])) {
     283               0 :       NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(i, tmpPtr);
     284               0 :       return NS_ERROR_OUT_OF_MEMORY;
     285                 :     }
     286               0 :     memcpy(tmpPtr[i], iwords[i], (length + 1) * sizeof(PRUnichar));
     287                 : 
     288               0 :     nsAutoString capTest(tmpPtr[i]);
     289               0 :     mozEnglishWordUtils::myspCapitalization newCt=captype(capTest);
     290               0 :     if(newCt == NoCap){
     291               0 :       switch(ct) 
     292                 :         {
     293                 :         case HuhCap:
     294                 :         case NoCap:
     295               0 :           break;
     296                 :         case AllCap:
     297               0 :           ToUpperCase(tmpPtr[i],tmpPtr[i],length);
     298               0 :           rv = NS_OK;
     299               0 :           break;
     300                 :         case InitCap:  
     301               0 :           ToUpperCase(tmpPtr[i],tmpPtr[i],1);
     302               0 :           rv = NS_OK;
     303               0 :           break;
     304                 :         default:
     305               0 :           rv = NS_ERROR_FAILURE; // should never get here;
     306               0 :           break;
     307                 : 
     308                 :         }
     309                 :     }
     310                 :   }
     311               0 :   if (NS_SUCCEEDED(rv)){
     312               0 :     *owords = tmpPtr;
     313               0 :     *ocount = icount;
     314                 :   }
     315               0 :   return rv;
     316            4392 : }
     317                 : 

Generated by: LCOV version 1.7