LCOV - code coverage report
Current view: directory - extensions/spellcheck/hunspell/src - hashmgr.cpp (source / functions) Found Hit Coverage
Test: app.info Lines: 580 391 67.4 %
Date: 2012-06-02 Functions: 23 17 73.9 %

       1                 : /******* BEGIN LICENSE BLOCK *******
       2                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       3                 :  * 
       4                 :  * The contents of this file are subject to the Mozilla Public License Version
       5                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       6                 :  * the License. You may obtain a copy of the License at
       7                 :  * http://www.mozilla.org/MPL/
       8                 :  * 
       9                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      10                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      11                 :  * for the specific language governing rights and limitations under the
      12                 :  * License.
      13                 :  * 
      14                 :  * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
      15                 :  * and László Németh (Hunspell). Portions created by the Initial Developers
      16                 :  * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
      17                 :  * 
      18                 :  * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
      19                 :  *                 David Einstein (deinst@world.std.com)
      20                 :  *                 László Németh (nemethl@gyorsposta.hu)
      21                 :  *                 Caolan McNamara (caolanm@redhat.com)
      22                 :  *                 Davide Prina
      23                 :  *                 Giuseppe Modugno
      24                 :  *                 Gianluca Turconi
      25                 :  *                 Simon Brouwer
      26                 :  *                 Noll Janos
      27                 :  *                 Biro Arpad
      28                 :  *                 Goldman Eleonora
      29                 :  *                 Sarlos Tamas
      30                 :  *                 Bencsath Boldizsar
      31                 :  *                 Halacsy Peter
      32                 :  *                 Dvornik Laszlo
      33                 :  *                 Gefferth Andras
      34                 :  *                 Nagy Viktor
      35                 :  *                 Varga Daniel
      36                 :  *                 Chris Halls
      37                 :  *                 Rene Engelhard
      38                 :  *                 Bram Moolenaar
      39                 :  *                 Dafydd Jones
      40                 :  *                 Harri Pitkanen
      41                 :  *                 Andras Timar
      42                 :  *                 Tor Lillqvist
      43                 :  * 
      44                 :  * Alternatively, the contents of this file may be used under the terms of
      45                 :  * either the GNU General Public License Version 2 or later (the "GPL"), or
      46                 :  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      47                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      48                 :  * of those above. If you wish to allow use of your version of this file only
      49                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      50                 :  * use your version of this file under the terms of the MPL, indicate your
      51                 :  * decision by deleting the provisions above and replace them with the notice
      52                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      53                 :  * the provisions above, a recipient may use your version of this file under
      54                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      55                 :  *
      56                 :  ******* END LICENSE BLOCK *******/
      57                 : 
      58                 : #include <stdlib.h> 
      59                 : #include <string.h>
      60                 : #include <stdio.h> 
      61                 : #include <ctype.h>
      62                 : 
      63                 : #include "hashmgr.hxx"
      64                 : #include "csutil.hxx"
      65                 : #include "atypes.hxx"
      66                 : 
      67                 : // build a hash table from a munched word list
      68                 : 
      69             110 : HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)
      70                 : {
      71             110 :   tablesize = 0;
      72             110 :   tableptr = NULL;
      73             110 :   flag_mode = FLAG_CHAR;
      74             110 :   complexprefixes = 0;
      75             110 :   utf8 = 0;
      76             110 :   langnum = 0;
      77             110 :   lang = NULL;
      78             110 :   enc = NULL;
      79             110 :   csconv = 0;
      80             110 :   ignorechars = NULL;
      81             110 :   ignorechars_utf16 = NULL;
      82             110 :   ignorechars_utf16_len = 0;
      83             110 :   numaliasf = 0;
      84             110 :   aliasf = NULL;
      85             110 :   numaliasm = 0;
      86             110 :   aliasm = NULL;
      87             110 :   forbiddenword = FORBIDDENWORD; // forbidden word signing flag
      88             110 :   load_config(apath, key);
      89             110 :   int ec = load_tables(tpath, key);
      90             110 :   if (ec) {
      91                 :     /* error condition - what should we do here */
      92               0 :     HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);
      93               0 :     if (tableptr) {
      94               0 :       free(tableptr);
      95               0 :       tableptr = NULL;
      96                 :     }
      97               0 :     tablesize = 0;
      98                 :   }
      99             110 : }
     100                 : 
     101                 : 
     102             110 : HashMgr::~HashMgr()
     103                 : {
     104             110 :   if (tableptr) {
     105                 :     // now pass through hash table freeing up everything
     106                 :     // go through column by column of the table
     107          111266 :     for (int i=0; i < tablesize; i++) {
     108          111156 :       struct hentry * pt = tableptr[i];
     109          111156 :       struct hentry * nt = NULL;
     110          222815 :       while(pt) {
     111             503 :         nt = pt->next;
     112             503 :         if (pt->astr && (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))) free(pt->astr);
     113             503 :         free(pt);
     114             503 :         pt = nt;
     115                 :       }
     116                 :     }
     117             110 :     free(tableptr);
     118                 :   }
     119             110 :   tablesize = 0;
     120                 : 
     121             110 :   if (aliasf) {
     122               2 :     for (int j = 0; j < (numaliasf); j++) free(aliasf[j]);
     123               2 :     free(aliasf);
     124               2 :     aliasf = NULL;
     125               2 :     if (aliasflen) {
     126               2 :       free(aliasflen);
     127               2 :       aliasflen = NULL;
     128                 :     }
     129                 :   }
     130             110 :   if (aliasm) {
     131               2 :     for (int j = 0; j < (numaliasm); j++) free(aliasm[j]);
     132               2 :     free(aliasm);
     133               2 :     aliasm = NULL;
     134                 :   }  
     135                 : 
     136                 : #ifndef OPENOFFICEORG
     137                 : #ifndef MOZILLA_CLIENT
     138                 :   if (utf8) free_utf_tbl();
     139                 : #endif
     140                 : #endif
     141                 : 
     142             110 :   if (enc) free(enc);
     143             110 :   if (lang) free(lang);
     144                 :   
     145             110 :   if (ignorechars) free(ignorechars);
     146             110 :   if (ignorechars_utf16) free(ignorechars_utf16);
     147                 : 
     148                 : #ifdef MOZILLA_CLIENT
     149             110 :     delete [] csconv;
     150                 : #endif
     151             110 : }
     152                 : 
     153                 : // lookup a root word in the hashtable
     154                 : 
     155           14675 : struct hentry * HashMgr::lookup(const char *word) const
     156                 : {
     157                 :     struct hentry * dp;
     158           14675 :     if (tableptr) {
     159           14675 :        dp = tableptr[hash(word)];
     160           14675 :        if (!dp) return NULL;
     161            2567 :        for (  ;  dp != NULL;  dp = dp->next) {
     162            2253 :           if (strcmp(word, dp->word) == 0) return dp;
     163                 :        }
     164                 :     }
     165             314 :     return NULL;
     166                 : }
     167                 : 
     168                 : // add a word to the hash table (private)
     169             505 : int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
     170                 :     int al, const char * desc, bool onlyupcase)
     171                 : {
     172             505 :     bool upcasehomonym = false;
     173             505 :     int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0;
     174                 :     // variable-length hash record with word and optional fields
     175                 :     struct hentry* hp = 
     176             505 :         (struct hentry *) malloc (sizeof(struct hentry) + wbl + descl);
     177             505 :     if (!hp) return 1;
     178             505 :     char * hpw = hp->word;
     179             505 :     strcpy(hpw, word);
     180             505 :     if (ignorechars != NULL) {
     181              14 :       if (utf8) {
     182              12 :         remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len);
     183                 :       } else {
     184               2 :         remove_ignored_chars(hpw, ignorechars);
     185                 :       }
     186                 :     }
     187             505 :     if (complexprefixes) {
     188               6 :         if (utf8) reverseword_utf(hpw); else reverseword(hpw);
     189                 :     }
     190                 : 
     191             505 :     int i = hash(hpw);
     192                 : 
     193             505 :     hp->blen = (unsigned char) wbl;
     194             505 :     hp->clen = (unsigned char) wcl;
     195             505 :     hp->alen = (short) al;
     196             505 :     hp->astr = aff;
     197             505 :     hp->next = NULL;      
     198             505 :     hp->next_homonym = NULL;
     199                 : 
     200                 :     // store the description string or its pointer
     201             505 :     if (desc) {
     202              43 :         hp->var = H_OPT;
     203              43 :         if (aliasm) {
     204               2 :             hp->var += H_OPT_ALIASM;
     205               2 :             store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc)));
     206                 :         } else {
     207              41 :             strcpy(hpw + wbl + 1, desc);
     208              41 :             if (complexprefixes) {
     209               1 :                 if (utf8) reverseword_utf(HENTRY_DATA(hp));
     210               1 :                 else reverseword(HENTRY_DATA(hp));
     211                 :             }
     212                 :         }
     213              43 :         if (strstr(HENTRY_DATA(hp), MORPH_PHON)) hp->var += H_OPT_PHON;
     214             462 :     } else hp->var = 0;
     215                 : 
     216             505 :        struct hentry * dp = tableptr[i];
     217             505 :        if (!dp) {
     218             486 :          tableptr[i] = hp;
     219             486 :          return 0;
     220                 :        }
     221              43 :        while (dp->next != NULL) {
     222               5 :          if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) {
     223                 :             // remove hidden onlyupcase homonym
     224               0 :             if (!onlyupcase) {
     225               0 :                 if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
     226               0 :                     free(dp->astr);
     227               0 :                     dp->astr = hp->astr;
     228               0 :                     dp->alen = hp->alen;
     229               0 :                     free(hp);
     230               0 :                     return 0;
     231                 :                 } else {
     232               0 :                     dp->next_homonym = hp;
     233                 :                 }
     234                 :             } else {
     235               0 :                 upcasehomonym = true;
     236                 :             }
     237                 :          }
     238               5 :          dp=dp->next;
     239                 :        }
     240              19 :        if (strcmp(hp->word, dp->word) == 0) {
     241                 :             // remove hidden onlyupcase homonym
     242              16 :             if (!onlyupcase) {
     243              15 :                 if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
     244               1 :                     free(dp->astr);
     245               1 :                     dp->astr = hp->astr;
     246               1 :                     dp->alen = hp->alen;
     247               1 :                     free(hp);
     248               1 :                     return 0;
     249                 :                 } else {
     250              14 :                     dp->next_homonym = hp;
     251                 :                 }
     252                 :             } else {
     253               1 :                 upcasehomonym = true;
     254                 :             }
     255                 :        }
     256              18 :        if (!upcasehomonym) {
     257              17 :             dp->next = hp;
     258                 :        } else {
     259                 :             // remove hidden onlyupcase homonym
     260               1 :             if (hp->astr) free(hp->astr);
     261               1 :             free(hp);
     262                 :        }
     263              18 :     return 0;
     264                 : }     
     265                 : 
     266             488 : int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl,
     267                 :     unsigned short * flags, int al, char * dp, int captype)
     268                 : {
     269                 :     // add inner capitalized forms to handle the following allcap forms:
     270                 :     // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG
     271                 :     // Allcaps with suffixes: CIA's -> CIA'S    
     272             505 :     if (((captype == HUHCAP) || (captype == HUHINITCAP) ||
     273                 :       ((captype == ALLCAP) && (flags != NULL))) &&
     274              14 :       !((flags != NULL) && TESTAFF(flags, forbiddenword, al))) {
     275              17 :           unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (al+1));
     276              17 :           if (!flags2) return 1;
     277              17 :           if (al) memcpy(flags2, flags, al * sizeof(unsigned short));
     278              17 :           flags2[al] = ONLYUPCASEFLAG;
     279              17 :           if (utf8) {
     280                 :               char st[BUFSIZE];
     281                 :               w_char w[BUFSIZE];
     282               4 :               int wlen = u8_u16(w, BUFSIZE, word);
     283               4 :               mkallsmall_utf(w, wlen, langnum);
     284               4 :               mkallcap_utf(w, 1, langnum);
     285               4 :               u16_u8(st, BUFSIZE, w, wlen);
     286               4 :               return add_word(st,wbl,wcl,flags2,al+1,dp, true);
     287                 :            } else {
     288              13 :                mkallsmall(word, csconv);
     289              13 :                mkinitcap(word, csconv);
     290              13 :                return add_word(word,wbl,wcl,flags2,al+1,dp, true);
     291                 :            }
     292                 :     }
     293             471 :     return 0;
     294                 : }
     295                 : 
     296                 : // detect captype and modify word length for UTF-8 encoding
     297             488 : int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) {
     298                 :     int len;
     299             488 :     if (utf8) {
     300                 :       w_char dest_utf[BUFSIZE];
     301             131 :       len = u8_u16(dest_utf, BUFSIZE, word);
     302             131 :       *captype = get_captype_utf8(dest_utf, len, langnum);
     303                 :     } else {
     304             357 :       len = wbl;
     305             357 :       *captype = get_captype((char *) word, len, csconv);
     306                 :     }
     307             488 :     return len;
     308                 : }
     309                 : 
     310                 : // remove word (personal dictionary function for standalone applications)
     311               0 : int HashMgr::remove(const char * word)
     312                 : {
     313               0 :     struct hentry * dp = lookup(word);
     314               0 :     while (dp) {
     315               0 :         if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {
     316                 :             unsigned short * flags =
     317               0 :                 (unsigned short *) malloc(sizeof(short) * (dp->alen + 1));
     318               0 :             if (!flags) return 1;
     319               0 :             for (int i = 0; i < dp->alen; i++) flags[i] = dp->astr[i];
     320               0 :             flags[dp->alen] = forbiddenword;
     321               0 :             dp->astr = flags;
     322               0 :             dp->alen++;
     323               0 :             flag_qsort(flags, 0, dp->alen);
     324                 :         }
     325               0 :         dp = dp->next_homonym;
     326                 :     }
     327               0 :     return 0;
     328                 : }
     329                 : 
     330                 : /* remove forbidden flag to add a personal word to the hash */
     331               0 : int HashMgr::remove_forbidden_flag(const char * word) {
     332               0 :     struct hentry * dp = lookup(word);
     333               0 :     if (!dp) return 1;
     334               0 :     while (dp) {
     335               0 :          if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) {
     336               0 :             if (dp->alen == 1) dp->alen = 0; // XXX forbidden words of personal dic.
     337                 :             else {
     338                 :                 unsigned short * flags2 =
     339               0 :                     (unsigned short *) malloc(sizeof(short) * (dp->alen - 1));
     340               0 :                 if (!flags2) return 1;
     341               0 :                 int i, j = 0;
     342               0 :                 for (i = 0; i < dp->alen; i++) {
     343               0 :                     if (dp->astr[i] != forbiddenword) flags2[j++] = dp->astr[i];
     344                 :                 }
     345               0 :                 dp->alen--;
     346               0 :                 dp->astr = flags2; // XXX allowed forbidden words
     347                 :             }
     348                 :          }
     349               0 :          dp = dp->next_homonym;
     350                 :        }
     351               0 :    return 0;
     352                 : }
     353                 : 
     354                 : // add a custom dic. word to the hash table (public)
     355               0 : int HashMgr::add(const char * word)
     356                 : {
     357               0 :     unsigned short * flags = NULL;
     358               0 :     int al = 0;
     359               0 :     if (remove_forbidden_flag(word)) {
     360                 :         int captype;
     361               0 :         int wbl = strlen(word);
     362               0 :         int wcl = get_clen_and_captype(word, wbl, &captype);
     363               0 :         add_word(word, wbl, wcl, flags, al, NULL, false);
     364               0 :         return add_hidden_capitalized_word((char *) word, wbl, wcl, flags, al, NULL, captype);
     365                 :     }
     366               0 :     return 0;
     367                 : }
     368                 : 
     369               0 : int HashMgr::add_with_affix(const char * word, const char * example)
     370                 : {
     371                 :     // detect captype and modify word length for UTF-8 encoding
     372               0 :     struct hentry * dp = lookup(example);
     373               0 :     remove_forbidden_flag(word);
     374               0 :     if (dp && dp->astr) {
     375                 :         int captype;
     376               0 :         int wbl = strlen(word);
     377               0 :         int wcl = get_clen_and_captype(word, wbl, &captype);
     378               0 :         if (aliasf) {
     379               0 :             add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false);    
     380                 :         } else {
     381               0 :             unsigned short * flags = (unsigned short *) malloc (dp->alen * sizeof(short));
     382               0 :             if (flags) {
     383               0 :                 memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short));
     384               0 :                 add_word(word, wbl, wcl, flags, dp->alen, NULL, false);
     385               0 :             } else return 1;
     386                 :         }
     387               0 :         return add_hidden_capitalized_word((char *) word, wbl, wcl, dp->astr, dp->alen, NULL, captype);
     388                 :     }
     389               0 :     return 1;
     390                 : }
     391                 : 
     392                 : // walk the hash table entry by entry - null at end
     393                 : // initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp);
     394               0 : struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
     395                 : {  
     396               0 :   if (hp && hp->next != NULL) return hp->next;
     397               0 :   for (col++; col < tablesize; col++) {
     398               0 :     if (tableptr[col]) return tableptr[col];
     399                 :   }
     400                 :   // null at end and reset to start
     401               0 :   col = -1;
     402               0 :   return NULL;
     403                 : }
     404                 : 
     405                 : // load a munched word list and build a hash table on the fly
     406             110 : int HashMgr::load_tables(const char * tpath, const char * key)
     407                 : {
     408                 :   int al;
     409                 :   char * ap;
     410                 :   char * dp;
     411                 :   char * dp2;
     412                 :   unsigned short * flags;
     413                 :   char * ts;
     414                 : 
     415                 :   // open dictionary file
     416             110 :   FileMgr * dict = new FileMgr(tpath, key);
     417             110 :   if (dict == NULL) return 1;
     418                 : 
     419                 :   // first read the first line of file to get hash table size */
     420             110 :   if (!(ts = dict->getline())) {
     421               0 :     HUNSPELL_WARNING(stderr, "error: empty dic file\n");
     422               0 :     delete dict;
     423               0 :     return 2;
     424                 :   }
     425             110 :   mychomp(ts);
     426                 : 
     427                 :   /* remove byte order mark */
     428             110 :   if (strncmp(ts,"\xEF\xBB\xBF",3) == 0) {
     429               1 :     memmove(ts, ts+3, strlen(ts+3)+1);
     430                 :     // warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions
     431                 :   }
     432                 : 
     433             110 :   tablesize = atoi(ts);
     434             110 :   if (tablesize == 0) {
     435               0 :     HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the dic file\n");
     436               0 :     delete dict;
     437               0 :     return 4;
     438                 :   }
     439             110 :   tablesize = tablesize + 5 + USERWORD;
     440             110 :   if ((tablesize %2) == 0) tablesize++;
     441                 : 
     442                 :   // allocate the hash table
     443             110 :   tableptr = (struct hentry **) malloc(tablesize * sizeof(struct hentry *));
     444             110 :   if (! tableptr) {
     445               0 :     delete dict;
     446               0 :     return 3;
     447                 :   }
     448             110 :   for (int i=0; i<tablesize; i++) tableptr[i] = NULL;
     449                 : 
     450                 :   // loop through all words on much list and add to hash
     451                 :   // table and create word and affix strings
     452                 : 
     453             708 :   while ((ts = dict->getline())) {
     454             488 :     mychomp(ts);
     455                 :     // split each line into word and morphological description
     456             488 :     dp = ts;
     457             977 :     while ((dp = strchr(dp, ':'))) {
     458              30 :         if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) {
     459              29 :             for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--);
     460              29 :             if (dp < ts) { // missing word
     461               0 :                 dp = NULL;
     462                 :             } else {
     463              29 :                 *(dp + 1) = '\0';
     464              29 :                 dp = dp + 2;
     465                 :             }
     466              29 :             break;
     467                 :         }
     468               1 :         dp++;
     469                 :     }
     470                 : 
     471                 :     // tabulator is the old morphological field separator
     472             488 :     dp2 = strchr(ts, '\t');
     473             488 :     if (dp2 && (!dp || dp2 < dp)) {
     474              14 :         *dp2 = '\0';
     475              14 :         dp = dp2 + 1;
     476                 :     }
     477                 : 
     478                 :     // split each line into word and affix char strings
     479                 :     // "\/" signs slash in words (not affix separator)
     480                 :     // "/" at beginning of the line is word character (not affix separator)
     481             488 :     ap = strchr(ts,'/');
     482             984 :     while (ap) {
     483             317 :         if (ap == ts) {
     484               1 :             ap++;
     485               1 :             continue;
     486             316 :         } else if (*(ap - 1) != '\\') break;
     487                 :         // replace "\/" with "/"
     488               7 :         for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++);
     489               7 :         ap = strchr(ap,'/');
     490                 :     }
     491                 : 
     492             488 :     if (ap) {
     493             309 :       *ap = '\0';
     494             309 :       if (aliasf) {
     495               2 :         int index = atoi(ap + 1);
     496               2 :         al = get_aliasf(index, &flags, dict);
     497               2 :         if (!al) {
     498               0 :             HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", dict->getlinenum());
     499               0 :             *ap = '\0';
     500                 :         }
     501                 :       } else {
     502             307 :         al = decode_flags(&flags, ap + 1, dict);
     503             307 :         if (al == -1) {
     504               0 :             HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
     505               0 :             delete dict;
     506               0 :             return 6;
     507                 :         }
     508             307 :         flag_qsort(flags, 0, al);
     509                 :       }
     510                 :     } else {
     511             179 :       al = 0;
     512             179 :       ap = NULL;
     513             179 :       flags = NULL;
     514                 :     }
     515                 : 
     516                 :     int captype;
     517             488 :     int wbl = strlen(ts);
     518             488 :     int wcl = get_clen_and_captype(ts, wbl, &captype);
     519                 :     // add the word and its index plus its capitalized form optionally
     520             976 :     if (add_word(ts,wbl,wcl,flags,al,dp, false) ||
     521             488 :         add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) {
     522               0 :         delete dict;
     523               0 :         return 5;
     524                 :     }
     525                 :   }
     526                 : 
     527             110 :   delete dict;
     528             110 :   return 0;
     529                 : }
     530                 : 
     531                 : // the hash function is a simple load and rotate
     532                 : // algorithm borrowed
     533                 : 
     534           15180 : int HashMgr::hash(const char * word) const
     535                 : {
     536           15180 :     long  hv = 0;
     537           67671 :     for (int i=0; i < 4  &&  *word != 0; i++)
     538           52491 :         hv = (hv << 8) | (*word++);
     539           92159 :     while (*word != 0) {
     540           61799 :       ROTATE(hv,ROTATE_LEN);
     541           61799 :       hv ^= (*word++);
     542                 :     }
     543           15180 :     return (unsigned long) hv % tablesize;
     544                 : }
     545                 : 
     546             446 : int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af) {
     547                 :     int len;
     548             446 :     if (*flags == '\0') {
     549               1 :         *result = NULL;
     550               1 :         return 0;
     551                 :     }
     552             445 :     switch (flag_mode) {
     553                 :       case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
     554              45 :         len = strlen(flags);
     555              45 :         if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", af->getlinenum());
     556              45 :         len /= 2;
     557              45 :         *result = (unsigned short *) malloc(len * sizeof(short));
     558              45 :         if (!*result) return -1;
     559             136 :         for (int i = 0; i < len; i++) {
     560              91 :             (*result)[i] = (((unsigned short) flags[i * 2]) << 8) + (unsigned short) flags[i * 2 + 1]; 
     561                 :         }
     562              45 :         break;
     563                 :       }
     564                 :       case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233)
     565                 :         int i;
     566              32 :         len = 1;
     567              32 :         char * src = flags; 
     568                 :         unsigned short * dest;
     569                 :         char * p;
     570             283 :         for (p = flags; *p; p++) {
     571             251 :           if (*p == ',') len++;
     572                 :         }
     573              32 :         *result = (unsigned short *) malloc(len * sizeof(short));
     574              32 :         if (!*result) return -1;
     575              32 :         dest = *result;
     576             283 :         for (p = flags; *p; p++) {
     577             251 :           if (*p == ',') {
     578              25 :             i = atoi(src);
     579              25 :             if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n",
     580               0 :               af->getlinenum(), i, DEFAULTFLAGS - 1);
     581              25 :             *dest = (unsigned short) i;
     582              25 :             if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", af->getlinenum());
     583              25 :             src = p + 1;
     584              25 :             dest++;
     585                 :           }
     586                 :         }
     587              32 :         i = atoi(src);
     588              32 :         if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n",
     589               0 :           af->getlinenum(), i, DEFAULTFLAGS - 1);
     590              32 :         *dest = (unsigned short) i;
     591              32 :         if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", af->getlinenum());
     592              32 :         break;
     593                 :       }    
     594                 :       case FLAG_UNI: { // UTF-8 characters
     595                 :         w_char w[BUFSIZE/2];
     596               2 :         len = u8_u16(w, BUFSIZE/2, flags);
     597               2 :         *result = (unsigned short *) malloc(len * sizeof(short));
     598               2 :         if (!*result) return -1;
     599               2 :         memcpy(*result, w, len * sizeof(short));
     600               2 :         break;
     601                 :       }
     602                 :       default: { // Ispell's one-character flags (erfg -> e r f g)
     603                 :         unsigned short * dest;
     604             366 :         len = strlen(flags);
     605             366 :         *result = (unsigned short *) malloc(len * sizeof(short));
     606             366 :         if (!*result) return -1;
     607             366 :         dest = *result;
     608            1073 :         for (unsigned char * p = (unsigned char *) flags; *p; p++) {
     609             707 :           *dest = (unsigned short) *p;
     610             707 :           dest++;
     611                 :         }
     612                 :       }
     613                 :     }
     614             445 :     return len;
     615                 : }
     616                 : 
     617             725 : unsigned short HashMgr::decode_flag(const char * f) {
     618             725 :     unsigned short s = 0;
     619                 :     int i;
     620             725 :     switch (flag_mode) {
     621                 :       case FLAG_LONG:
     622              37 :         s = ((unsigned short) f[0] << 8) + (unsigned short) f[1];
     623              37 :         break;
     624                 :       case FLAG_NUM:
     625              22 :         i = atoi(f);
     626              22 :         if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", i, DEFAULTFLAGS - 1);
     627              22 :         s = (unsigned short) i;
     628              22 :         break;
     629                 :       case FLAG_UNI:
     630               8 :         u8_u16((w_char *) &s, 1, f);
     631               8 :         break;
     632                 :       default:
     633             658 :         s = (unsigned short) *((unsigned char *)f);
     634                 :     }
     635             725 :     if (s == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");
     636             725 :     return s;
     637                 : }
     638                 : 
     639               0 : char * HashMgr::encode_flag(unsigned short f) {
     640                 :     unsigned char ch[10];
     641               0 :     if (f==0) return mystrdup("(NULL)");
     642               0 :     if (flag_mode == FLAG_LONG) {
     643               0 :         ch[0] = (unsigned char) (f >> 8);
     644               0 :         ch[1] = (unsigned char) (f - ((f >> 8) << 8));
     645               0 :         ch[2] = '\0';
     646               0 :     } else if (flag_mode == FLAG_NUM) {
     647               0 :         sprintf((char *) ch, "%d", f);
     648               0 :     } else if (flag_mode == FLAG_UNI) {
     649               0 :         u16_u8((char *) &ch, 10, (w_char *) &f, 1);
     650                 :     } else {
     651               0 :         ch[0] = (unsigned char) (f);
     652               0 :         ch[1] = '\0';
     653                 :     }
     654               0 :     return mystrdup((char *) ch);
     655                 : }
     656                 : 
     657                 : // read in aff file and set flag mode
     658             110 : int  HashMgr::load_config(const char * affpath, const char * key)
     659                 : {
     660                 :   char * line; // io buffers
     661             110 :   int firstline = 1;
     662                 :  
     663                 :   // open the affix file
     664             110 :   FileMgr * afflst = new FileMgr(affpath, key);
     665             110 :   if (!afflst) {
     666               0 :     HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath);
     667               0 :     return 1;
     668                 :   }
     669                 : 
     670                 :     // read in each line ignoring any that do not
     671                 :     // start with a known line type indicator
     672                 : 
     673            1105 :     while ((line = afflst->getline())) {
     674             939 :         mychomp(line);
     675                 : 
     676                 :        /* remove byte order mark */
     677             939 :        if (firstline) {
     678             110 :          firstline = 0;
     679             110 :          if (strncmp(line,"\xEF\xBB\xBF",3) == 0) memmove(line, line+3, strlen(line+3)+1);
     680                 :        }
     681                 : 
     682                 :         /* parse in the try string */
     683             939 :         if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) {
     684               8 :             if (flag_mode != FLAG_CHAR) {
     685               0 :                 HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of the FLAG affix file parameter\n", afflst->getlinenum());
     686                 :             }
     687               8 :             if (strstr(line, "long")) flag_mode = FLAG_LONG;
     688               8 :             if (strstr(line, "num")) flag_mode = FLAG_NUM;
     689               8 :             if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI;
     690               8 :             if (flag_mode == FLAG_CHAR) {
     691               0 :                 HUNSPELL_WARNING(stderr, "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n", afflst->getlinenum());
     692                 :             }
     693                 :         }
     694             939 :         if (strncmp(line,"FORBIDDENWORD",13) == 0) {
     695               7 :           char * st = NULL;
     696               7 :           if (parse_string(line, &st, afflst->getlinenum())) {
     697               0 :              delete afflst;
     698               0 :              return 1;
     699                 :           }
     700               7 :           forbiddenword = decode_flag(st);
     701               7 :           free(st);
     702                 :         }
     703             939 :         if (strncmp(line, "SET", 3) == 0) {
     704              37 :           if (parse_string(line, &enc, afflst->getlinenum())) {
     705               0 :              delete afflst;
     706               0 :              return 1;
     707                 :           }         
     708              37 :           if (strcmp(enc, "UTF-8") == 0) {
     709              28 :             utf8 = 1;
     710                 : #ifndef OPENOFFICEORG
     711                 : #ifndef MOZILLA_CLIENT
     712                 :             initialize_utf_tbl();
     713                 : #endif
     714                 : #endif
     715               9 :           } else csconv = get_current_cs(enc);
     716                 :         }
     717             939 :         if (strncmp(line, "LANG", 4) == 0) {
     718               0 :           if (parse_string(line, &lang, afflst->getlinenum())) {
     719               0 :              delete afflst;
     720               0 :              return 1;
     721                 :           }         
     722               0 :           langnum = get_lang_num(lang);
     723                 :         }
     724                 : 
     725                 :        /* parse in the ignored characters (for example, Arabic optional diacritics characters */
     726             939 :        if (strncmp(line,"IGNORE",6) == 0) {
     727               4 :           if (parse_array(line, &ignorechars, &ignorechars_utf16,
     728               4 :                  &ignorechars_utf16_len, utf8, afflst->getlinenum())) {
     729               0 :              delete afflst;
     730               0 :              return 1;
     731                 :           }
     732                 :        }
     733                 : 
     734             939 :        if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) {
     735               2 :           if (parse_aliasf(line, afflst)) {
     736               0 :              delete afflst;
     737               0 :              return 1;
     738                 :           }
     739                 :        }
     740                 : 
     741             939 :        if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) {
     742               2 :           if (parse_aliasm(line, afflst)) {
     743               0 :              delete afflst;
     744               0 :              return 1;
     745                 :           }
     746                 :        }
     747                 : 
     748             939 :        if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1;
     749             939 :        if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace(line[3])) break;
     750                 :     }
     751             110 :     if (csconv == NULL) csconv = get_current_cs(SPELL_ENCODING);
     752             110 :     delete afflst;
     753             110 :     return 0;
     754                 : }
     755                 : 
     756                 : /* parse in the ALIAS table */
     757               2 : int  HashMgr::parse_aliasf(char * line, FileMgr * af)
     758                 : {
     759               2 :    if (numaliasf != 0) {
     760               0 :       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
     761               0 :       return 1;
     762                 :    }
     763               2 :    char * tp = line;
     764                 :    char * piece;
     765               2 :    int i = 0;
     766               2 :    int np = 0;
     767               2 :    piece = mystrsep(&tp, 0);
     768               8 :    while (piece) {
     769               4 :        if (*piece != '\0') {
     770               4 :           switch(i) {
     771               2 :              case 0: { np++; break; }
     772                 :              case 1: { 
     773               2 :                        numaliasf = atoi(piece);
     774               2 :                        if (numaliasf < 1) {
     775               0 :                           numaliasf = 0;
     776               0 :                           aliasf = NULL;
     777               0 :                           aliasflen = NULL;
     778               0 :                           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
     779               0 :                           return 1;
     780                 :                        }
     781               2 :                        aliasf = (unsigned short **) malloc(numaliasf * sizeof(unsigned short *));
     782               2 :                        aliasflen = (unsigned short *) malloc(numaliasf * sizeof(short));
     783               2 :                        if (!aliasf || !aliasflen) {
     784               0 :                           numaliasf = 0;
     785               0 :                           if (aliasf) free(aliasf);
     786               0 :                           if (aliasflen) free(aliasflen);
     787               0 :                           aliasf = NULL;
     788               0 :                           aliasflen = NULL;
     789               0 :                           return 1;
     790                 :                        }
     791               2 :                        np++;
     792               2 :                        break;
     793                 :                      }
     794               0 :              default: break;
     795                 :           }
     796               4 :           i++;
     797                 :        }
     798               4 :        piece = mystrsep(&tp, 0);
     799                 :    }
     800               2 :    if (np != 2) {
     801               0 :       numaliasf = 0;
     802               0 :       free(aliasf);
     803               0 :       free(aliasflen);
     804               0 :       aliasf = NULL;
     805               0 :       aliasflen = NULL;
     806               0 :       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
     807               0 :       return 1;
     808                 :    } 
     809                 :  
     810                 :    /* now parse the numaliasf lines to read in the remainder of the table */
     811                 :    char * nl;
     812               6 :    for (int j=0; j < numaliasf; j++) {
     813               4 :         if (!(nl = af->getline())) return 1;
     814               4 :         mychomp(nl);
     815               4 :         tp = nl;
     816               4 :         i = 0;
     817               4 :         aliasf[j] = NULL;
     818               4 :         aliasflen[j] = 0;
     819               4 :         piece = mystrsep(&tp, 0);
     820              16 :         while (piece) {
     821               8 :            if (*piece != '\0') {
     822               8 :                switch(i) {
     823                 :                   case 0: {
     824               4 :                              if (strncmp(piece,"AF",2) != 0) {
     825               0 :                                  numaliasf = 0;
     826               0 :                                  free(aliasf);
     827               0 :                                  free(aliasflen);
     828               0 :                                  aliasf = NULL;
     829               0 :                                  aliasflen = NULL;
     830               0 :                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
     831               0 :                                  return 1;
     832                 :                              }
     833               4 :                              break;
     834                 :                           }
     835                 :                   case 1: {
     836               4 :                             aliasflen[j] = (unsigned short) decode_flags(&(aliasf[j]), piece, af);
     837               4 :                             flag_qsort(aliasf[j], 0, aliasflen[j]);
     838               4 :                             break; 
     839                 :                           }
     840               0 :                   default: break;
     841                 :                }
     842               8 :                i++;
     843                 :            }
     844               8 :            piece = mystrsep(&tp, 0);
     845                 :         }
     846               4 :         if (!aliasf[j]) {
     847               0 :              free(aliasf);
     848               0 :              free(aliasflen);
     849               0 :              aliasf = NULL;
     850               0 :              aliasflen = NULL;
     851               0 :              numaliasf = 0;
     852               0 :              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
     853               0 :              return 1;
     854                 :         }
     855                 :    }
     856               2 :    return 0;
     857                 : }
     858                 : 
     859             293 : int HashMgr::is_aliasf() {
     860             293 :     return (aliasf != NULL);
     861                 : }
     862                 : 
     863               4 : int HashMgr::get_aliasf(int index, unsigned short ** fvec, FileMgr * af) {
     864               4 :     if ((index > 0) && (index <= numaliasf)) {
     865               4 :         *fvec = aliasf[index - 1];
     866               4 :         return aliasflen[index - 1];
     867                 :     }
     868               0 :     HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n", af->getlinenum(), index);
     869               0 :     *fvec = NULL;
     870               0 :     return 0;
     871                 : }
     872                 : 
     873                 : /* parse morph alias definitions */
     874               2 : int  HashMgr::parse_aliasm(char * line, FileMgr * af)
     875                 : {
     876               2 :    if (numaliasm != 0) {
     877               0 :       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
     878               0 :       return 1;
     879                 :    }
     880               2 :    char * tp = line;
     881                 :    char * piece;
     882               2 :    int i = 0;
     883               2 :    int np = 0;
     884               2 :    piece = mystrsep(&tp, 0);
     885               8 :    while (piece) {
     886               4 :        if (*piece != '\0') {
     887               4 :           switch(i) {
     888               2 :              case 0: { np++; break; }
     889                 :              case 1: { 
     890               2 :                        numaliasm = atoi(piece);
     891               2 :                        if (numaliasm < 1) {
     892               0 :                           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
     893               0 :                           return 1;
     894                 :                        }
     895               2 :                        aliasm = (char **) malloc(numaliasm * sizeof(char *));
     896               2 :                        if (!aliasm) {
     897               0 :                           numaliasm = 0;
     898               0 :                           return 1;
     899                 :                        }
     900               2 :                        np++;
     901               2 :                        break;
     902                 :                      }
     903               0 :              default: break;
     904                 :           }
     905               4 :           i++;
     906                 :        }
     907               4 :        piece = mystrsep(&tp, 0);
     908                 :    }
     909               2 :    if (np != 2) {
     910               0 :       numaliasm = 0;
     911               0 :       free(aliasm);
     912               0 :       aliasm = NULL;
     913               0 :       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
     914               0 :       return 1;
     915                 :    } 
     916                 : 
     917                 :    /* now parse the numaliasm lines to read in the remainder of the table */
     918               2 :    char * nl = line;
     919               9 :    for (int j=0; j < numaliasm; j++) {
     920               7 :         if (!(nl = af->getline())) return 1;
     921               7 :         mychomp(nl);
     922               7 :         tp = nl;
     923               7 :         i = 0;
     924               7 :         aliasm[j] = NULL;
     925               7 :         piece = mystrsep(&tp, ' ');
     926              28 :         while (piece) {
     927              14 :            if (*piece != '\0') {
     928              14 :                switch(i) {
     929                 :                   case 0: {
     930               7 :                              if (strncmp(piece,"AM",2) != 0) {
     931               0 :                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
     932               0 :                                  numaliasm = 0;
     933               0 :                                  free(aliasm);
     934               0 :                                  aliasm = NULL;
     935               0 :                                  return 1;
     936                 :                              }
     937               7 :                              break;
     938                 :                           }
     939                 :                   case 1: {
     940                 :                             // add the remaining of the line
     941               7 :                             if (*tp) {
     942               1 :                                 *(tp - 1) = ' ';
     943               1 :                                 tp = tp + strlen(tp);
     944                 :                             }
     945               7 :                             if (complexprefixes) {
     946               4 :                                 if (utf8) reverseword_utf(piece);
     947               4 :                                     else reverseword(piece);
     948                 :                             }
     949               7 :                             aliasm[j] = mystrdup(piece);
     950               7 :                             if (!aliasm[j]) {
     951               0 :                                  numaliasm = 0;
     952               0 :                                  free(aliasm);
     953               0 :                                  aliasm = NULL;
     954               0 :                                  return 1;
     955                 :                             }
     956               7 :                             break; }
     957               0 :                   default: break;
     958                 :                }
     959              14 :                i++;
     960                 :            }
     961              14 :            piece = mystrsep(&tp, ' ');
     962                 :         }
     963               7 :         if (!aliasm[j]) {
     964               0 :              numaliasm = 0;
     965               0 :              free(aliasm);
     966               0 :              aliasm = NULL;
     967               0 :              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
     968               0 :              return 1;
     969                 :         }
     970                 :    }
     971               2 :    return 0;
     972                 : }
     973                 : 
     974             208 : int HashMgr::is_aliasm() {
     975             208 :     return (aliasm != NULL);
     976                 : }
     977                 : 
     978               7 : char * HashMgr::get_aliasm(int index) {
     979               7 :     if ((index > 0) && (index <= numaliasm)) return aliasm[index - 1];
     980               0 :     HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index);
     981               0 :     return NULL;
     982                 : }

Generated by: LCOV version 1.7