LCOV - code coverage report
Current view: directory - extensions/spellcheck/hunspell/src - phonet.cpp (source / functions) Found Hit Coverage
Test: app.info Lines: 147 8 5.4 %
Date: 2012-06-02 Functions: 4 1 25.0 %

       1                 : /******* BEGIN LICENSE BLOCK *******
       2                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       3                 :  * 
       4                 :  * The contents of this file are subject to the Mozilla Public License Version
       5                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       6                 :  * the License. You may obtain a copy of the License at
       7                 :  * http://www.mozilla.org/MPL/
       8                 :  * 
       9                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      10                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      11                 :  * for the specific language governing rights and limitations under the
      12                 :  * License.
      13                 :  * 
      14                 :  * The Initial Developer of the Original Code is Björn Jacke. Portions created
      15                 :  * by the Initial Developers are Copyright (C) 2000-2007 the Initial
      16                 :  * Developers. All Rights Reserved.
      17                 :  * 
      18                 :  * Contributor(s): Björn Jacke (bjoern.jacke@gmx.de)
      19                 :  *                 László Németh (nemethl@gyorsposta.hu)
      20                 :  *                 Caolan McNamara (caolanm@redhat.com)
      21                 :  * 
      22                 :  * Alternatively, the contents of this file may be used under the terms of
      23                 :  * either the GNU General Public License Version 2 or later (the "GPL"), or
      24                 :  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      25                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      26                 :  * of those above. If you wish to allow use of your version of this file only
      27                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      28                 :  * use your version of this file under the terms of the MPL, indicate your
      29                 :  * decision by deleting the provisions above and replace them with the notice
      30                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      31                 :  * the provisions above, a recipient may use your version of this file under
      32                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      33                 :  *
      34                 :  * Changelog:
      35                 :  *  2000-01-05  Björn Jacke <bjoern.jacke AT gmx.de>
      36                 :  *              Initial Release inspired by the article about phonetic
      37                 :  *              transformations out of c't 25/1999
      38                 :  *
      39                 :  *  2007-07-26  Björn Jacke <bjoern.jacke AT gmx.de>
      40                 :  *              Released under MPL/GPL/LGPL tri-license for Hunspell
      41                 :  *
      42                 :  *  2007-08-23  László Németh <nemeth at OOo>
      43                 :  *              Porting from Aspell to Hunspell using C-like structs
      44                 :  *
      45                 :  ******* END LICENSE BLOCK *******/
      46                 : 
      47                 : #include <stdlib.h> 
      48                 : #include <string.h>
      49                 : #include <stdio.h> 
      50                 : #include <ctype.h>
      51                 : 
      52                 : #include "csutil.hxx"
      53                 : #include "phonet.hxx"
      54                 : 
      55               1 : void init_phonet_hash(phonetable & parms) 
      56                 :   {
      57                 :     int i, k;
      58                 : 
      59             257 :     for (i = 0; i < HASHSIZE; i++) {
      60             256 :       parms.hash[i] = -1;
      61                 :     }
      62                 : 
      63             106 :     for (i = 0; parms.rules[i][0] != '\0'; i += 2) {
      64                 :       /**  set hash value  **/
      65             105 :       k = (unsigned char) parms.rules[i][0];
      66                 : 
      67             105 :       if (parms.hash[k] < 0) {
      68              27 :         parms.hash[k] = i;
      69                 :       }
      70                 :     }
      71               1 :   }
      72                 : 
      // Like strcpy, but safe when the two strings overlap.
      // Copies the NUL-terminated string at src (terminator included) to dest.
      // memmove() is used so the overlap may go in either direction; a plain
      // forward byte copy is only correct when dest < src and runs past the end
      // of the buffer when dest points into the string being copied.
      static inline void strmove(char * dest, const char * src) {
        memmove(dest, src, strlen(src) + 1);
      }
      80                 : 
      // Letter test used by the rule matcher in phonet(): bytes below 128 are
      // classified by isalpha(); every byte >= 128 is treated as a letter.
      // Returns non-zero for a letter, 0 otherwise.
      static int myisalpha(char ch) {
        const unsigned char byte = (unsigned char) ch;
        return (byte >= 128) ? 1 : isalpha(byte);
      }
      85                 : 
      86                 : /*  phonetic transcription algorithm                   */
      87                 : /*  see: http://aspell.net/man-html/Phonetic-Code.html */
      88                 : /*  convert string to uppercase before this call       */
      89               0 : int phonet (const char * inword, char * target,
      90                 :               int len,
      91                 :               phonetable & parms)
      92                 :   {
      93                 :     /**       Do phonetic transformation.       **/
      94                 :     /**  "len" = length of "inword" incl. '\0'. **/
      95                 : 
      96                 :     /**  result:  >= 0:  length of "target"    **/
      97                 :     /**            otherwise:  error            **/
      98                 : 
      99               0 :     int  i,j,k=0,n,p,z;
     100               0 :     int  k0,n0,p0=-333,z0;
     101                 :     char c, c0;
     102                 :     const char * s;
     103                 :     typedef unsigned char uchar;    
     104                 :     char word[MAXPHONETUTF8LEN + 1];
     105               0 :     if (len == -1) len = strlen(inword);
     106               0 :     if (len > MAXPHONETUTF8LEN) return 0;
     107               0 :     strcpy(word, inword);
     108                 :   
     109                 :     /**  check word  **/
     110               0 :     i = j = z = 0;
     111               0 :     while ((c = word[i]) != '\0') {
     112               0 :       n = parms.hash[(uchar) c];
     113               0 :       z0 = 0;
     114                 : 
     115               0 :       if (n >= 0) {
     116                 :         /**  check all rules for the same letter  **/
     117               0 :         while (parms.rules[n][0] == c) {
     118                 : 
     119                 :           /**  check whole string  **/
     120               0 :           k = 1;   /** number of found letters  **/
     121               0 :           p = 5;   /** default priority  **/
     122               0 :           s = parms.rules[n];
     123               0 :           s++;     /**  important for (see below)  "*(s-1)"  **/
     124                 :           
     125               0 :           while (*s != '\0'  &&  word[i+k] == *s
     126               0 :                  &&  !isdigit ((unsigned char) *s)  &&  strchr ("(-<^$", *s) == NULL) {
     127               0 :             k++;
     128               0 :             s++;
     129                 :           }
     130               0 :           if (*s == '(') {
     131                 :             /**  check letters in "(..)"  **/
     132               0 :             if (myisalpha(word[i+k])  // ...could be implied?
     133               0 :                 && strchr(s+1, word[i+k]) != NULL) {
     134               0 :               k++;
     135               0 :               while (*s != ')')
     136               0 :                 s++;
     137               0 :               s++;
     138                 :             }
     139                 :           }
     140               0 :           p0 = (int) *s;
     141               0 :           k0 = k;
     142               0 :           while (*s == '-'  &&  k > 1) {
     143               0 :             k--;
     144               0 :             s++;
     145                 :           }
     146               0 :           if (*s == '<')
     147               0 :             s++;
     148               0 :           if (isdigit ((unsigned char) *s)) {
     149                 :             /**  determine priority  **/
     150               0 :             p = *s - '0';
     151               0 :             s++;
     152                 :           }
     153               0 :           if (*s == '^'  &&  *(s+1) == '^')
     154               0 :             s++;
     155                 : 
     156               0 :           if (*s == '\0'
     157                 :               || (*s == '^'  
     158               0 :                   && (i == 0  ||  ! myisalpha(word[i-1]))
     159               0 :                   && (*(s+1) != '$'
     160               0 :                       || (! myisalpha(word[i+k0]) )))
     161                 :               || (*s == '$'  &&  i > 0  
     162               0 :                   &&  myisalpha(word[i-1])
     163               0 :                   && (! myisalpha(word[i+k0]) ))) 
     164                 :           {
     165                 :             /**  search for followup rules, if:     **/
     166                 :             /**  parms.followup and k > 1  and  NO '-' in searchstring **/
     167               0 :             c0 = word[i+k-1];
     168               0 :             n0 = parms.hash[(uchar) c0];
     169                 : 
     170                 : //            if (parms.followup  &&  k > 1  &&  n0 >= 0
     171               0 :             if (k > 1  &&  n0 >= 0
     172               0 :                 &&  p0 != (int) '-'  &&  word[i+k] != '\0') {
     173                 :               /**  test follow-up rule for "word[i+k]"  **/
     174               0 :               while (parms.rules[n0][0] == c0) {
     175                 : 
     176                 :                 /**  check whole string  **/
     177               0 :                 k0 = k;
     178               0 :                 p0 = 5;
     179               0 :                 s = parms.rules[n0];
     180               0 :                 s++;
     181               0 :                 while (*s != '\0'  &&  word[i+k0] == *s
     182               0 :                        && ! isdigit((unsigned char) *s)  &&  strchr("(-<^$",*s) == NULL) {
     183               0 :                   k0++;
     184               0 :                   s++;
     185                 :                 }
     186               0 :                 if (*s == '(') {
     187                 :                   /**  check letters  **/
     188               0 :                   if (myisalpha(word[i+k0])
     189               0 :                       &&  strchr (s+1, word[i+k0]) != NULL) {
     190               0 :                     k0++;
     191               0 :                     while (*s != ')'  &&  *s != '\0')
     192               0 :                       s++;
     193               0 :                     if (*s == ')')
     194               0 :                       s++;
     195                 :                   }
     196                 :                 }
     197               0 :                 while (*s == '-') {
     198                 :                   /**  "k0" gets NOT reduced   **/
     199                 :                   /**  because "if (k0 == k)"  **/
     200               0 :                   s++;
     201                 :                 }
     202               0 :                 if (*s == '<')
     203               0 :                   s++;
     204               0 :                 if (isdigit ((unsigned char) *s)) {
     205               0 :                   p0 = *s - '0';
     206               0 :                   s++;
     207                 :                 }
     208                 : 
     209               0 :                 if (*s == '\0'
     210                 :                     /**  *s == '^' cuts  **/
     211               0 :                     || (*s == '$'  &&  ! myisalpha(word[i+k0]))) 
     212                 :                 {
     213               0 :                   if (k0 == k) {
     214                 :                     /**  this is just a piece of the string  **/
     215               0 :                     n0 += 2;
     216               0 :                     continue;
     217                 :                   }
     218                 : 
     219               0 :                   if (p0 < p) {
     220                 :                     /**  priority too low  **/
     221               0 :                     n0 += 2;
     222               0 :                     continue;
     223                 :                   }
     224                 :                   /**  rule fits; stop search  **/
     225               0 :                   break;
     226                 :                 }
     227               0 :                 n0 += 2;
     228                 :               } /**  End of "while (parms.rules[n0][0] == c0)"  **/
     229                 : 
     230               0 :               if (p0 >= p  && parms.rules[n0][0] == c0) {
     231               0 :                 n += 2;
     232               0 :                 continue;
     233                 :               }
     234                 :             } /** end of follow-up stuff **/
     235                 : 
     236                 :             /**  replace string  **/
     237               0 :             s = parms.rules[n+1];
     238               0 :             p0 = (parms.rules[n][0] != '\0'
     239               0 :                  &&  strchr (parms.rules[n]+1,'<') != NULL) ? 1:0;
     240               0 :             if (p0 == 1 &&  z == 0) {
     241                 :               /**  rule with '<' is used  **/
     242               0 :               if (j > 0  &&  *s != '\0'
     243               0 :                  && (target[j-1] == c  ||  target[j-1] == *s)) {
     244               0 :                 j--;
     245                 :               }
     246               0 :               z0 = 1;
     247               0 :               z = 1;
     248               0 :               k0 = 0;
     249               0 :               while (*s != '\0'  &&  word[i+k0] != '\0') {
     250               0 :                 word[i+k0] = *s;
     251               0 :                 k0++;
     252               0 :                 s++;
     253                 :               }
     254               0 :               if (k > k0)
     255               0 :                 strmove (&word[0]+i+k0, &word[0]+i+k);
     256                 : 
     257                 :               /**  new "actual letter"  **/
     258               0 :               c = word[i];
     259                 :             }
     260                 :             else { /** no '<' rule used **/
     261               0 :               i += k - 1;
     262               0 :               z = 0;
     263               0 :               while (*s != '\0'
     264               0 :                      &&  *(s+1) != '\0'  &&  j < len) {
     265               0 :                 if (j == 0  ||  target[j-1] != *s) {
     266               0 :                   target[j] = *s;
     267               0 :                   j++;
     268                 :                 }
     269               0 :                 s++;
     270                 :               }
     271                 :               /**  new "actual letter"  **/
     272               0 :               c = *s;
     273               0 :               if (parms.rules[n][0] != '\0'
     274               0 :                  &&  strstr (parms.rules[n]+1, "^^") != NULL) {
     275               0 :                 if (c != '\0') {
     276               0 :                   target[j] = c;
     277               0 :                   j++;
     278                 :                 }
     279               0 :                 strmove (&word[0], &word[0]+i+1);
     280               0 :                 i = 0;
     281               0 :                 z0 = 1;
     282                 :               }
     283                 :             }
     284               0 :             break;
     285                 :           }  /** end of follow-up stuff **/
     286               0 :           n += 2;
     287                 :         } /**  end of while (parms.rules[n][0] == c)  **/
     288                 :       } /**  end of if (n >= 0)  **/
     289               0 :       if (z0 == 0) {
     290                 : //        if (k && (assert(p0!=-333),!p0) &&  j < len &&  c != '\0'
     291                 : //           && (!parms.collapse_result  ||  j == 0  ||  target[j-1] != c)){
     292               0 :         if (k && !p0 && j < len &&  c != '\0'
     293                 :            && (1 || j == 0  ||  target[j-1] != c)){
     294                 :            /**  condense only double letters  **/
     295               0 :           target[j] = c;
     296                 :           ///printf("\n setting \n");
     297               0 :           j++;
     298                 :         }
     299                 : 
     300               0 :         i++;
     301               0 :         z = 0;
     302               0 :         k=0;
     303                 :       }
     304                 :     }  /**  end of   while ((c = word[i]) != '\0')  **/
     305                 : 
     306               0 :     target[j] = '\0';
     307               0 :     return (j);
     308                 : 
     309                 :   }  /**  end of function "phonet"  **/

Generated by: LCOV version 1.7