1 : /******* BEGIN LICENSE BLOCK *******
2 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3 : *
4 : * The contents of this file are subject to the Mozilla Public License Version
5 : * 1.1 (the "License"); you may not use this file except in compliance with
6 : * the License. You may obtain a copy of the License at
7 : * http://www.mozilla.org/MPL/
8 : *
9 : * Software distributed under the License is distributed on an "AS IS" basis,
10 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 : * for the specific language governing rights and limitations under the
12 : * License.
13 : *
14 : * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
15 : * and László Németh (Hunspell). Portions created by the Initial Developers
16 : * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
17 : *
18 : * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
19 : * David Einstein (deinst@world.std.com)
20 : * László Németh (nemethl@gyorsposta.hu)
21 : * Caolan McNamara (caolanm@redhat.com)
22 : * Davide Prina
23 : * Giuseppe Modugno
24 : * Gianluca Turconi
25 : * Simon Brouwer
26 : * Noll Janos
27 : * Biro Arpad
28 : * Goldman Eleonora
29 : * Sarlos Tamas
30 : * Bencsath Boldizsar
31 : * Halacsy Peter
32 : * Dvornik Laszlo
33 : * Gefferth Andras
34 : * Nagy Viktor
35 : * Varga Daniel
36 : * Chris Halls
37 : * Rene Engelhard
38 : * Bram Moolenaar
39 : * Dafydd Jones
40 : * Harri Pitkanen
41 : * Andras Timar
42 : * Tor Lillqvist
43 : *
44 : * Alternatively, the contents of this file may be used under the terms of
45 : * either the GNU General Public License Version 2 or later (the "GPL"), or
46 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
47 : * in which case the provisions of the GPL or the LGPL are applicable instead
48 : * of those above. If you wish to allow use of your version of this file only
49 : * under the terms of either the GPL or the LGPL, and not to allow others to
50 : * use your version of this file under the terms of the MPL, indicate your
51 : * decision by deleting the provisions above and replace them with the notice
52 : * and other provisions required by the GPL or the LGPL. If you do not delete
53 : * the provisions above, a recipient may use your version of this file under
54 : * the terms of any one of the MPL, the GPL or the LGPL.
55 : *
56 : ******* END LICENSE BLOCK *******/
57 :
58 : #ifndef __CSUTILHXX__
59 : #define __CSUTILHXX__
60 :
61 : #include "hunvisapi.h"
62 :
63 : // First some base level utility routines
64 :
65 : #include <string.h>
66 : #include "w_char.hxx"
67 : #include "htypes.hxx"
68 :
69 : #ifdef MOZILLA_CLIENT
70 : #include "nscore.h" // for mozalloc headers
71 : #endif
72 :
73 : // casing
74 : #define NOCAP 0
75 : #define INITCAP 1
76 : #define ALLCAP 2
77 : #define HUHCAP 3
78 : #define HUHINITCAP 4
79 :
80 : // default encoding and keystring
81 : #define SPELL_ENCODING "ISO8859-1"
82 : #define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"
83 :
84 : // default morphological fields
85 : #define MORPH_STEM "st:"
86 : #define MORPH_ALLOMORPH "al:"
87 : #define MORPH_POS "po:"
88 : #define MORPH_DERI_PFX "dp:"
89 : #define MORPH_INFL_PFX "ip:"
90 : #define MORPH_TERM_PFX "tp:"
91 : #define MORPH_DERI_SFX "ds:"
92 : #define MORPH_INFL_SFX "is:"
93 : #define MORPH_TERM_SFX "ts:"
94 : #define MORPH_SURF_PFX "sp:"
95 : #define MORPH_FREQ "fr:"
96 : #define MORPH_PHON "ph:"
97 : #define MORPH_HYPH "hy:"
98 : #define MORPH_PART "pa:"
99 : #define MORPH_FLAG "fl:"
100 : #define MORPH_HENTRY "_H:"
101 : #define MORPH_TAG_LEN strlen(MORPH_STEM)
102 :
103 : #define MSEP_FLD ' '
104 : #define MSEP_REC '\n'
105 : #define MSEP_ALT '\v'
106 :
107 : // default flags
108 : #define DEFAULTFLAGS 65510
109 : #define FORBIDDENWORD 65510
110 : #define ONLYUPCASEFLAG 65511
111 :
112 : // convert UTF-16 characters to UTF-8
113 : LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
114 :
115 : // convert UTF-8 characters to UTF-16
116 : LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src);
117 :
118 : // sort 2-byte vector
119 : LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int end);
120 :
121 : // binary search in 2-byte vector
122 : LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
123 :
124 : // remove end of line char(s)
125 : LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s);
126 :
127 : // duplicate string
128 : LIBHUNSPELL_DLL_EXPORTED char * mystrdup(const char * s);
129 :
130 : // strcat for limited length destination string
131 : LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max);
132 :
133 : // duplicate reverse of string
134 : LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s);
135 :
136 : // parse into tokens with char delimiter
137 : LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim);
138 : // parse into tokens with char delimiter
139 : LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim);
140 :
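141 : // replace a pattern inside a string (argument roles are unnamed here; presumably string, pattern, replacement -- confirm against the implementation)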
142 : LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *);
143 :
144 : // append s to the end of every line in lines
145 : LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s);
146 :
147 : // tokenize into lines with new line
148 : LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char breakchar);
149 :
150 : // tokenize into lines with new line and uniq in place
151 : LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar);
152 : LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar);
153 :
154 : // change oldchar to newchar in place
155 : LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc);
156 :
157 : // reverse word
158 : LIBHUNSPELL_DLL_EXPORTED int reverseword(char *);
159 :
160 : // reverse word (UTF variant of reverseword)
161 : LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *);
162 :
163 : // remove duplicates
164 : LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n);
165 :
166 : // free character array list
167 : LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n);
168 :
169 : // character encoding information: three one-byte fields per entry; get_current_cs() returns a pointer to these (presumably a table with one entry per character code -- confirm)
170 : struct cs_info {
171 : unsigned char ccase; // presumably a case indicator for the character -- TODO confirm exact meaning
172 : unsigned char clower; // presumably the lowercase form of the character -- TODO confirm
173 : unsigned char cupper; // presumably the uppercase form of the character -- TODO confirm
174 : };
175 :
176 : LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
177 : LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
178 : LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum);
179 : LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum);
180 : LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
181 :
182 : LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es);
183 :
184 : // get language identifiers of language codes
185 : LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang);
186 :
187 : // get characters of the given 8bit encoding with lower- and uppercase forms
188 : LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc);
189 :
190 : // convert null terminated string to all caps using encoding
191 : LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding);
192 :
193 : // convert null terminated string to all lowercase using encoding
194 : LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char * encoding);
195 :
196 : // convert null terminated string to have initial capital using encoding
197 : LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char * encoding);
198 :
199 : // convert null terminated string to all caps
200 : LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv);
201 :
202 : // convert null terminated string to all lowercase
203 : LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv);
204 :
205 : // convert null terminated string to have initial capital
206 : LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv);
207 :
208 : // convert first nc characters of UTF-16 (w_char) string to lowercase
209 : LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum);
210 :
211 : // convert first nc characters of UTF-16 (w_char) string to uppercase
212 : LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum);
213 :
214 : // get type of capitalization
215 : LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *);
216 :
217 : // get type of capitalization of a w_char word (the "utf8" counterpart of get_captype)
218 : LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum);
219 :
220 : // strip all ignored characters in the string
221 : LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
222 :
223 : // strip all ignored characters in the string
224 : LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_chars);
225 :
226 : LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln);
227 :
228 : LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned short ** out_utf16,
229 : int * out_utf16_len, int utf8, int ln);
230 :
231 : LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r);
232 : LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, const char * var);
233 :
234 : LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t);
235 :
236 : LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph);
237 :
238 : // conversion function for protected memory
239 : LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source);
240 :
241 : // conversion function for protected memory
242 : LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s);
243 :
244 : // hash entry macros
245 44 : LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h)
246 : {
247 : char *ret;
248 44 : if (!h->var)
249 0 : ret = NULL;
250 44 : else if (h->var & H_OPT_ALIASM)
251 2 : ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
252 : else
253 42 : ret = HENTRY_WORD(h) + h->blen + 1;
254 44 : return ret;
255 : }
256 :
257 : // NULL-free version for warning-free OOo build
258 0 : LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h)
259 : {
260 : const char *ret;
261 0 : if (!h->var)
262 0 : ret = "";
263 0 : else if (h->var & H_OPT_ALIASM)
264 0 : ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
265 : else
266 0 : ret = HENTRY_WORD(h) + h->blen + 1;
267 0 : return ret;
268 : }
269 :
270 0 : LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p)
271 : {
272 0 : return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL);
273 : }
274 :
275 : #define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h))
276 :
277 : #endif
|