1 : /* GRAPHITE2 LICENSING
2 :
3 : Copyright 2010, SIL International
4 : All rights reserved.
5 :
6 : This library is free software; you can redistribute it and/or modify
7 : it under the terms of the GNU Lesser General Public License as published
8 : by the Free Software Foundation; either version 2.1 of License, or
9 : (at your option) any later version.
10 :
11 : This program is distributed in the hope that it will be useful,
12 : but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 : Lesser General Public License for more details.
15 :
16 : You should also have received a copy of the GNU Lesser General Public
17 : License along with this library in the file named "LICENSE".
18 : If not, write to the Free Software Foundation, 51 Franklin Street,
19 : Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
20 : internet at http://www.fsf.org/licenses/lgpl.html.
21 :
22 : Alternatively, the contents of this file may be used under the terms of the
23 : Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
24 : License, as published by the Free Software Foundation, either version 2
25 : of the License or (at your option) any later version.
26 : */
27 : #pragma once
28 : #include <cstring>
29 : #include <cassert>
30 :
31 : namespace graphite2 {
32 :
// One row of the Windows-language-ID <-> locale table below.
struct IsoLangEntry
{
    unsigned short mnLang;      // Windows language ID, e.g. 0x0409
    const char maLangStr[4];    // NUL-terminated lower-case language code, 2 or 3 letters
    const char maCountry[3];    // NUL-terminated upper-case 2-letter country code, or ""
};

// Windows Language ID, Locale ISO-639 language, country code as used in
// naming table of OpenType fonts.
//
// The rows MUST stay in strictly ascending mnLang order: lookups by ID
// (Locale2Lang::findEntryById) search this array as a sorted sequence.
// Language codes must start with two lower-case ASCII letters because the
// locale lookup indexes on (code[0] - 'a', code[1] - 'a').
const IsoLangEntry LANG_ENTRIES[] = {
    { 0x0401, "ar",  "SA" }, // Arabic Saudi Arabia
    { 0x0402, "bg",  "BG" }, // Bulgarian Bulgaria
    { 0x0403, "ca",  "ES" }, // Catalan Catalan
    { 0x0404, "zh",  "TW" }, // Chinese Taiwan
    { 0x0405, "cs",  "CZ" }, // Czech Czech Republic
    { 0x0406, "da",  "DK" }, // Danish Denmark
    { 0x0407, "de",  "DE" }, // German Germany
    { 0x0408, "el",  "GR" }, // Greek Greece
    { 0x0409, "en",  "US" }, // English United States
    { 0x040A, "es",  "ES" }, // Spanish (Traditional Sort) Spain
    { 0x040B, "fi",  "FI" }, // Finnish Finland
    { 0x040C, "fr",  "FR" }, // French France
    { 0x040D, "he",  "IL" }, // Hebrew Israel
    { 0x040E, "hu",  "HU" }, // Hungarian Hungary
    { 0x040F, "is",  "IS" }, // Icelandic Iceland
    { 0x0410, "it",  "IT" }, // Italian Italy
    { 0x0411, "ja",  "JP" }, // Japanese Japan (ISO 639-1 code is "ja", not "jp")
    { 0x0412, "ko",  "KR" }, // Korean Korea
    { 0x0413, "nl",  "NL" }, // Dutch Netherlands
    { 0x0414, "no",  "NO" }, // Norwegian (Bokmal) Norway
    { 0x0415, "pl",  "PL" }, // Polish Poland
    { 0x0416, "pt",  "BR" }, // Portuguese Brazil
    { 0x0417, "rm",  "CH" }, // Romansh Switzerland
    { 0x0418, "ro",  "RO" }, // Romanian Romania
    { 0x0419, "ru",  "RU" }, // Russian Russia
    { 0x041A, "hr",  "HR" }, // Croatian Croatia
    { 0x041B, "sk",  "SK" }, // Slovak Slovakia
    { 0x041C, "sq",  "AL" }, // Albanian Albania
    { 0x041D, "sv",  "SE" }, // Swedish Sweden
    { 0x041E, "th",  "TH" }, // Thai Thailand
    { 0x041F, "tr",  "TR" }, // Turkish Turkey
    { 0x0420, "ur",  "PK" }, // Urdu Islamic Republic of Pakistan
    { 0x0421, "id",  "ID" }, // Indonesian Indonesia
    { 0x0422, "uk",  "UA" }, // Ukrainian Ukraine
    { 0x0423, "be",  "BY" }, // Belarusian Belarus
    { 0x0424, "sl",  "SI" }, // Slovenian Slovenia
    { 0x0425, "et",  "EE" }, // Estonian Estonia
    { 0x0426, "lv",  "LV" }, // Latvian Latvia
    { 0x0427, "lt",  "LT" }, // Lithuanian Lithuania
    { 0x0428, "tg",  "TJ" }, // Tajik (Cyrillic) Tajikistan
    { 0x042A, "vi",  "VN" }, // Vietnamese Vietnam
    { 0x042B, "hy",  "AM" }, // Armenian Armenia
    { 0x042C, "az",  "AZ" }, // Azeri (Latin) Azerbaijan
    { 0x042D, "eu",  ""   }, // Basque Basque
    { 0x042E, "hsb", "DE" }, // Upper Sorbian Germany
    { 0x042F, "mk",  "MK" }, // Macedonian (FYROM) Former Yugoslav Republic of Macedonia
    { 0x0432, "tn",  "ZA" }, // Setswana South Africa
    { 0x0434, "xh",  "ZA" }, // isiXhosa South Africa
    { 0x0435, "zu",  "ZA" }, // isiZulu South Africa
    { 0x0436, "af",  "ZA" }, // Afrikaans South Africa
    { 0x0437, "ka",  "GE" }, // Georgian Georgia
    { 0x0438, "fo",  "FO" }, // Faroese Faroe Islands
    { 0x0439, "hi",  "IN" }, // Hindi India
    { 0x043A, "mt",  "MT" }, // Maltese Malta
    { 0x043B, "se",  "NO" }, // Sami (Northern) Norway
    { 0x043E, "ms",  "MY" }, // Malay Malaysia
    { 0x043F, "kk",  "KZ" }, // Kazakh Kazakhstan
    { 0x0440, "ky",  "KG" }, // Kyrgyz Kyrgyzstan
    { 0x0441, "sw",  "KE" }, // Kiswahili Kenya
    { 0x0442, "tk",  "TM" }, // Turkmen Turkmenistan
    { 0x0443, "uz",  "UZ" }, // Uzbek (Latin) Uzbekistan
    { 0x0444, "tt",  "RU" }, // Tatar Russia
    { 0x0445, "bn",  "IN" }, // Bengali India
    { 0x0446, "pa",  "IN" }, // Punjabi India
    { 0x0447, "gu",  "IN" }, // Gujarati India
    { 0x0448, "or",  "IN" }, // Oriya India
    { 0x0449, "ta",  "IN" }, // Tamil India
    { 0x044A, "te",  "IN" }, // Telugu India
    { 0x044B, "kn",  "IN" }, // Kannada India
    { 0x044C, "ml",  "IN" }, // Malayalam India
    { 0x044D, "as",  "IN" }, // Assamese India
    { 0x044E, "mr",  "IN" }, // Marathi India
    { 0x044F, "sa",  "IN" }, // Sanskrit India
    { 0x0450, "mn",  "MN" }, // Mongolian (Cyrillic) Mongolia
    { 0x0451, "bo",  "CN" }, // Tibetan PRC
    { 0x0452, "cy",  "GB" }, // Welsh United Kingdom
    { 0x0453, "km",  "KH" }, // Khmer Cambodia
    { 0x0454, "lo",  "LA" }, // Lao Lao P.D.R.
    { 0x0455, "my",  "MM" }, // Burmese Myanmar - not listed in Microsoft docs anymore
    { 0x0456, "gl",  "ES" }, // Galician Galician
    { 0x0457, "kok", "IN" }, // Konkani India
    { 0x045A, "syr", "SY" }, // Syriac Syria
    { 0x045B, "si",  "LK" }, // Sinhala Sri Lanka
    { 0x045D, "iu",  "CA" }, // Inuktitut Canada
    { 0x045E, "am",  "ET" }, // Amharic Ethiopia
    { 0x0461, "ne",  "NP" }, // Nepali Nepal
    { 0x0462, "fy",  "NL" }, // Frisian Netherlands
    { 0x0463, "ps",  "AF" }, // Pashto Afghanistan
    { 0x0464, "fil", "PH" }, // Filipino Philippines
    { 0x0465, "dv",  "MV" }, // Divehi Maldives
    { 0x0468, "ha",  "NG" }, // Hausa (Latin) Nigeria
    { 0x046A, "yo",  "NG" }, // Yoruba Nigeria
    { 0x046B, "qu",  "BO" }, // Quechua Bolivia
    { 0x046C, "nso", "ZA" }, // Sesotho sa Leboa (Northern Sotho) South Africa
    { 0x046D, "ba",  "RU" }, // Bashkir Russia
    { 0x046E, "lb",  "LU" }, // Luxembourgish Luxembourg
    { 0x046F, "kl",  "GL" }, // Greenlandic Greenland
    { 0x0470, "ig",  "NG" }, // Igbo Nigeria
    { 0x0478, "ii",  "CN" }, // Yi PRC
    { 0x047A, "arn", "CL" }, // Mapudungun Chile
    { 0x047C, "moh", "CA" }, // Mohawk Mohawk
    { 0x047E, "br",  "FR" }, // Breton France
    { 0x0480, "ug",  "CN" }, // Uighur PRC
    { 0x0481, "mi",  "NZ" }, // Maori New Zealand
    { 0x0482, "oc",  "FR" }, // Occitan France
    { 0x0483, "co",  "FR" }, // Corsican France
    { 0x0484, "gsw", "FR" }, // Alsatian France
    { 0x0485, "sah", "RU" }, // Yakut Russia
    { 0x0486, "qut", "GT" }, // K'iche Guatemala
    { 0x0487, "rw",  "RW" }, // Kinyarwanda Rwanda
    { 0x0488, "wo",  "SN" }, // Wolof Senegal
    { 0x048C, "gbz", "AF" }, // Dari Afghanistan
    { 0x0801, "ar",  "IQ" }, // Arabic Iraq
    { 0x0804, "zh",  "CN" }, // Chinese People's Republic of China
    { 0x0807, "de",  "CH" }, // German Switzerland
    { 0x0809, "en",  "GB" }, // English United Kingdom
    { 0x080A, "es",  "MX" }, // Spanish Mexico
    { 0x080C, "fr",  "BE" }, // French Belgium
    { 0x0810, "it",  "CH" }, // Italian Switzerland
    { 0x0813, "nl",  "BE" }, // Dutch Belgium
    { 0x0814, "nn",  "NO" }, // Norwegian (Nynorsk) Norway
    { 0x0816, "pt",  "PT" }, // Portuguese Portugal
    { 0x081A, "sh",  "RS" }, // Serbian (Latin) Serbia
    { 0x081D, "sv",  "FI" }, // Swedish Finland
    { 0x082C, "az",  "AZ" }, // Azeri (Cyrillic) Azerbaijan
    { 0x082E, "dsb", "DE" }, // Lower Sorbian Germany
    { 0x083B, "se",  "SE" }, // Sami (Northern) Sweden
    { 0x083C, "ga",  "IE" }, // Irish Ireland
    { 0x083E, "ms",  "BN" }, // Malay Brunei Darussalam
    { 0x0843, "uz",  "UZ" }, // Uzbek (Cyrillic) Uzbekistan
    { 0x0845, "bn",  "BD" }, // Bengali Bangladesh
    { 0x0850, "mn",  "CN" }, // Mongolian (Traditional) People's Republic of China
    { 0x085D, "iu",  "CA" }, // Inuktitut (Latin) Canada
    { 0x085F, "ber", "DZ" }, // Tamazight (Latin) Algeria
    { 0x086B, "qu",  "EC" }, // Quechua Ecuador
    { 0x0C01, "ar",  "EG" }, // Arabic Egypt
    { 0x0C04, "zh",  "HK" }, // Chinese Hong Kong S.A.R.
    { 0x0C07, "de",  "AT" }, // German Austria
    { 0x0C09, "en",  "AU" }, // English Australia
    { 0x0C0A, "es",  "ES" }, // Spanish (Modern Sort) Spain
    { 0x0C0C, "fr",  "CA" }, // French Canada
    { 0x0C1A, "sr",  "CS" }, // Serbian (Cyrillic) Serbia
    { 0x0C3B, "se",  "FI" }, // Sami (Northern) Finland
    { 0x0C6B, "qu",  "PE" }, // Quechua Peru
    { 0x1001, "ar",  "LY" }, // Arabic Libya
    { 0x1004, "zh",  "SG" }, // Chinese Singapore
    { 0x1007, "de",  "LU" }, // German Luxembourg
    { 0x1009, "en",  "CA" }, // English Canada
    { 0x100A, "es",  "GT" }, // Spanish Guatemala
    { 0x100C, "fr",  "CH" }, // French Switzerland
    { 0x101A, "hr",  "BA" }, // Croatian (Latin) Bosnia and Herzegovina
    { 0x103B, "smj", "NO" }, // Sami (Lule) Norway
    { 0x1401, "ar",  "DZ" }, // Arabic Algeria
    { 0x1404, "zh",  "MO" }, // Chinese Macao S.A.R.
    { 0x1407, "de",  "LI" }, // German Liechtenstein
    { 0x1409, "en",  "NZ" }, // English New Zealand
    { 0x140A, "es",  "CR" }, // Spanish Costa Rica
    { 0x140C, "fr",  "LU" }, // French Luxembourg
    { 0x141A, "bs",  "BA" }, // Bosnian (Latin) Bosnia and Herzegovina
    { 0x143B, "smj", "SE" }, // Sami (Lule) Sweden
    { 0x1801, "ar",  "MA" }, // Arabic Morocco
    { 0x1809, "en",  "IE" }, // English Ireland
    { 0x180A, "es",  "PA" }, // Spanish Panama
    { 0x180C, "fr",  "MC" }, // French Principality of Monaco
    { 0x181A, "sh",  "BA" }, // Serbian (Latin) Bosnia and Herzegovina
    { 0x183B, "sma", "NO" }, // Sami (Southern) Norway
    { 0x1C01, "ar",  "TN" }, // Arabic Tunisia
    { 0x1C09, "en",  "ZA" }, // English South Africa
    { 0x1C0A, "es",  "DO" }, // Spanish Dominican Republic
    { 0x1C1A, "sr",  "BA" }, // Serbian (Cyrillic) Bosnia and Herzegovina
    { 0x1C3B, "sma", "SE" }, // Sami (Southern) Sweden
    { 0x2001, "ar",  "OM" }, // Arabic Oman
    { 0x2009, "en",  "JM" }, // English Jamaica
    { 0x200A, "es",  "VE" }, // Spanish Venezuela
    { 0x201A, "bs",  "BA" }, // Bosnian (Cyrillic) Bosnia and Herzegovina
    { 0x203B, "sms", "FI" }, // Sami (Skolt) Finland
    { 0x2401, "ar",  "YE" }, // Arabic Yemen
    { 0x2409, "en",  "BS" }, // English Caribbean
    { 0x240A, "es",  "CO" }, // Spanish Colombia
    { 0x243B, "smn", "FI" }, // Sami (Inari) Finland
    { 0x2801, "ar",  "SY" }, // Arabic Syria
    { 0x2809, "en",  "BZ" }, // English Belize
    { 0x280A, "es",  "PE" }, // Spanish Peru
    { 0x2C01, "ar",  "JO" }, // Arabic Jordan
    { 0x2C09, "en",  "TT" }, // English Trinidad and Tobago
    { 0x2C0A, "es",  "AR" }, // Spanish Argentina
    { 0x3001, "ar",  "LB" }, // Arabic Lebanon
    { 0x3009, "en",  "ZW" }, // English Zimbabwe
    { 0x300A, "es",  "EC" }, // Spanish Ecuador
    { 0x3401, "ar",  "KW" }, // Arabic Kuwait
    { 0x3409, "en",  "PH" }, // English Republic of the Philippines
    { 0x340A, "es",  "CL" }, // Spanish Chile
    { 0x3801, "ar",  "AE" }, // Arabic U.A.E.
    { 0x380A, "es",  "UY" }, // Spanish Uruguay
    { 0x3C01, "ar",  "BH" }, // Arabic Bahrain
    { 0x3C0A, "es",  "PY" }, // Spanish Paraguay
    { 0x4001, "ar",  "QA" }, // Arabic Qatar
    { 0x4009, "en",  "IN" }, // English India
    { 0x400A, "es",  "BO" }, // Spanish Bolivia
    { 0x4409, "en",  "MY" }, // English Malaysia
    { 0x440A, "es",  "SV" }, // Spanish El Salvador
    { 0x4809, "en",  "SG" }, // English Singapore
    { 0x480A, "es",  "HN" }, // Spanish Honduras
    { 0x4C0A, "es",  "NI" }, // Spanish Nicaragua
    { 0x500A, "es",  "PR" }, // Spanish Puerto Rico
    { 0x540A, "es",  "US" }  // Spanish United States
};
250 :
251 : class Locale2Lang
252 : {
253 : public:
254 0 : Locale2Lang() : mSeedPosition(128)
255 : {
256 0 : memset((void*)mLangLookup, 0, sizeof(mLangLookup));
257 : // create a tri lookup on first 2 letters of language code
258 : static const int maxIndex = sizeof(LANG_ENTRIES)/sizeof(IsoLangEntry);
259 0 : for (int i = 0; i < maxIndex; i++)
260 : {
261 0 : size_t a = LANG_ENTRIES[i].maLangStr[0] - 'a';
262 0 : size_t b = LANG_ENTRIES[i].maLangStr[1] - 'a';
263 0 : if (mLangLookup[a][b])
264 : {
265 0 : const IsoLangEntry ** old = mLangLookup[a][b];
266 0 : int len = 1;
267 0 : while (old[len]) len++;
268 0 : len += 2;
269 0 : mLangLookup[a][b] = gralloc<const IsoLangEntry *>(len);
270 0 : mLangLookup[a][b][--len] = NULL;
271 0 : mLangLookup[a][b][--len] = &LANG_ENTRIES[i];
272 0 : while (--len >= 0)
273 : {
274 0 : assert(len >= 0);
275 0 : mLangLookup[a][b][len] = old[len];
276 : }
277 0 : free(old);
278 : }
279 : else
280 : {
281 0 : mLangLookup[a][b] = gralloc<const IsoLangEntry *>(2);
282 0 : mLangLookup[a][b][1] = NULL;
283 0 : mLangLookup[a][b][0] = &LANG_ENTRIES[i];
284 : }
285 : }
286 0 : while (2 * mSeedPosition < maxIndex)
287 0 : mSeedPosition *= 2;
288 0 : };
289 0 : ~Locale2Lang()
290 : {
291 0 : for (int i = 0; i != 26; ++i)
292 0 : for (int j = 0; j != 26; ++j)
293 0 : free(mLangLookup[i][j]);
294 0 : }
295 0 : unsigned short getMsId(const char * locale) const
296 : {
297 0 : size_t length = strlen(locale);
298 0 : size_t langLength = length;
299 0 : const char * language = locale;
300 0 : const char * script = NULL;
301 0 : const char * region = NULL;
302 0 : size_t regionLength = 0;
303 0 : const char * dash = strchr(locale, '-');
304 0 : if (dash && (dash != locale))
305 : {
306 0 : langLength = (dash - locale);
307 0 : size_t nextPartLength = length - langLength - 1;
308 0 : if (nextPartLength >= 2)
309 : {
310 0 : script = ++dash;
311 0 : dash = strchr(dash, '-');
312 0 : if (dash)
313 : {
314 0 : nextPartLength = (dash - script);
315 0 : region = ++dash;
316 : }
317 0 : if (nextPartLength == 2 &&
318 0 : (locale[langLength+1] > 0x40) && (locale[langLength+1] < 0x5B) &&
319 0 : (locale[langLength+2] > 0x40) && (locale[langLength+2] < 0x5B))
320 : {
321 0 : region = script;
322 0 : regionLength = nextPartLength;
323 0 : script = NULL;
324 : }
325 0 : else if (nextPartLength == 4)
326 : {
327 0 : if (dash)
328 : {
329 0 : dash = strchr(dash, '-');
330 0 : if (dash)
331 : {
332 0 : nextPartLength = (dash - region);
333 : }
334 : else
335 : {
336 0 : nextPartLength = langLength - (region - locale);
337 : }
338 0 : regionLength = nextPartLength;
339 : }
340 : }
341 : }
342 : }
343 0 : size_t a = 'e' - 'a';
344 0 : size_t b = 'n' - 'a';
345 0 : unsigned short langId = 0;
346 0 : int i = 0;
347 0 : switch (langLength)
348 : {
349 : case 2:
350 : {
351 0 : a = language[0] - 'a';
352 0 : b = language[1] - 'a';
353 0 : if ((a < 26) && (b < 26) && mLangLookup[a][b])
354 : {
355 0 : while (mLangLookup[a][b][i])
356 : {
357 0 : if (mLangLookup[a][b][i]->maLangStr[2] != '\0')
358 : {
359 0 : ++i;
360 0 : continue;
361 : }
362 0 : if (region && (strncmp(mLangLookup[a][b][i]->maCountry, region, regionLength) == 0))
363 : {
364 0 : langId = mLangLookup[a][b][i]->mnLang;
365 0 : break;
366 : }
367 0 : else if (langId == 0)
368 : {
369 : // possible fallback code
370 0 : langId = mLangLookup[a][b][i]->mnLang;
371 : }
372 0 : ++i;
373 : }
374 : }
375 : }
376 0 : break;
377 : case 3:
378 : {
379 0 : a = language[0] - 'a';
380 0 : b = language[1] - 'a';
381 0 : if (mLangLookup[a][b])
382 : {
383 0 : while (mLangLookup[a][b][i])
384 : {
385 0 : if (mLangLookup[a][b][i]->maLangStr[2] != language[2])
386 : {
387 0 : ++i;
388 0 : continue;
389 : }
390 0 : if (strcmp(mLangLookup[a][b][i]->maCountry, region) == 0)
391 : {
392 0 : langId = mLangLookup[a][b][i]->mnLang;
393 0 : break;
394 : }
395 0 : else if (langId == 0)
396 : {
397 : // possible fallback code
398 0 : langId = mLangLookup[a][b][i]->mnLang;
399 : }
400 0 : ++i;
401 : }
402 : }
403 : }
404 0 : break;
405 : default:
406 0 : break;
407 : }
408 0 : if (langId == 0) langId = 0x409;
409 0 : return langId;
410 : }
411 : const IsoLangEntry * findEntryById(unsigned short langId) const
412 : {
413 : static const int maxIndex = sizeof(LANG_ENTRIES)/sizeof(IsoLangEntry);
414 : int window = mSeedPosition;
415 : int guess = mSeedPosition - 1;
416 : while (LANG_ENTRIES[guess].mnLang != langId)
417 : {
418 : window /= 2;
419 : if (window == 0) return NULL;
420 : guess += (LANG_ENTRIES[guess].mnLang > langId)? -window : window;
421 : while (guess >= maxIndex)
422 : {
423 : window /= 2;
424 : guess -= window;
425 : assert(window);
426 : }
427 : }
428 : return &LANG_ENTRIES[guess];
429 : }
430 : CLASS_NEW_DELETE
431 :
432 : private:
433 : const IsoLangEntry ** mLangLookup[26][26];
434 : int mSeedPosition;
435 : };
436 :
437 : } // namespace graphite2
|