1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 :
3 : /* This file is modified from JPNIC's mDNKit, it is under both MPL and
4 : * JPNIC's license.
5 : */
6 :
7 : /* ***** BEGIN LICENSE BLOCK *****
8 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
9 : *
10 : * The contents of this file are subject to the Mozilla Public License Version
11 : * 1.1 (the "License"); you may not use this file except in compliance with
12 : * the License. You may obtain a copy of the License at
13 : * http://www.mozilla.org/MPL/
14 : *
15 : * Software distributed under the License is distributed on an "AS IS" basis,
16 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
17 : * for the specific language governing rights and limitations under the
18 : * License.
19 : *
20 : * The Original Code is Unicode case conversion helpers.
21 : *
22 : * The Initial Developer of the Original Code is
23 : * Netscape Communications Corp..
24 : * Portions created by the Initial Developer are Copyright (C) 2002
25 : * the Initial Developer. All Rights Reserved.
26 : *
27 : * Contributor(s):
28 : *
29 : * Alternatively, the contents of this file may be used under the terms of
30 : * either the GNU General Public License Version 2 or later (the "GPL"), or
31 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
32 : * in which case the provisions of the GPL or the LGPL are applicable instead
33 : * of those above. If you wish to allow use of your version of this file only
34 : * under the terms of either the GPL or the LGPL, and not to allow others to
35 : * use your version of this file under the terms of the MPL, indicate your
36 : * decision by deleting the provisions above and replace them with the notice
37 : * and other provisions required by the GPL or the LGPL. If you do not delete
38 : * the provisions above, a recipient may use your version of this file under
39 : * the terms of any one of the MPL, the GPL or the LGPL.
40 : *
41 : * ***** END LICENSE BLOCK ***** */
42 :
43 : /*
44 : * Copyright (c) 2000,2002 Japan Network Information Center.
45 : * All rights reserved.
46 : *
47 : * By using this file, you agree to the terms and conditions set forth bellow.
48 : *
49 : * LICENSE TERMS AND CONDITIONS
50 : *
51 : * The following License Terms and Conditions apply, unless a different
52 : * license is obtained from Japan Network Information Center ("JPNIC"),
53 : * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
54 : * Chiyoda-ku, Tokyo 101-0047, Japan.
55 : *
56 : * 1. Use, Modification and Redistribution (including distribution of any
57 : * modified or derived work) in source and/or binary forms is permitted
58 : * under this License Terms and Conditions.
59 : *
60 : * 2. Redistribution of source code must retain the copyright notices as they
61 : * appear in each source code file, this License Terms and Conditions.
62 : *
63 : * 3. Redistribution in binary form must reproduce the Copyright Notice,
64 : * this License Terms and Conditions, in the documentation and/or other
65 : * materials provided with the distribution. For the purposes of binary
66 : * distribution the "Copyright Notice" refers to the following language:
67 : * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
68 : *
69 : * 4. The name of JPNIC may not be used to endorse or promote products
70 : * derived from this Software without specific prior written approval of
71 : * JPNIC.
72 : *
73 : * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
74 : * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
75 : * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
76 : * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
77 : * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
78 : * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
79 : * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
80 : * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
81 : * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
82 : * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
83 : * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
84 : */
85 :
86 : #include <stdlib.h>
87 : #include <string.h>
88 :
89 : #include "nsUnicharUtils.h"
90 : #include "nsMemory.h"
91 : #include "nsCRT.h"
92 : #include "nsUnicodeNormalizer.h"
93 : #include "nsString.h"
94 : #include "nsReadableUtils.h"
95 :
96 12768 : NS_IMPL_ISUPPORTS1(nsUnicodeNormalizer, nsIUnicodeNormalizer)
97 :
98 :
99 1419 : nsUnicodeNormalizer::nsUnicodeNormalizer()
100 : {
101 1419 : }
102 :
103 2832 : nsUnicodeNormalizer::~nsUnicodeNormalizer()
104 : {
105 5664 : }
106 :
107 :
108 :
/*
 * File-local status codes.
 *
 * NS_ERROR_UNORM_MOREOUTPUT: the output area handed to a helper is too
 * small for the result.
 */
#define NS_ERROR_UNORM_MOREOUTPUT \
    NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_GENERAL, 0x21)

/*
 * NS_SUCCESS_UNORM_NOTFOUND: the tables hold no (applicable) entry for
 * the requested character or character pair.
 */
#define NS_SUCCESS_UNORM_NOTFOUND \
    NS_ERROR_GENERATE_SUCCESS(NS_ERROR_MODULE_GENERAL, 0x11)

/* Flag carried by the last element of a decomposition sequence. */
#define END_BIT 0x80000000
117 :
118 :
/*
 * Constants for arithmetic Hangul decomposition/composition
 * (taken from the Unicode book).
 *
 *   SBase/SLast   first precomposed syllable / one past the last one
 *   LBase/LCount  leading consonants
 *   VBase/VCount  vowels
 *   TBase/TCount  trailing consonants; offset 0 from TBase means
 *                 "no trailing consonant"
 */
#define SBase   0xAC00
#define LBase   0x1100
#define VBase   0x1161
#define TBase   0x11A7
#define LCount  19
#define VCount  21
#define TCount  28
#define SLast   (SBase + LCount * VCount * TCount)
131 :
132 : struct composition {
133 : PRUint32 c2; /* 2nd character */
134 : PRUint32 comp; /* composed character */
135 : };
136 :
137 :
138 : #include "normalization_data.h"
139 :
/*
 * Helpers for the multi-level index tables.
 *
 * A value 'v' is split into three bit fields.  The top field indexes the
 * first part of <vprefix>_imap, which yields an offset back into the same
 * array; the middle field is added to that offset to find the index of an
 * entry in <vprefix>_table; the low field selects a slot of that entry's
 * 'tbl' array.  The field widths come from <mprefix>_BITS_1 and
 * <mprefix>_BITS_2.
 */

/* Names of the per-table arrays. */
#define IMAP(vprefix) vprefix ## _imap
#define DMAP(vprefix) vprefix ## _table
#define SEQ(vprefix) vprefix ## _seq

/* Names of the per-table bit-width constants. */
#define BITS1(mprefix) mprefix ## _BITS_1
#define BITS2(mprefix) mprefix ## _BITS_2

/* Bit-field extraction for explicit widths. */
#define IDX_0(v, bits1, bits2) ((v) >> ((bits1) + (bits2)))
#define IDX_1(v, bits1, bits2) (((v) >> (bits2)) & ((1 << (bits1)) - 1))
#define IDX_2(v, bits1, bits2) ((v) & ((1 << (bits2)) - 1))

/* Bit-field extraction using the widths that belong to 'mprefix'. */
#define IDX0(mprefix, v) IDX_0(v, BITS1(mprefix), BITS2(mprefix))
#define IDX1(mprefix, v) IDX_1(v, BITS1(mprefix), BITS2(mprefix))
#define IDX2(mprefix, v) IDX_2(v, BITS1(mprefix), BITS2(mprefix))

/* The complete three-step lookup. */
#define LOOKUPTBL(vprefix, mprefix, v) \
    DMAP(vprefix)[ \
        IMAP(vprefix)[ \
            IMAP(vprefix)[IDX0(mprefix, v)] + IDX1(mprefix, v) \
        ] \
    ].tbl[IDX2(mprefix, v)]
164 :
165 : static PRInt32
166 3940 : canonclass(PRUint32 c) {
167 : /* Look up canonicalclass table. */
168 3940 : return (LOOKUPTBL(canon_class, CANON_CLASS, c));
169 : }
170 :
171 : static PRInt32
172 3920 : decompose_char(PRUint32 c, const PRUint32 **seqp)
173 : {
174 : /* Look up decomposition table. */
175 3920 : PRInt32 seqidx = LOOKUPTBL(decompose, DECOMP, c);
176 3920 : *seqp = SEQ(decompose) + (seqidx & ~DECOMP_COMPAT);
177 3920 : return (seqidx);
178 : }
179 :
180 : static PRInt32
181 6191 : compose_char(PRUint32 c,
182 : const struct composition **compp)
183 : {
184 : /* Look up composition table. */
185 6191 : PRInt32 seqidx = LOOKUPTBL(compose, CANON_COMPOSE, c);
186 6191 : *compp = SEQ(compose) + (seqidx & 0xffff);
187 6191 : return (seqidx >> 16);
188 : }
189 :
190 : static nsresult
191 3924 : mdn__unicode_decompose(PRInt32 compat, PRUint32 *v, size_t vlen,
192 : PRUint32 c, PRInt32 *decomp_lenp)
193 : {
194 3924 : PRUint32 *vorg = v;
195 : PRInt32 seqidx;
196 : const PRUint32 *seq;
197 :
198 : //assert(v != NULL && vlen >= 0 && decomp_lenp != NULL);
199 :
200 : /*
201 : * First, check for Hangul.
202 : */
203 3924 : if (SBase <= c && c < SLast) {
204 : PRInt32 idx, t_offset, v_offset, l_offset;
205 :
206 4 : idx = c - SBase;
207 4 : t_offset = idx % TCount;
208 4 : idx /= TCount;
209 4 : v_offset = idx % VCount;
210 4 : l_offset = idx / VCount;
211 4 : if ((t_offset == 0 && vlen < 2) || (t_offset > 0 && vlen < 3))
212 0 : return (NS_ERROR_UNORM_MOREOUTPUT);
213 4 : *v++ = LBase + l_offset;
214 4 : *v++ = VBase + v_offset;
215 4 : if (t_offset > 0)
216 4 : *v++ = TBase + t_offset;
217 4 : *decomp_lenp = v - vorg;
218 4 : return (NS_OK);
219 : }
220 :
221 : /*
222 : * Look up decomposition table. If no decomposition is defined
223 : * or if it is a compatibility decomosition when canonical
224 : * decomposition requested, return 'NS_SUCCESS_UNORM_NOTFOUND'.
225 : */
226 3920 : seqidx = decompose_char(c, &seq);
227 3920 : if (seqidx == 0 || (compat == 0 && (seqidx & DECOMP_COMPAT) != 0))
228 3689 : return (NS_SUCCESS_UNORM_NOTFOUND);
229 :
230 : /*
231 : * Copy the decomposed sequence. The end of the sequence are
232 : * marked with END_BIT.
233 : */
234 460 : do {
235 : PRUint32 c;
236 : PRInt32 dlen;
237 : nsresult r;
238 :
239 460 : c = *seq & ~END_BIT;
240 :
241 : /* Decompose recursively. */
242 460 : r = mdn__unicode_decompose(compat, v, vlen, c, &dlen);
243 460 : if (r == NS_OK) {
244 0 : v += dlen;
245 0 : vlen -= dlen;
246 460 : } else if (r == NS_SUCCESS_UNORM_NOTFOUND) {
247 460 : if (vlen < 1)
248 0 : return (NS_ERROR_UNORM_MOREOUTPUT);
249 460 : *v++ = c;
250 460 : vlen--;
251 : } else {
252 0 : return (r);
253 : }
254 :
255 : } while ((*seq++ & END_BIT) == 0);
256 :
257 231 : *decomp_lenp = v - vorg;
258 :
259 231 : return (NS_OK);
260 : }
261 :
262 : static PRInt32
263 3458 : mdn__unicode_iscompositecandidate(PRUint32 c)
264 : {
265 : const struct composition *dummy;
266 :
267 : /* Check for Hangul */
268 3458 : if ((LBase <= c && c < LBase + LCount) || (SBase <= c && c < SLast))
269 12 : return (1);
270 :
271 : /*
272 : * Look up composition table. If there are no composition
273 : * that begins with the given character, it is not a
274 : * composition candidate.
275 : */
276 3446 : if (compose_char(c, &dummy) == 0)
277 509 : return (0);
278 : else
279 2937 : return (1);
280 : }
281 :
282 : static nsresult
283 2753 : mdn__unicode_compose(PRUint32 c1, PRUint32 c2, PRUint32 *compp)
284 : {
285 : PRInt32 n;
286 : PRInt32 lo, hi;
287 : const struct composition *cseq;
288 :
289 : //assert(compp != NULL);
290 :
291 : /*
292 : * Check for Hangul.
293 : */
294 2753 : if (LBase <= c1 && c1 < LBase + LCount &&
295 : VBase <= c2 && c2 < VBase + VCount) {
296 : /*
297 : * Hangul L and V.
298 : */
299 : *compp = SBase +
300 4 : ((c1 - LBase) * VCount + (c2 - VBase)) * TCount;
301 4 : return (NS_OK);
302 2749 : } else if (SBase <= c1 && c1 < SLast &&
303 : TBase <= c2 && c2 < TBase + TCount &&
304 : (c1 - SBase) % TCount == 0) {
305 : /*
306 : * Hangul LV and T.
307 : */
308 4 : *compp = c1 + (c2 - TBase);
309 4 : return (NS_OK);
310 : }
311 :
312 : /*
313 : * Look up composition table. If the result is 0, no composition
314 : * is defined. Otherwise, upper 16bits of the result contains
315 : * the number of composition that begins with 'c1', and the lower
316 : * 16bits is the offset in 'compose_seq'.
317 : */
318 2745 : if ((n = compose_char(c1, &cseq)) == 0)
319 97 : return (NS_SUCCESS_UNORM_NOTFOUND);
320 :
321 : /*
322 : * The composite sequences are sorted by the 2nd character 'c2'.
323 : * So we can use binary search.
324 : */
325 2648 : lo = 0;
326 2648 : hi = n - 1;
327 11933 : while (lo <= hi) {
328 6868 : PRInt32 mid = (lo + hi) / 2;
329 :
330 6868 : if (cseq[mid].c2 < c2) {
331 240 : lo = mid + 1;
332 6628 : } else if (cseq[mid].c2 > c2) {
333 6397 : hi = mid - 1;
334 : } else {
335 231 : *compp = cseq[mid].comp;
336 231 : return (NS_OK);
337 : }
338 : }
339 2417 : return (NS_SUCCESS_UNORM_NOTFOUND);
340 : }
341 :
342 :
343 : #define WORKBUF_SIZE 128
344 : #define WORKBUF_SIZE_MAX 10000
345 :
346 : typedef struct {
347 : PRInt32 cur; /* pointing now processing character */
348 : PRInt32 last; /* pointing just after the last character */
349 : PRInt32 size; /* size of UCS and CLASS array */
350 : PRUint32 *ucs; /* UCS-4 characters */
351 : PRInt32 *cclass; /* and their canonical classes */
352 : PRUint32 ucs_buf[WORKBUF_SIZE]; /* local buffer */
353 : PRInt32 class_buf[WORKBUF_SIZE]; /* ditto */
354 : } workbuf_t;
355 :
356 : static nsresult decompose(workbuf_t *wb, PRUint32 c, PRInt32 compat);
357 : static void get_class(workbuf_t *wb);
358 : static void reorder(workbuf_t *wb);
359 : static void compose(workbuf_t *wb);
360 : static nsresult flush_before_cur(workbuf_t *wb, nsAString& aToStr);
361 : static void workbuf_init(workbuf_t *wb);
362 : static void workbuf_free(workbuf_t *wb);
363 : static nsresult workbuf_extend(workbuf_t *wb);
364 : static nsresult workbuf_append(workbuf_t *wb, PRUint32 c);
365 : static void workbuf_shift(workbuf_t *wb, PRInt32 shift);
366 : static void workbuf_removevoid(workbuf_t *wb);
367 :
368 :
369 : static nsresult
370 567 : mdn_normalize(bool do_composition, bool compat,
371 : const nsAString& aSrcStr, nsAString& aToStr)
372 : {
373 : workbuf_t wb;
374 567 : nsresult r = NS_OK;
375 : /*
376 : * Initialize working buffer.
377 : */
378 567 : workbuf_init(&wb);
379 :
380 567 : nsAString::const_iterator start, end;
381 567 : aSrcStr.BeginReading(start);
382 567 : aSrcStr.EndReading(end);
383 :
384 4598 : while (start != end) {
385 : PRUint32 c;
386 : PRUnichar curChar;
387 :
388 : //assert(wb.cur == wb.last);
389 :
390 : /*
391 : * Get one character from 'from'.
392 : */
393 3464 : curChar= *start++;
394 :
395 3464 : if (NS_IS_HIGH_SURROGATE(curChar) && start != end && NS_IS_LOW_SURROGATE(*(start)) ) {
396 0 : c = SURROGATE_TO_UCS4(curChar, *start);
397 0 : ++start;
398 : } else {
399 3464 : c = curChar;
400 : }
401 :
402 : /*
403 : * Decompose it.
404 : */
405 3464 : if ((r = decompose(&wb, c, compat)) != NS_OK)
406 0 : break;
407 :
408 : /*
409 : * Get canonical class.
410 : */
411 3464 : get_class(&wb);
412 :
413 : /*
414 : * Reorder & compose.
415 : */
416 7165 : for (; wb.cur < wb.last; wb.cur++) {
417 3701 : if (wb.cur == 0) {
418 567 : continue;
419 3134 : } else if (wb.cclass[wb.cur] > 0) {
420 : /*
421 : * This is not a starter. Try reordering.
422 : * Note that characters up to it are
423 : * already in canonical order.
424 : */
425 243 : reorder(&wb);
426 243 : continue;
427 : }
428 :
429 : /*
430 : * This is a starter character, and there are
431 : * some characters before it. Those characters
432 : * have been reordered properly, and
433 : * ready for composition.
434 : */
435 2891 : if (do_composition && wb.cclass[0] == 0)
436 2891 : compose(&wb);
437 :
438 : /*
439 : * If CUR points to a starter character,
440 : * then process of characters before CUR are
441 : * already finished, because any further
442 : * reordering/composition for them are blocked
443 : * by the starter CUR points.
444 : */
445 2891 : if (wb.cur > 0 && wb.cclass[wb.cur] == 0) {
446 : /* Flush everything before CUR. */
447 2883 : r = flush_before_cur(&wb, aToStr);
448 2883 : if (r != NS_OK)
449 0 : break;
450 : }
451 : }
452 : }
453 :
454 567 : if (r == NS_OK) {
455 567 : if (do_composition && wb.cur > 0 && wb.cclass[0] == 0) {
456 : /*
457 : * There is some characters left in WB.
458 : * They are ordered, but not composed yet.
459 : * Now CUR points just after the last character in WB,
460 : * and since compose() tries to compose characters
461 : * between top and CUR inclusive, we must make CUR
462 : * one character back during compose().
463 : */
464 567 : wb.cur--;
465 567 : compose(&wb);
466 567 : wb.cur++;
467 : }
468 : /*
469 : * Call this even when WB.CUR == 0, to make TO
470 : * NUL-terminated.
471 : */
472 567 : r = flush_before_cur(&wb, aToStr);
473 : }
474 :
475 567 : workbuf_free(&wb);
476 :
477 567 : return (r);
478 : }
479 :
480 : static nsresult
481 3464 : decompose(workbuf_t *wb, PRUint32 c, PRInt32 compat) {
482 : nsresult r;
483 : PRInt32 dec_len;
484 :
485 : again:
486 : r = mdn__unicode_decompose(compat, wb->ucs + wb->last,
487 3464 : wb->size - wb->last, c, &dec_len);
488 3464 : switch (r) {
489 : case NS_OK:
490 235 : wb->last += dec_len;
491 235 : return (NS_OK);
492 : case NS_SUCCESS_UNORM_NOTFOUND:
493 3229 : return (workbuf_append(wb, c));
494 : case NS_ERROR_UNORM_MOREOUTPUT:
495 0 : if ((r = workbuf_extend(wb)) != NS_OK)
496 0 : return (r);
497 0 : if (wb->size > WORKBUF_SIZE_MAX) {
498 : // "mdn__unormalize_form*: " "working buffer too large\n"
499 0 : return (NS_ERROR_FAILURE);
500 : }
501 0 : goto again;
502 : default:
503 0 : return (r);
504 : }
505 : /* NOTREACHED */
506 : }
507 :
508 : static void
509 3464 : get_class(workbuf_t *wb) {
510 : PRInt32 i;
511 :
512 7165 : for (i = wb->cur; i < wb->last; i++)
513 3701 : wb->cclass[i] = canonclass(wb->ucs[i]);
514 3464 : }
515 :
516 : static void
517 243 : reorder(workbuf_t *wb) {
518 : PRUint32 c;
519 : PRInt32 i;
520 : PRInt32 cclass;
521 :
522 : //assert(wb != NULL);
523 :
524 243 : i = wb->cur;
525 243 : c = wb->ucs[i];
526 243 : cclass = wb->cclass[i];
527 :
528 486 : while (i > 0 && wb->cclass[i - 1] > cclass) {
529 0 : wb->ucs[i] = wb->ucs[i - 1];
530 0 : wb->cclass[i] =wb->cclass[i - 1];
531 0 : i--;
532 0 : wb->ucs[i] = c;
533 0 : wb->cclass[i] = cclass;
534 : }
535 243 : }
536 :
537 : static void
538 3458 : compose(workbuf_t *wb) {
539 : PRInt32 cur;
540 : PRUint32 *ucs;
541 : PRInt32 *cclass;
542 : PRInt32 last_class;
543 : PRInt32 nvoids;
544 : PRInt32 i;
545 :
546 : //assert(wb != NULL && wb->cclass[0] == 0);
547 :
548 3458 : cur = wb->cur;
549 3458 : ucs = wb->ucs;
550 3458 : cclass = wb->cclass;
551 :
552 : /*
553 : * If there are no decomposition sequence that begins with
554 : * the top character, composition is impossible.
555 : */
556 3458 : if (!mdn__unicode_iscompositecandidate(ucs[0]))
557 509 : return;
558 :
559 2949 : last_class = 0;
560 2949 : nvoids = 0;
561 5702 : for (i = 1; i <= cur; i++) {
562 : PRUint32 c;
563 2753 : PRInt32 cl = cclass[i];
564 :
565 5506 : if ((last_class < cl || cl == 0) &&
566 2753 : mdn__unicode_compose(ucs[0], ucs[i],
567 2753 : &c) == NS_OK) {
568 : /*
569 : * Replace the top character with the composed one.
570 : */
571 239 : ucs[0] = c;
572 239 : cclass[0] = canonclass(c);
573 :
574 239 : cclass[i] = -1; /* void this character */
575 239 : nvoids++;
576 : } else {
577 2514 : last_class = cl;
578 : }
579 : }
580 :
581 : /* Purge void characters, if any. */
582 2949 : if (nvoids > 0)
583 239 : workbuf_removevoid(wb);
584 : }
585 :
586 : static nsresult
587 3450 : flush_before_cur(workbuf_t *wb, nsAString& aToStr)
588 : {
589 : PRInt32 i;
590 :
591 6912 : for (i = 0; i < wb->cur; i++) {
592 3462 : if (!IS_IN_BMP(wb->ucs[i])) {
593 0 : aToStr.Append((PRUnichar)H_SURROGATE(wb->ucs[i]));
594 0 : aToStr.Append((PRUnichar)L_SURROGATE(wb->ucs[i]));
595 : } else {
596 3462 : aToStr.Append((PRUnichar)(wb->ucs[i]));
597 : }
598 : }
599 :
600 3450 : workbuf_shift(wb, wb->cur);
601 :
602 3450 : return (NS_OK);
603 : }
604 :
605 : static void
606 567 : workbuf_init(workbuf_t *wb) {
607 567 : wb->cur = 0;
608 567 : wb->last = 0;
609 567 : wb->size = WORKBUF_SIZE;
610 567 : wb->ucs = wb->ucs_buf;
611 567 : wb->cclass = wb->class_buf;
612 567 : }
613 :
614 : static void
615 567 : workbuf_free(workbuf_t *wb) {
616 567 : if (wb->ucs != wb->ucs_buf) {
617 0 : nsMemory::Free(wb->ucs);
618 0 : nsMemory::Free(wb->cclass);
619 : }
620 567 : }
621 :
622 : static nsresult
623 0 : workbuf_extend(workbuf_t *wb) {
624 0 : PRInt32 newsize = wb->size * 3;
625 :
626 0 : if (wb->ucs == wb->ucs_buf) {
627 0 : wb->ucs = (PRUint32*)nsMemory::Alloc(sizeof(wb->ucs[0]) * newsize);
628 0 : if (!wb->ucs)
629 0 : return NS_ERROR_OUT_OF_MEMORY;
630 0 : wb->cclass = (PRInt32*)nsMemory::Alloc(sizeof(wb->cclass[0]) * newsize);
631 0 : if (!wb->cclass) {
632 0 : nsMemory::Free(wb->ucs);
633 0 : wb->ucs = NULL;
634 0 : return NS_ERROR_OUT_OF_MEMORY;
635 : }
636 : } else {
637 0 : void* buf = nsMemory::Realloc(wb->ucs, sizeof(wb->ucs[0]) * newsize);
638 0 : if (!buf)
639 0 : return NS_ERROR_OUT_OF_MEMORY;
640 0 : wb->ucs = (PRUint32*)buf;
641 0 : buf = nsMemory::Realloc(wb->cclass, sizeof(wb->cclass[0]) * newsize);
642 0 : if (!buf)
643 0 : return NS_ERROR_OUT_OF_MEMORY;
644 0 : wb->cclass = (PRInt32*)buf;
645 : }
646 0 : return (NS_OK);
647 : }
648 :
649 : static nsresult
650 3229 : workbuf_append(workbuf_t *wb, PRUint32 c) {
651 : nsresult r;
652 :
653 3229 : if (wb->last >= wb->size && (r = workbuf_extend(wb)) != NS_OK)
654 0 : return (r);
655 3229 : wb->ucs[wb->last++] = c;
656 3229 : return (NS_OK);
657 : }
658 :
659 : static void
660 3450 : workbuf_shift(workbuf_t *wb, PRInt32 shift) {
661 : PRInt32 nmove;
662 :
663 : //assert(wb != NULL && wb->cur >= shift);
664 :
665 3450 : nmove = wb->last - shift;
666 : memmove(&wb->ucs[0], &wb->ucs[shift],
667 3450 : nmove * sizeof(wb->ucs[0]));
668 : memmove(&wb->cclass[0], &wb->cclass[shift],
669 3450 : nmove * sizeof(wb->cclass[0]));
670 3450 : wb->cur -= shift;
671 3450 : wb->last -= shift;
672 3450 : }
673 :
674 : static void
675 239 : workbuf_removevoid(workbuf_t *wb) {
676 : PRInt32 i, j;
677 239 : PRInt32 last = wb->last;
678 :
679 949 : for (i = j = 0; i < last; i++) {
680 710 : if (wb->cclass[i] >= 0) {
681 471 : if (j < i) {
682 232 : wb->ucs[j] = wb->ucs[i];
683 232 : wb->cclass[j] = wb->cclass[i];
684 : }
685 471 : j++;
686 : }
687 : }
688 239 : wb->cur -= last - j;
689 239 : wb->last = j;
690 239 : }
691 :
692 : nsresult
693 0 : nsUnicodeNormalizer::NormalizeUnicodeNFD( const nsAString& aSrc, nsAString& aDest)
694 : {
695 0 : return mdn_normalize(false, false, aSrc, aDest);
696 : }
697 :
698 : nsresult
699 0 : nsUnicodeNormalizer::NormalizeUnicodeNFC( const nsAString& aSrc, nsAString& aDest)
700 : {
701 0 : return mdn_normalize(true, false, aSrc, aDest);
702 : }
703 :
704 : nsresult
705 0 : nsUnicodeNormalizer::NormalizeUnicodeNFKD( const nsAString& aSrc, nsAString& aDest)
706 : {
707 0 : return mdn_normalize(false, true, aSrc, aDest);
708 : }
709 :
710 : nsresult
711 567 : nsUnicodeNormalizer::NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest)
712 : {
713 567 : return mdn_normalize(true, true, aSrc, aDest);
714 : }
715 :
716 : bool
717 0 : nsUnicodeNormalizer::Compose(PRUint32 a, PRUint32 b, PRUint32 *ab)
718 : {
719 0 : return mdn__unicode_compose(a, b, ab) == NS_OK;
720 : }
721 :
722 : bool
723 0 : nsUnicodeNormalizer::DecomposeNonRecursively(PRUint32 c, PRUint32 *c1, PRUint32 *c2)
724 : {
725 : // We can't use mdn__unicode_decompose here, because that does a recursive
726 : // decomposition that may yield more than two characters, but the harfbuzz
727 : // callback wants just a single-step decomp that is guaranteed to produce
728 : // no more than two characters. So we do a low-level lookup in the table
729 : // of decomp sequences.
730 : const PRUint32 *seq;
731 0 : PRUint32 seqidx = decompose_char(c, &seq);
732 0 : if (seqidx == 0 || ((seqidx & DECOMP_COMPAT) != 0)) {
733 0 : return false;
734 : }
735 0 : *c1 = *seq & ~END_BIT;
736 0 : if (*seq & END_BIT) {
737 0 : *c2 = 0;
738 : } else {
739 0 : *c2 = *++seq & ~END_BIT;
740 : }
741 0 : return true;
742 : }
|