LCOV - code coverage report
Current view: directory - intl/unicharutil/src - nsUnicodeNormalizer.cpp (source / functions) Found Hit Coverage
Test: app.info Lines: 238 176 73.9 %
Date: 2012-06-02 Functions: 30 24 80.0 %

       1                 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2                 : 
       3                 : /* This file is modified from JPNIC's mDNKit, it is under both MPL and 
       4                 :  * JPNIC's license.
       5                 :  */
       6                 : 
       7                 :  /* ***** BEGIN LICENSE BLOCK *****
       8                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       9                 :  *
      10                 :  * The contents of this file are subject to the Mozilla Public License Version
      11                 :  * 1.1 (the "License"); you may not use this file except in compliance with
      12                 :  * the License. You may obtain a copy of the License at
      13                 :  * http://www.mozilla.org/MPL/
      14                 :  *
      15                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      16                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      17                 :  * for the specific language governing rights and limitations under the
      18                 :  * License.
      19                 :  *
      20                 :  * The Original Code is Unicode case conversion helpers.
      21                 :  *
      22                 :  * The Initial Developer of the Original Code is
      23                 :  * Netscape Communications Corp..
      24                 :  * Portions created by the Initial Developer are Copyright (C) 2002
      25                 :  * the Initial Developer. All Rights Reserved.
      26                 :  *
      27                 :  * Contributor(s):
      28                 :  *
      29                 :  * Alternatively, the contents of this file may be used under the terms of
      30                 :  * either the GNU General Public License Version 2 or later (the "GPL"), or
      31                 :  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      32                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      33                 :  * of those above. If you wish to allow use of your version of this file only
      34                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      35                 :  * use your version of this file under the terms of the MPL, indicate your
      36                 :  * decision by deleting the provisions above and replace them with the notice
      37                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      38                 :  * the provisions above, a recipient may use your version of this file under
      39                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      40                 :  *
      41                 :  * ***** END LICENSE BLOCK ***** */
      42                 : 
      43                 : /*
      44                 :  * Copyright (c) 2000,2002 Japan Network Information Center.
      45                 :  * All rights reserved.
      46                 :  *  
      47                 :  * By using this file, you agree to the terms and conditions set forth bellow.
      48                 :  * 
      49                 :  *                      LICENSE TERMS AND CONDITIONS 
      50                 :  * 
      51                 :  * The following License Terms and Conditions apply, unless a different
      52                 :  * license is obtained from Japan Network Information Center ("JPNIC"),
      53                 :  * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
      54                 :  * Chiyoda-ku, Tokyo 101-0047, Japan.
      55                 :  * 
      56                 :  * 1. Use, Modification and Redistribution (including distribution of any
      57                 :  *    modified or derived work) in source and/or binary forms is permitted
      58                 :  *    under this License Terms and Conditions.
      59                 :  * 
      60                 :  * 2. Redistribution of source code must retain the copyright notices as they
      61                 :  *    appear in each source code file, this License Terms and Conditions.
      62                 :  * 
      63                 :  * 3. Redistribution in binary form must reproduce the Copyright Notice,
      64                 :  *    this License Terms and Conditions, in the documentation and/or other
      65                 :  *    materials provided with the distribution.  For the purposes of binary
      66                 :  *    distribution the "Copyright Notice" refers to the following language:
      67                 :  *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
      68                 :  * 
      69                 :  * 4. The name of JPNIC may not be used to endorse or promote products
      70                 :  *    derived from this Software without specific prior written approval of
      71                 :  *    JPNIC.
      72                 :  * 
      73                 :  * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
      74                 :  *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
      75                 :  *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
      76                 :  *    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
      77                 :  *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
      78                 :  *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
      79                 :  *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
      80                 :  *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
      81                 :  *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
      82                 :  *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
      83                 :  *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
      84                 :  */
      85                 : 
      86                 : #include <stdlib.h>
      87                 : #include <string.h>
      88                 : 
      89                 : #include "nsUnicharUtils.h"
      90                 : #include "nsMemory.h"
      91                 : #include "nsCRT.h"
      92                 : #include "nsUnicodeNormalizer.h"
      93                 : #include "nsString.h"
      94                 : #include "nsReadableUtils.h"
      95                 : 
      96           12768 : NS_IMPL_ISUPPORTS1(nsUnicodeNormalizer, nsIUnicodeNormalizer) // NOTE(review): macro is defined outside this file; presumably supplies the nsISupports boilerplate for this single interface
      97                 : 
      98                 : /* Default constructor: the body is empty, nothing is initialized here. */
      99            1419 : nsUnicodeNormalizer::nsUnicodeNormalizer()
     100                 : {
     101            1419 : }
     102                 : /* Destructor: the body is empty, nothing is released here. */
     103            2832 : nsUnicodeNormalizer::~nsUnicodeNormalizer()
     104                 : {
     105            5664 : }
     106                 : 
     107                 : 
     108                 : /* Failure code: the output buffer handed to a decomposition routine is too small. */
     109                 : #define NS_ERROR_UNORM_MOREOUTPUT  \
     110                 :         NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_GENERAL, 0x21)
     111                 : /* Success code: no (applicable) decomposition or composition exists for the input. */
     112                 : #define NS_SUCCESS_UNORM_NOTFOUND  \
     113                 :         NS_ERROR_GENERATE_SUCCESS(NS_ERROR_MODULE_GENERAL, 0x11)
     114                 : 
     115                 : /* Bit set on the last element of a decomposition sequence; masked off before the value is used. */
     116                 : #define END_BIT         0x80000000
     117                 : 
     118                 : 
     119                 : /*
     120                 :  * Some constants for Hangul decomposition/composition.
     121                 :  * These values were taken from the Unicode book.
     122                 :  */
     123                 : #define SBase           0xac00  /* first code point of the composed-syllable range [SBase, SLast) */
     124                 : #define LBase           0x1100  /* base of the Hangul "L" code points */
     125                 : #define VBase           0x1161  /* base of the Hangul "V" code points */
     126                 : #define TBase           0x11a7  /* base of the Hangul "T" code points; offset 0 means "no T" */
     127                 : #define LCount          19      /* number of L values */
     128                 : #define VCount          21      /* number of V values */
     129                 : #define TCount          28      /* number of T offsets, including offset 0 ("no T") */
     130                 : #define SLast           (SBase + LCount * VCount * TCount) /* one past the last composed syllable */
     131                 : /* One entry of a composition list: a first character combined with c2 yields comp. */
     132                 : struct composition {
     133                 :         PRUint32 c2;    /* 2nd character */
     134                 :         PRUint32 comp;  /* composed character */
     135                 : };
     136                 : 
     137                 : 
     138                 : #include "normalization_data.h"
     139                 : 
     140                 : /* Macro for multi-level index table.
     141                 :  * IDX0 of v selects a slot in <vprefix>_imap; that value plus IDX1 selects a second imap slot, whose value picks a <vprefix>_table entry; IDX2 indexes that entry's .tbl.
     142                 :  * The field widths are <mprefix>_BITS_1 and <mprefix>_BITS_2 (NOTE(review): presumably provided by normalization_data.h together with the tables). */
     143                 : #define LOOKUPTBL(vprefix, mprefix, v) \
     144                 :         DMAP(vprefix)[\
     145                 :                 IMAP(vprefix)[\
     146                 :                         IMAP(vprefix)[IDX0(mprefix, v)] + IDX1(mprefix, v)\
     147                 :                 ]\
     148                 :         ].tbl[IDX2(mprefix, v)]
     149                 : /* IDXn(mprefix, v): bit field n of v, using the widths registered for mprefix. */
     150                 : #define IDX0(mprefix, v) IDX_0(v, BITS1(mprefix), BITS2(mprefix))
     151                 : #define IDX1(mprefix, v) IDX_1(v, BITS1(mprefix), BITS2(mprefix))
     152                 : #define IDX2(mprefix, v) IDX_2(v, BITS1(mprefix), BITS2(mprefix))
     153                 : /* Raw extractors: IDX_0 = bits above (bits1 + bits2), IDX_1 = the middle bits1 bits, IDX_2 = the low bits2 bits. */
     154                 : #define IDX_0(v, bits1, bits2)  ((v) >> ((bits1) + (bits2)))
     155                 : #define IDX_1(v, bits1, bits2)  (((v) >> (bits2)) & ((1 << (bits1)) - 1))
     156                 : #define IDX_2(v, bits1, bits2)  ((v) & ((1 << (bits2)) - 1))
     157                 : /* Token pasting: build the names of the per-table bit-width constants. */
     158                 : #define BITS1(mprefix)  mprefix ## _BITS_1
     159                 : #define BITS2(mprefix)  mprefix ## _BITS_2
     160                 : /* Token pasting: build the names <vprefix>_imap, <vprefix>_table and <vprefix>_seq. */
     161                 : #define IMAP(vprefix)   vprefix ## _imap
     162                 : #define DMAP(vprefix)   vprefix ## _table
     163                 : #define SEQ(vprefix)    vprefix ## _seq
     164                 : /* Returns the canonical class stored for code point c in the canon_class lookup tables. */
     165                 : static PRInt32
     166            3940 : canonclass(PRUint32 c) {
     167                 :         /* Look up canonicalclass table. */
     168            3940 :         return (LOOKUPTBL(canon_class, CANON_CLASS, c));
     169                 : }
     170                 : /* Looks up c's decomposition entry; points *seqp into decompose_seq (DECOMP_COMPAT bit masked off) and returns the raw entry, flag bit included. */
     171                 : static PRInt32
     172            3920 : decompose_char(PRUint32 c, const PRUint32 **seqp)
     173                 : {
     174                 :         /* Look up decomposition table. */
     175            3920 :         PRInt32 seqidx = LOOKUPTBL(decompose, DECOMP, c);
     176            3920 :         *seqp = SEQ(decompose) + (seqidx & ~DECOMP_COMPAT); /* strip the flag to get the plain offset */
     177            3920 :         return (seqidx); /* callers treat 0 as "no decomposition" and test DECOMP_COMPAT themselves */
     178                 : }
     179                 : /* Looks up c's composition entry; points *compp at its list in compose_seq and returns how many entries that list has (0 = none). */
     180                 : static PRInt32
     181            6191 : compose_char(PRUint32 c,
     182                 :                                 const struct composition **compp)
     183                 : {
     184                 :         /* Look up composition table. */
     185            6191 :         PRInt32 seqidx = LOOKUPTBL(compose, CANON_COMPOSE, c);
     186            6191 :         *compp = SEQ(compose) + (seqidx & 0xffff); /* lower 16 bits: offset into compose_seq */
     187            6191 :         return (seqidx >> 16); /* upper 16 bits: number of compositions starting with c */
     188                 : }
     189                 : /* Writes the recursive decomposition of c into v (capacity vlen) and its length into *decomp_lenp. Returns NS_OK on success, NS_SUCCESS_UNORM_NOTFOUND (nothing written) if c has no applicable decomposition, NS_ERROR_UNORM_MOREOUTPUT if v is too small. compat != 0 also permits compatibility decompositions. */
     190                 : static nsresult
     191            3924 : mdn__unicode_decompose(PRInt32 compat, PRUint32 *v, size_t vlen,
     192                 :                        PRUint32 c, PRInt32 *decomp_lenp)
     193                 : {
     194            3924 :         PRUint32 *vorg = v; /* start of output, used to compute the written length */
     195                 :         PRInt32 seqidx;
     196                 :         const PRUint32 *seq;
     197                 : 
     198                 :         //assert(v != NULL && vlen >= 0 && decomp_lenp != NULL);
     199                 : 
     200                 :         /*
     201                 :          * First, check for Hangul: syllables are split arithmetically into L, V and (when t_offset > 0) T.
     202                 :          */
     203            3924 :         if (SBase <= c && c < SLast) {
     204                 :                 PRInt32 idx, t_offset, v_offset, l_offset;
     205                 : 
     206               4 :                 idx = c - SBase;
     207               4 :                 t_offset = idx % TCount;
     208               4 :                 idx /= TCount;
     209               4 :                 v_offset = idx % VCount;
     210               4 :                 l_offset = idx / VCount;
     211               4 :                 if ((t_offset == 0 && vlen < 2) || (t_offset > 0 && vlen < 3))
     212               0 :                         return (NS_ERROR_UNORM_MOREOUTPUT);
     213               4 :                 *v++ = LBase + l_offset;
     214               4 :                 *v++ = VBase + v_offset;
     215               4 :                 if (t_offset > 0)
     216               4 :                         *v++ = TBase + t_offset;
     217               4 :                 *decomp_lenp = v - vorg;
     218               4 :                 return (NS_OK);
     219                 :         }
     220                 : 
     221                 :         /*
     222                 :          * Look up decomposition table.  If no decomposition is defined
     223                 :          * or if it is a compatibility decomposition when canonical
     224                 :          * decomposition requested, return 'NS_SUCCESS_UNORM_NOTFOUND'.
     225                 :          */
     226            3920 :         seqidx = decompose_char(c, &seq);
     227            3920 :         if (seqidx == 0 || (compat == 0 && (seqidx & DECOMP_COMPAT) != 0))
     228            3689 :                 return (NS_SUCCESS_UNORM_NOTFOUND);
     229                 :         
     230                 :         /*
     231                 :          * Copy the decomposed sequence.  The end of the sequence is
     232                 :          * marked with END_BIT.
     233                 :          */
     234             460 :         do {
     235                 :                 PRUint32 c; /* shadows the parameter c inside this loop */
     236                 :                 PRInt32 dlen;
     237                 :                 nsresult r;
     238                 : 
     239             460 :                 c = *seq & ~END_BIT;
     240                 : 
     241                 :                 /* Decompose recursively. */
     242             460 :                 r = mdn__unicode_decompose(compat, v, vlen, c, &dlen);
     243             460 :                 if (r == NS_OK) {
     244               0 :                         v += dlen;
     245               0 :                         vlen -= dlen;
     246             460 :                 } else if (r == NS_SUCCESS_UNORM_NOTFOUND) {
     247             460 :                         if (vlen < 1) /* element does not decompose further: copy it as-is */
     248               0 :                                 return (NS_ERROR_UNORM_MOREOUTPUT);
     249             460 :                         *v++ = c;
     250             460 :                         vlen--;
     251                 :                 } else {
     252               0 :                         return (r);
     253                 :                 }
     254                 : 
     255                 :         } while ((*seq++ & END_BIT) == 0);
     256                 :         
     257             231 :         *decomp_lenp = v - vorg;
     258                 : 
     259             231 :         return (NS_OK);
     260                 : }
     261                 : /* Returns 1 if c can be the first character of a composition (Hangul L or composed syllable, or a non-empty composition list), else 0. */
     262                 : static PRInt32
     263            3458 : mdn__unicode_iscompositecandidate(PRUint32 c)
     264                 : {
     265                 :         const struct composition *dummy; /* required by compose_char(); the list itself is not needed */
     266                 : 
     267                 :         /* Check for Hangul */
     268            3458 :         if ((LBase <= c && c < LBase + LCount) || (SBase <= c && c < SLast))
     269              12 :                 return (1);
     270                 : 
     271                 :         /*
     272                 :          * Look up composition table.  If there is no composition
     273                 :          * that begins with the given character, it is not a
     274                 :          * composition candidate.
     275                 :          */
     276            3446 :         if (compose_char(c, &dummy) == 0)
     277             509 :                 return (0);
     278                 :         else
     279            2937 :                 return (1);
     280                 : }
     281                 : /* Tries to compose c1 followed by c2. On success stores the composed character in *compp and returns NS_OK; otherwise returns NS_SUCCESS_UNORM_NOTFOUND and leaves *compp untouched. */
     282                 : static nsresult
     283            2753 : mdn__unicode_compose(PRUint32 c1, PRUint32 c2, PRUint32 *compp)
     284                 : {
     285                 :         PRInt32 n;
     286                 :         PRInt32 lo, hi;
     287                 :         const struct composition *cseq;
     288                 : 
     289                 :         //assert(compp != NULL);
     290                 : 
     291                 :         /*
     292                 :          * Check for Hangul.
     293                 :          */
     294            2753 :         if (LBase <= c1 && c1 < LBase + LCount &&
     295                 :             VBase <= c2 && c2 < VBase + VCount) {
     296                 :                 /*
     297                 :                  * Hangul L and V.
     298                 :                  */
     299                 :                 *compp = SBase +
     300               4 :                         ((c1 - LBase) * VCount + (c2 - VBase)) * TCount;
     301               4 :                 return (NS_OK);
     302            2749 :         } else if (SBase <= c1 && c1 < SLast &&
     303                 :                    TBase < c2 && c2 < TBase + TCount &&
     304                 :                    (c1 - SBase) % TCount == 0) {
     305                 :                 /*
     306                 :                  * Hangul LV and T.  c2 == TBase is excluded: offset 0 means "no T", so accepting it would report a composition equal to c1 and lose c2.
     307                 :                  */
     308               4 :                 *compp = c1 + (c2 - TBase);
     309               4 :                 return (NS_OK);
     310                 :         }
     311                 : 
     312                 :         /*
     313                 :          * Look up composition table.  If the result is 0, no composition
     314                 :          * is defined.  Otherwise, upper 16bits of the result contains
     315                 :          * the number of compositions that begin with 'c1', and the lower
     316                 :          * 16bits is the offset in 'compose_seq'.
     317                 :          */
     318            2745 :         if ((n = compose_char(c1, &cseq)) == 0)
     319              97 :                 return (NS_SUCCESS_UNORM_NOTFOUND);
     320                 : 
     321                 :         /*
     322                 :          * The composite sequences are sorted by the 2nd character 'c2'.
     323                 :          * So we can use binary search.
     324                 :          */
     325            2648 :         lo = 0;
     326            2648 :         hi = n - 1;
     327           11933 :         while (lo <= hi) {
     328            6868 :                 PRInt32 mid = (lo + hi) / 2;
     329                 : 
     330            6868 :                 if (cseq[mid].c2 < c2) {
     331             240 :                         lo = mid + 1;
     332            6628 :                 } else if (cseq[mid].c2 > c2) {
     333            6397 :                         hi = mid - 1;
     334                 :                 } else {
     335             231 :                         *compp = cseq[mid].comp;
     336             231 :                         return (NS_OK);
     337                 :                 }
     338                 :         }
     339            2417 :         return (NS_SUCCESS_UNORM_NOTFOUND); /* c1 has compositions, but none with this c2 */
     340                 : }
     341                 : 
     342                 : 
     343                 : #define WORKBUF_SIZE            128    /* element count of the arrays embedded in workbuf_t */
     344                 : #define WORKBUF_SIZE_MAX        10000  /* decompose() fails once the buffer size exceeds this */
     345                 : /* Working buffer for normalization: parallel arrays of code points and their canonical classes, plus cursor/end/capacity indices. */
     346                 : typedef struct {
     347                 :         PRInt32 cur;            /* index of the character currently being processed */
     348                 :         PRInt32 last;           /* index just after the last character */
     349                 :         PRInt32 size;           /* size of UCS and CLASS array */
     350                 :         PRUint32 *ucs;  /* UCS-4 characters */
     351                 :         PRInt32 *cclass;                /* and their canonical classes */
     352                 :         PRUint32 ucs_buf[WORKBUF_SIZE]; /* local buffer (NOTE(review): presumably what ucs points at until workbuf_extend() runs; confirm in workbuf_init) */
     353                 :         PRInt32 class_buf[WORKBUF_SIZE];                /* ditto */
     354                 : } workbuf_t;
     355                 : /* Forward declarations of the static helpers, so mdn_normalize() and decompose() can call them before their definitions. */
     356                 : static nsresult decompose(workbuf_t *wb, PRUint32 c, PRInt32 compat);
     357                 : static void             get_class(workbuf_t *wb);
     358                 : static void             reorder(workbuf_t *wb);
     359                 : static void             compose(workbuf_t *wb);
     360                 : static nsresult flush_before_cur(workbuf_t *wb, nsAString& aToStr);
     361                 : static void             workbuf_init(workbuf_t *wb);
     362                 : static void             workbuf_free(workbuf_t *wb);
     363                 : static nsresult workbuf_extend(workbuf_t *wb);
     364                 : static nsresult workbuf_append(workbuf_t *wb, PRUint32 c);
     365                 : static void             workbuf_shift(workbuf_t *wb, PRInt32 shift);
     366                 : static void             workbuf_removevoid(workbuf_t *wb);
     367                 : 
     368                 : /* Normalizes aSrcStr: each UTF-16 unit or surrogate pair is decomposed into a work buffer, classified, reordered and (if do_composition) composed; finished runs are handed to flush_before_cur() together with aToStr. compat is forwarded to decompose(). Returns NS_OK or the first failing nsresult. */
     369                 : static nsresult
     370             567 : mdn_normalize(bool do_composition, bool compat,
     371                 :           const nsAString& aSrcStr, nsAString& aToStr)
     372                 : {
     373                 :         workbuf_t wb;
     374             567 :         nsresult r = NS_OK;
     375                 :         /*
     376                 :          * Initialize working buffer.
     377                 :          */
     378             567 :         workbuf_init(&wb);
     379                 : 
     380             567 :         nsAString::const_iterator start, end;
     381             567 :         aSrcStr.BeginReading(start); 
     382             567 :         aSrcStr.EndReading(end); 
     383                 : /* r == NS_OK is part of the loop condition: the flush-failure break below only leaves the inner for loop, and without this check the error would be overwritten by the next decompose(). */
     384            4598 :         while (r == NS_OK && start != end) {
     385                 :                 PRUint32 c;
     386                 :                 PRUnichar curChar;
     387                 : 
     388                 :                 //assert(wb.cur == wb.last);
     389                 : 
     390                 :                 /*
     391                 :                  * Get one character from aSrcStr, combining a valid surrogate pair into one code point.
     392                 :                  */
     393            3464 :                 curChar= *start++;
     394                 : 
     395            3464 :                 if (NS_IS_HIGH_SURROGATE(curChar) && start != end && NS_IS_LOW_SURROGATE(*(start)) ) {
     396               0 :                         c = SURROGATE_TO_UCS4(curChar, *start);
     397               0 :                         ++start;
     398                 :                 } else {
     399            3464 :                         c = curChar; /* BMP character, or an unpaired surrogate passed through as-is */
     400                 :                 }
     401                 : 
     402                 :                 /*
     403                 :                  * Decompose it.
     404                 :                  */
     405            3464 :                 if ((r = decompose(&wb, c, compat)) != NS_OK)
     406               0 :                         break;
     407                 : 
     408                 :                 /*
     409                 :                  * Get canonical class.
     410                 :                  */
     411            3464 :                 get_class(&wb);
     412                 : 
     413                 :                 /*
     414                 :                  * Reorder & compose.
     415                 :                  */
     416            7165 :                 for (; wb.cur < wb.last; wb.cur++) {
     417            3701 :                         if (wb.cur == 0) {
     418             567 :                                 continue;
     419            3134 :                         } else if (wb.cclass[wb.cur] > 0) {
     420                 :                                 /*
     421                 :                                  * This is not a starter. Try reordering.
     422                 :                                  * Note that characters up to it are
     423                 :                                  * already in canonical order.
     424                 :                                  */
     425             243 :                                 reorder(&wb);
     426             243 :                                 continue;
     427                 :                         }
     428                 : 
     429                 :                         /*
     430                 :                          * This is a starter character, and there are
     431                 :                          * some characters before it.  Those characters
     432                 :                          * have been reordered properly, and
     433                 :                          * ready for composition.
     434                 :                          */
     435            2891 :                         if (do_composition && wb.cclass[0] == 0)
     436            2891 :                                 compose(&wb);
     437                 : 
     438                 :                         /*
     439                 :                          * If CUR points to a starter character,
     440                 :                          * then process of characters before CUR are
     441                 :                          * already finished, because any further
     442                 :                          * reordering/composition for them are blocked
     443                 :                          * by the starter CUR points.
     444                 :                          */
     445            2891 :                         if (wb.cur > 0 && wb.cclass[wb.cur] == 0) {
     446                 :                                 /* Flush everything before CUR. */
     447            2883 :                                 r = flush_before_cur(&wb, aToStr);
     448            2883 :                                 if (r != NS_OK)
     449               0 :                                         break; /* leaves the for loop only; the while condition then stops the outer loop */
     450                 :                         }
     451                 :                 }
     452                 :         }
     453                 : 
     454             567 :         if (r == NS_OK) {
     455             567 :                 if (do_composition && wb.cur > 0 && wb.cclass[0] == 0) {
     456                 :                         /*
     457                 :                          * There are some characters left in WB.
     458                 :                          * They are ordered, but not composed yet.
     459                 :                          * Now CUR points just after the last character in WB,
     460                 :                          * and since compose() tries to compose characters
     461                 :                          * between top and CUR inclusive, we must make CUR
     462                 :                          * one character back during compose().
     463                 :                          */
     464             567 :                         wb.cur--;
     465             567 :                         compose(&wb);
     466             567 :                         wb.cur++;
     467                 :                 }
     468                 :                 /*
     469                 :                  * Call this even when WB.CUR == 0 (comment inherited from the C original,
     470                 :                  * where this call NUL-terminated TO; TODO confirm it still matters for aToStr).
     471                 :                  */
     472             567 :                 r = flush_before_cur(&wb, aToStr);
     473                 :         }
     474                 : 
     475             567 :         workbuf_free(&wb); /* runs on success and on failure */
     476                 : 
     477             567 :         return (r);
     478                 : }
     479                 : /* Adds the decomposition of c at the end of wb (or hands c to workbuf_append() when none applies), extending the buffer and retrying while more room is needed. Returns NS_OK, or a failure from the helpers, or NS_ERROR_FAILURE once wb->size exceeds WORKBUF_SIZE_MAX. */
     480                 : static nsresult
     481            3464 : decompose(workbuf_t *wb, PRUint32 c, PRInt32 compat) {
     482                 :         nsresult r;
     483                 :         PRInt32 dec_len;
     484                 : 
     485                 : again:
     486                 :         r = mdn__unicode_decompose(compat, wb->ucs + wb->last,
     487            3464 :                                    wb->size - wb->last, c, &dec_len);
     488            3464 :         switch (r) {
     489                 :         case NS_OK:
     490             235 :                 wb->last += dec_len; /* the decomposition was written in place after the old end */
     491             235 :                 return (NS_OK);
     492                 :         case NS_SUCCESS_UNORM_NOTFOUND:
     493            3229 :                 return (workbuf_append(wb, c));
     494                 :         case NS_ERROR_UNORM_MOREOUTPUT:
     495               0 :                 if ((r = workbuf_extend(wb)) != NS_OK)
     496               0 :                         return (r);
     497               0 :                 if (wb->size > WORKBUF_SIZE_MAX) {
     498                 :                         // "mdn__unormalize_form*: " "working buffer too large\n"
     499               0 :                         return (NS_ERROR_FAILURE);
     500                 :                 }
     501               0 :                 goto again; /* retry with the enlarged buffer */
     502                 :         default:
     503               0 :                 return (r);
     504                 :         }
     505                 :         /* NOTREACHED */
     506                 : }
     507                 : /* Fills wb->cclass[i] with canonclass(wb->ucs[i]) for every i in [wb->cur, wb->last). */
     508                 : static void             
     509            3464 : get_class(workbuf_t *wb) {
     510                 :         PRInt32 i;
     511                 : 
     512            7165 :         for (i = wb->cur; i < wb->last; i++)
     513            3701 :                 wb->cclass[i] = canonclass(wb->ucs[i]);
     514            3464 : }
     515                 : /* Moves the character at wb->cur towards the front, past every predecessor whose class is strictly greater; ucs and cclass entries move together. */
     516                 : static void
     517             243 : reorder(workbuf_t *wb) {
     518                 :         PRUint32 c;
     519                 :         PRInt32 i;
     520                 :         PRInt32 cclass;
     521                 : 
     522                 :         //assert(wb != NULL);
     523                 : 
     524             243 :         i = wb->cur;
     525             243 :         c = wb->ucs[i];
     526             243 :         cclass = wb->cclass[i];
     527                 : 
     528             486 :         while (i > 0 && wb->cclass[i - 1] > cclass) { /* strict '>' keeps equal-class characters in their original order */
     529               0 :                 wb->ucs[i] = wb->ucs[i - 1];
     530               0 :                 wb->cclass[i] =wb->cclass[i - 1];
     531               0 :                 i--;
     532               0 :                 wb->ucs[i] = c;
     533               0 :                 wb->cclass[i] = cclass;
     534                 :         }
     535             243 : }
     536                 : 
     537                 : static void
     538            3458 : compose(workbuf_t *wb) {
     539                 :         PRInt32 cur;
     540                 :         PRUint32 *ucs;
     541                 :         PRInt32 *cclass;
     542                 :         PRInt32 last_class;
     543                 :         PRInt32 nvoids;
     544                 :         PRInt32 i;
     545                 : 
     546                 :         //assert(wb != NULL && wb->cclass[0] == 0);
     547                 : 
     548            3458 :         cur = wb->cur;
     549            3458 :         ucs = wb->ucs;
     550            3458 :         cclass = wb->cclass;
     551                 : 
     552                 :         /*
     553                 :          * If there are no decomposition sequence that begins with
     554                 :          * the top character, composition is impossible.
     555                 :          */
     556            3458 :         if (!mdn__unicode_iscompositecandidate(ucs[0]))
     557             509 :                 return;
     558                 : 
     559            2949 :         last_class = 0;
     560            2949 :         nvoids = 0;
     561            5702 :         for (i = 1; i <= cur; i++) {
     562                 :                 PRUint32 c;
     563            2753 :                 PRInt32 cl = cclass[i];
     564                 : 
     565            5506 :                 if ((last_class < cl || cl == 0) &&
     566            2753 :                     mdn__unicode_compose(ucs[0], ucs[i],
     567            2753 :                                          &c) == NS_OK) {
     568                 :                         /*
     569                 :                          * Replace the top character with the composed one.
     570                 :                          */
     571             239 :                         ucs[0] = c;
     572             239 :                         cclass[0] = canonclass(c);
     573                 : 
     574             239 :                         cclass[i] = -1; /* void this character */
     575             239 :                         nvoids++;
     576                 :                 } else {
     577            2514 :                         last_class = cl;
     578                 :                 }
     579                 :         }
     580                 : 
     581                 :         /* Purge void characters, if any. */
     582            2949 :         if (nvoids > 0)
     583             239 :                 workbuf_removevoid(wb);
     584                 : }
     585                 : 
     586                 : static nsresult
     587            3450 : flush_before_cur(workbuf_t *wb, nsAString& aToStr) 
     588                 : {
     589                 :         PRInt32 i;
     590                 : 
     591            6912 :         for (i = 0; i < wb->cur; i++) {
     592            3462 :                 if (!IS_IN_BMP(wb->ucs[i])) {
     593               0 :                         aToStr.Append((PRUnichar)H_SURROGATE(wb->ucs[i]));
     594               0 :                         aToStr.Append((PRUnichar)L_SURROGATE(wb->ucs[i]));
     595                 :                 } else {
     596            3462 :                         aToStr.Append((PRUnichar)(wb->ucs[i]));
     597                 :                 }
     598                 :         }
     599                 : 
     600            3450 :         workbuf_shift(wb, wb->cur);
     601                 : 
     602            3450 :         return (NS_OK);
     603                 : }
     604                 : 
     605                 : static void
     606             567 : workbuf_init(workbuf_t *wb) {
     607             567 :         wb->cur = 0;
     608             567 :         wb->last = 0;
     609             567 :         wb->size = WORKBUF_SIZE;
     610             567 :         wb->ucs = wb->ucs_buf;
     611             567 :         wb->cclass = wb->class_buf;
     612             567 : }
     613                 : 
     614                 : static void
     615             567 : workbuf_free(workbuf_t *wb) {
     616             567 :         if (wb->ucs != wb->ucs_buf) {
     617               0 :                 nsMemory::Free(wb->ucs);
     618               0 :                 nsMemory::Free(wb->cclass);
     619                 :         }
     620             567 : }
     621                 : 
     622                 : static nsresult
     623               0 : workbuf_extend(workbuf_t *wb) {
     624               0 :         PRInt32 newsize = wb->size * 3;
     625                 : 
     626               0 :         if (wb->ucs == wb->ucs_buf) {
     627               0 :                 wb->ucs = (PRUint32*)nsMemory::Alloc(sizeof(wb->ucs[0]) * newsize);
     628               0 :                 if (!wb->ucs)
     629               0 :                         return NS_ERROR_OUT_OF_MEMORY;
     630               0 :                 wb->cclass = (PRInt32*)nsMemory::Alloc(sizeof(wb->cclass[0]) * newsize);
     631               0 :                 if (!wb->cclass) {
     632               0 :                         nsMemory::Free(wb->ucs);
     633               0 :                         wb->ucs = NULL;
     634               0 :                         return NS_ERROR_OUT_OF_MEMORY;
     635                 :                 }
     636                 :         } else {
     637               0 :                 void* buf = nsMemory::Realloc(wb->ucs, sizeof(wb->ucs[0]) * newsize);
     638               0 :                 if (!buf)
     639               0 :                         return NS_ERROR_OUT_OF_MEMORY;
     640               0 :                 wb->ucs = (PRUint32*)buf;
     641               0 :                 buf = nsMemory::Realloc(wb->cclass, sizeof(wb->cclass[0]) * newsize);
     642               0 :                 if (!buf)
     643               0 :                         return NS_ERROR_OUT_OF_MEMORY;
     644               0 :                 wb->cclass = (PRInt32*)buf;
     645                 :         }
     646               0 :         return (NS_OK);
     647                 : }
     648                 : 
     649                 : static nsresult
     650            3229 : workbuf_append(workbuf_t *wb, PRUint32 c) {
     651                 :         nsresult r;
     652                 : 
     653            3229 :         if (wb->last >= wb->size && (r = workbuf_extend(wb)) != NS_OK)
     654               0 :                 return (r);
     655            3229 :         wb->ucs[wb->last++] = c;
     656            3229 :         return (NS_OK);
     657                 : }
     658                 : 
     659                 : static void
     660            3450 : workbuf_shift(workbuf_t *wb, PRInt32 shift) {
     661                 :         PRInt32 nmove;
     662                 : 
     663                 :         //assert(wb != NULL && wb->cur >= shift);
     664                 : 
     665            3450 :         nmove = wb->last - shift;
     666                 :         memmove(&wb->ucs[0], &wb->ucs[shift],
     667            3450 :                       nmove * sizeof(wb->ucs[0]));
     668                 :         memmove(&wb->cclass[0], &wb->cclass[shift],
     669            3450 :                       nmove * sizeof(wb->cclass[0]));
     670            3450 :         wb->cur -= shift;
     671            3450 :         wb->last -= shift;
     672            3450 : }
     673                 : 
     674                 : static void
     675             239 : workbuf_removevoid(workbuf_t *wb) {
     676                 :         PRInt32 i, j;
     677             239 :         PRInt32 last = wb->last;
     678                 : 
     679             949 :         for (i = j = 0; i < last; i++) {
     680             710 :                 if (wb->cclass[i] >= 0) {
     681             471 :                         if (j < i) {
     682             232 :                                 wb->ucs[j] = wb->ucs[i];
     683             232 :                                 wb->cclass[j] = wb->cclass[i];
     684                 :                         }
     685             471 :                         j++;
     686                 :                 }
     687                 :         }
     688             239 :         wb->cur -= last - j;
     689             239 :         wb->last = j;
     690             239 : }
     691                 : 
     692                 : nsresult  
     693               0 : nsUnicodeNormalizer::NormalizeUnicodeNFD( const nsAString& aSrc, nsAString& aDest)
     694                 : {
     695               0 :   return mdn_normalize(false, false, aSrc, aDest);
     696                 : }
     697                 : 
     698                 : nsresult  
     699               0 : nsUnicodeNormalizer::NormalizeUnicodeNFC( const nsAString& aSrc, nsAString& aDest)
     700                 : {
     701               0 :   return mdn_normalize(true, false, aSrc, aDest);
     702                 : }
     703                 : 
     704                 : nsresult  
     705               0 : nsUnicodeNormalizer::NormalizeUnicodeNFKD( const nsAString& aSrc, nsAString& aDest)
     706                 : {
     707               0 :   return mdn_normalize(false, true, aSrc, aDest);
     708                 : }
     709                 : 
     710                 : nsresult  
     711             567 : nsUnicodeNormalizer::NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest)
     712                 : {
     713             567 :   return mdn_normalize(true, true, aSrc, aDest);
     714                 : }
     715                 : 
     716                 : bool
     717               0 : nsUnicodeNormalizer::Compose(PRUint32 a, PRUint32 b, PRUint32 *ab)
     718                 : {
     719               0 :   return mdn__unicode_compose(a, b, ab) == NS_OK;
     720                 : }
     721                 : 
     722                 : bool
     723               0 : nsUnicodeNormalizer::DecomposeNonRecursively(PRUint32 c, PRUint32 *c1, PRUint32 *c2)
     724                 : {
     725                 :   // We can't use mdn__unicode_decompose here, because that does a recursive
     726                 :   // decomposition that may yield more than two characters, but the harfbuzz
     727                 :   // callback wants just a single-step decomp that is guaranteed to produce
     728                 :   // no more than two characters. So we do a low-level lookup in the table
     729                 :   // of decomp sequences.
     730                 :   const PRUint32 *seq;
     731               0 :   PRUint32 seqidx = decompose_char(c, &seq);
     732               0 :   if (seqidx == 0 || ((seqidx & DECOMP_COMPAT) != 0)) {
     733               0 :     return false;
     734                 :   }
     735               0 :   *c1 = *seq & ~END_BIT;
     736               0 :   if (*seq & END_BIT) {
     737               0 :     *c2 = 0;
     738                 :   } else {
     739               0 :     *c2 = *++seq & ~END_BIT;
     740                 :   }
     741               0 :   return true;
     742                 : }

Generated by: LCOV version 1.7