LCOV - code coverage report
Current view: directory - intl/uconv/ucvcn - nsUnicodeToGBK.cpp (source / functions) Found Hit Coverage
Test: app.info Lines: 135 30 22.2 %
Date: 2012-06-02 Functions: 19 2 10.5 %

       1                 : /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2                 : /* ***** BEGIN LICENSE BLOCK *****
       3                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       4                 :  *
       5                 :  * The contents of this file are subject to the Mozilla Public License Version
       6                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       7                 :  * the License. You may obtain a copy of the License at
       8                 :  * http://www.mozilla.org/MPL/
       9                 :  *
      10                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      11                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      12                 :  * for the specific language governing rights and limitations under the
      13                 :  * License.
      14                 :  *
      15                 :  * The Original Code is mozilla.org code.
      16                 :  *
      17                 :  * The Initial Developer of the Original Code is
      18                 :  * Netscape Communications Corporation.
      19                 :  * Portions created by the Initial Developer are Copyright (C) 1998
      20                 :  * the Initial Developer. All Rights Reserved.
      21                 :  *
      22                 :  * Contributor(s):
      23                 :  *
      24                 :  * Alternatively, the contents of this file may be used under the terms of
      25                 :  * either of the GNU General Public License Version 2 or later (the "GPL"),
      26                 :  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      27                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      28                 :  * of those above. If you wish to allow use of your version of this file only
      29                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      30                 :  * use your version of this file under the terms of the MPL, indicate your
      31                 :  * decision by deleting the provisions above and replace them with the notice
      32                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      33                 :  * the provisions above, a recipient may use your version of this file under
      34                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      35                 :  *
      36                 :  * ***** END LICENSE BLOCK ***** */
      37                 :  /**
      38                 :  * A character set converter from Unicode to GBK.
      39                 :  * 
      40                 :  *
      41                 :  * @created         08/Sept/1999
      42                 :  * @author  Yueheng Xu, Yueheng.Xu@intel.com
      43                 :  * Revision History
      44                 :  * 04/Oct/1999. Yueheng Xu: used table gUnicodeToGBKTable[0x5200] to make 
      45                 :  *              Unicode to GB mapping fast 
      46                 :  */
      47                 : 
      48                 : #include "nsUnicodeToGBK.h"
      49                 : #include "nsUCvCnDll.h"
      50                 : #include "gbku.h"
      51                 : #include "uconvutil.h"
      52                 : #include "nsUnicharUtils.h"
      53                 : 
      54                 : //-------------------------------------------------------------
      55                 : // Global table initialization function defined in gbku.h
      56                 : //-------------------------------------------------------------
      57                 : 
      58                 : //-----------------------------------------------------------------------
      59                 : //  Private class used by nsUnicodeToGB18030 and nsUnicodeToGB18030Font0
      60                 : //    nsUnicodeToGB18030Uniq2Bytes: table-driven encoder over g_uf_gb18030_2bytes
      61                 : //-----------------------------------------------------------------------
      62                 : static const PRUint16 g_uf_gb18030_2bytes[] = { // contents come from the included .uf file
      63                 : #include "gb18030uniq2b.uf"
      64                 : };
      65                 : class nsUnicodeToGB18030Uniq2Bytes : public nsTableEncoderSupport
      66               0 : {
      67                 : public: 
      68               0 :   nsUnicodeToGB18030Uniq2Bytes() // instantiated by nsUnicodeToGB18030::CreateExtensionEncoder()
      69                 :     : nsTableEncoderSupport(u2BytesCharset,
      70               0 :                             (uMappingTable*) &g_uf_gb18030_2bytes, 2) {} // NOTE(review): the trailing 2 is presumably the max output bytes per character -- confirm against nsTableEncoderSupport
      71                 : protected: 
      72                 : };
      73                 : //-----------------------------------------------------------------------
      74                 : //  Private class used by nsUnicodeToGB18030
      75                 : //    nsUnicodeTo4BytesGB18030: table-driven encoder over g_uf_gb18030_4bytes
      76                 : //-----------------------------------------------------------------------
      77                 : static const PRUint16 g_uf_gb18030_4bytes[] = { // contents come from the included .uf file
      78                 : #include "gb180304bytes.uf"
      79                 : };
      80                 : class nsUnicodeTo4BytesGB18030 : public nsTableEncoderSupport
      81               0 : {
      82                 : public: 
      83               0 :   nsUnicodeTo4BytesGB18030() // instantiated by nsUnicodeToGB18030::Create4BytesEncoder()
      84                 :     : nsTableEncoderSupport(u4BytesGB18030Charset, 
      85               0 :                              (uMappingTable*) &g_uf_gb18030_4bytes, 4) {} // NOTE(review): the trailing 4 is presumably the max output bytes per character -- confirm against nsTableEncoderSupport
      86                 : protected: 
      87                 : };
      88                 : //-----------------------------------------------------------------------
      89                 : //  Private class used by nsUnicodeToGBK
      90                 : //    nsUnicodeToGBKUniq2Bytes: table-driven encoder over g_uf_gbk_2bytes
      91                 : //-----------------------------------------------------------------------
      92                 : static const PRUint16 g_uf_gbk_2bytes[] = { // contents come from the included .uf file
      93                 : #include "gbkuniq2b.uf"
      94                 : };
      95                 : class nsUnicodeToGBKUniq2Bytes : public nsTableEncoderSupport
      96               0 : {
      97                 : public: 
      98               0 :   nsUnicodeToGBKUniq2Bytes() // instantiated by nsUnicodeToGBK::CreateExtensionEncoder()
      99                 :     : nsTableEncoderSupport(u2BytesCharset, 
     100               0 :                              (uMappingTable*) &g_uf_gbk_2bytes, 2) {} // NOTE(review): the trailing 2 is presumably the max output bytes per character -- confirm against nsTableEncoderSupport
     101                 : protected: 
     102                 : };
     103                 : //-----------------------------------------------------------------------
     104                 : //  nsUnicodeToGB18030
     105                 : //-----------------------------------------------------------------------
     106               0 : void nsUnicodeToGB18030::CreateExtensionEncoder() // Installs the GB18030 unique-2-byte table encoder as mExtensionEncoder.
     107                 : {
     108               0 :   mExtensionEncoder = new nsUnicodeToGB18030Uniq2Bytes(); // NOTE(review): the matching release is not visible in this listing -- confirm who frees it
     109               0 : }
     110               0 : void nsUnicodeToGB18030::Create4BytesEncoder() // Installs the GB18030 4-byte table encoder as m4BytesEncoder.
     111                 : {
     112               0 :   m4BytesEncoder = new nsUnicodeTo4BytesGB18030(); // NOTE(review): the matching release is not visible in this listing -- confirm who frees it
     113               0 : }
     114                 : 
     115               0 : bool nsUnicodeToGB18030::EncodeSurrogate( // Encodes a UTF-16 surrogate pair as four bytes at aOut.
     116                 :   PRUnichar aSurrogateHigh, // first unit; must satisfy NS_IS_HIGH_SURROGATE
     117                 :   PRUnichar aSurrogateLow, // second unit; must satisfy NS_IS_LOW_SURROGATE
     118                 :   char* aOut) // receives out[0..3]; no length is passed, so the caller must provide 4 writable bytes
     119                 : { // Returns true after writing 4 bytes; returns false without touching aOut when either unit is not the expected surrogate kind.
     120               0 :   if( NS_IS_HIGH_SURROGATE(aSurrogateHigh) && 
     121                 :       NS_IS_LOW_SURROGATE(aSurrogateLow) )
     122                 :   {
     123                 :     // idx is the 20-bit offset formed from the pair; it does not include the 0x10000 base
     124                 :     PRUint32 idx = ((aSurrogateHigh - (PRUnichar)0xD800) << 10 ) |
     125               0 :                    (aSurrogateLow - (PRUnichar) 0xDC00);
     126                 : 
     127               0 :     unsigned char *out = (unsigned char*) aOut;
     128                 :     // idx is split with radices 10, 126, 10; the lead byte starts at 0x90 for supplementary planes
     129               0 :     out[0] = (idx / (10*126*10)) + 0x90; 
     130               0 :     idx %= (10*126*10);
     131               0 :     out[1] = (idx / (10*126)) + 0x30;
     132               0 :     idx %= (10*126);
     133               0 :     out[2] = (idx / (10)) + 0x81;
     134               0 :     out[3] = (idx % 10) + 0x30;
     135               0 :     return true;
     136                 :   } 
     137               0 :   return false; 
     138                 : } 
     139                 : 
     140                 : //----------------------------------------------------------------------
     141                 : // Class nsUnicodeToGBK [implementation]
     142                 : 
     143               9 : nsUnicodeToGBK::nsUnicodeToGBK(PRUint32 aMaxLength) : // aMaxLength is forwarded unchanged to nsEncoderSupport
     144               9 :   nsEncoderSupport(aMaxLength)
     145                 : {
     146               9 :   mExtensionEncoder = nsnull; // helper encoders start unset; TryExtensionEncoder/Try4BytesEncoder create them on first use
     147               9 :   m4BytesEncoder = nsnull;
     148               9 :   mUtil.InitToGBKTable(); // NOTE(review): presumably prepares the table behind mUtil.UnicodeToGBKChar -- confirm in gbku.h
     149               9 :   mSurrogateHigh = 0; // 0 is not a high surrogate, so no pair is pending
     150               9 : }
     151               0 : void nsUnicodeToGBK::CreateExtensionEncoder() // Installs the GBK unique-2-byte table encoder as mExtensionEncoder.
     152                 : {
     153               0 :   mExtensionEncoder = new nsUnicodeToGBKUniq2Bytes(); // NOTE(review): the matching release is not visible in this listing -- confirm who frees it
     154               0 : }
     155               0 : void nsUnicodeToGBK::Create4BytesEncoder() // GBK version: installs no 4-byte encoder.
     156                 : {
     157               0 :   m4BytesEncoder = nsnull; // with this left null, Try4BytesEncoder falls through to return false
     158               0 : }
     159               0 : bool nsUnicodeToGBK::TryExtensionEncoder( // Tries to encode one unit with mExtensionEncoder, creating that encoder on first use.
     160                 :   PRUnichar aChar, // the single unit to encode
     161                 :   char* aOut, // destination handed straight to the encoder's Convert()
     162                 :   PRInt32 *aOutLen // passed through to Convert(); ConvertNoBuff treats the value left here as bytes written
     163                 : )
     164                 : { // Returns true only when Convert() succeeded and left *aOutLen > 0.
     165               0 :   if( NS_IS_HIGH_SURROGATE(aChar) || 
     166                 :       NS_IS_LOW_SURROGATE(aChar) )
     167                 :   {
     168                 :     // surrogate units are rejected up front, before any encoder is created or called
     169               0 :     return false;
     170                 :   }
     171               0 :   if(! mExtensionEncoder )
     172               0 :     CreateExtensionEncoder();
     173               0 :   if(mExtensionEncoder) 
     174                 :   {
     175               0 :     PRInt32 len = 1; // exactly one input unit is offered
     176               0 :     nsresult res = NS_OK;
     177               0 :     res = mExtensionEncoder->Convert(&aChar, &len, aOut, aOutLen);
     178               0 :     if(NS_SUCCEEDED(res) && (*aOutLen > 0))
     179               0 :       return true;
     180                 :   }
     181               0 :   return false;
     182                 : }
     183                 : 
     184               0 : bool nsUnicodeToGBK::Try4BytesEncoder( // Tries to encode one unit with m4BytesEncoder, calling Create4BytesEncoder() while it is null.
     185                 :   PRUnichar aChar, // the single unit to encode
     186                 :   char* aOut, // destination handed straight to the encoder's Convert()
     187                 :   PRInt32 *aOutLen // passed through to Convert(); asserted to be 4 whenever Convert() succeeds
     188                 : )
     189                 : { // Returns true only when Convert() succeeded and left *aOutLen > 0.
     190               0 :   if( NS_IS_HIGH_SURROGATE(aChar) || 
     191                 :       NS_IS_LOW_SURROGATE(aChar) )
     192                 :   {
     193                 :     // surrogate units are rejected up front, before any encoder is created or called
     194               0 :     return false;
     195                 :   }
     196               0 :   if(! m4BytesEncoder )
     197               0 :     Create4BytesEncoder(); // nsUnicodeToGBK's own version leaves the pointer null, so this is retried on every call
     198               0 :   if(m4BytesEncoder) 
     199                 :   {
     200               0 :     PRInt32 len = 1; // exactly one input unit is offered
     201               0 :     nsresult res = NS_OK;
     202               0 :     res = m4BytesEncoder->Convert(&aChar, &len, aOut, aOutLen);
     203               0 :     NS_ASSERTION(NS_FAILED(res) || ((1 == len) && (4 == *aOutLen)),
     204                 :       "unexpect conversion length");
     205               0 :     if(NS_SUCCEEDED(res) && (*aOutLen > 0))
     206               0 :       return true;
     207                 :   }
     208               0 :   return false;
     209                 : }
     210               0 : bool nsUnicodeToGBK::EncodeSurrogate( // GBK version: never encodes; all three arguments are ignored.
     211                 :   PRUnichar aSurrogateHigh,
     212                 :   PRUnichar aSurrogateLow,
     213                 :   char* aOut)
     214                 : {
     215               0 :   return false; // GBK cannot encode Surrogate, let the subclass encode it.
     216                 : } 
     217                 : 
     218               4 : NS_IMETHODIMP nsUnicodeToGBK::ConvertNoBuff( // Encodes UTF-16 units from aSrc into bytes at aDest, one unit (or one surrogate pair) per loop pass.
     219                 :   const PRUnichar * aSrc, // input units
     220                 :   PRInt32 * aSrcLength, // in: units available; out: value of iSrcLength when the loop stops
     221                 :   char * aDest, // output buffer
     222                 :   PRInt32 * aDestLength) // in: capacity in bytes; out: bytes written
     223                 : { // Returns NS_OK, NS_OK_UENC_MOREOUTPUT when aDest is too small, or NS_ERROR_UENC_NOMAPPING when a unit cannot be encoded.
     224               4 :   PRInt32 iSrcLength = 0; // input units accounted for so far
     225               4 :   PRInt32 iDestLength = 0; // bytes written so far
     226                 :   PRUnichar unicode;
     227               4 :   nsresult res = NS_OK;
     228             346 :   while (iSrcLength < *aSrcLength )
     229                 :   {
     230             338 :     unicode = *aSrc;
     231                 :     // units accepted by IS_ASCII are copied through as one byte; everything else takes the GBK paths below
     232             338 :     if(IS_ASCII(unicode))
     233                 :     {
     234                 :       // NOTE(review): no capacity check in this branch; if *aDestLength is 0 on entry one byte is written anyway
     235             212 :       *aDest = CAST_UNICHAR_TO_CHAR(*aSrc);
     236             212 :       aDest++; // increment 1 byte
     237             212 :       iDestLength +=1;
     238                 :     } else {
     239                 :       char byte1, byte2;
     240             126 :       if(mUtil.UnicodeToGBKChar( unicode, false, &byte1, &byte2))
     241                 :       {
     242                 :         // common 2-byte mapping found: make sure 2 output bytes are still free
     243             126 :         if(iDestLength+2 > *aDestLength)
     244                 :         {
     245               0 :           res = NS_OK_UENC_MOREOUTPUT; // iSrcLength is not advanced, so this unit is reported as unconsumed
     246               0 :           break;
     247                 :         }
     248             126 :         aDest[0] = byte1;
     249             126 :         aDest[1] = byte2;
     250             126 :         aDest += 2;     // increment 2 bytes
     251             126 :         iDestLength +=2;
     252                 :       } else {
     253               0 :         PRInt32 aOutLen = 2; // in/out length handed to the helper encoders
     254                 :         // make sure 2 output bytes are still free before trying the extension encoder
     255               0 :         if(iDestLength+2 > *aDestLength)
     256                 :         {
     257               0 :           res = NS_OK_UENC_MOREOUTPUT;
     258               0 :           break;
     259                 :         }
     260                 :         // The common mapping had no entry. Try the delegated
     261                 :         // 2-byte encoder (created by CreateExtensionEncoder) for the
     262                 :         // GBK- or GB18030-unique 2-byte mapping.
     263               0 :         if(TryExtensionEncoder(unicode, aDest, &aOutLen))
     264                 :         {
     265               0 :           iDestLength += aOutLen; // advance by whatever length the encoder reported
     266               0 :           aDest += aOutLen;
     267                 :         } else {
     268                 :           // every remaining path emits 4 bytes, so require 4 free output bytes
     269               0 :           if(iDestLength+4 > *aDestLength)
     270                 :           {
     271               0 :             res = NS_OK_UENC_MOREOUTPUT;
     272               0 :             break;
     273                 :           }
     274                 :           // Still unmapped: surrogates go through EncodeSurrogate,
     275                 :           // anything else through the delegated 4-byte encoder.
     276               0 :           aOutLen = 4;
     277               0 :           if( NS_IS_HIGH_SURROGATE(unicode) )
     278                 :           {
     279               0 :             if((iSrcLength+1) < *aSrcLength ) { // a following unit exists, so aSrc[1] is readable
     280               0 :               if(EncodeSurrogate(aSrc[0],aSrc[1], aDest)) {
     281                 :                 // pair encoded: consume the low half here; the loop tail consumes the high half
     282               0 :                 iSrcLength++ ; 
     283               0 :                 aSrc++;
     284               0 :                 iDestLength += aOutLen;
     285               0 :                 aDest += aOutLen;
     286                 :               } else {
     287                 :                 // EncodeSurrogate refused: aSrc[1] is not a low surrogate, or this encoder cannot encode pairs
     288               0 :                 res = NS_ERROR_UENC_NOMAPPING;
     289               0 :                 iSrcLength++;   // include length of the unmapped character
     290               0 :                 break;
     291                 :               }
     292                 :             } else {
     293               0 :               mSurrogateHigh = aSrc[0]; // high surrogate is the last unit: remember it for a later call
     294               0 :               break; // this will go to afterwhileloop; NOTE(review): iSrcLength is not advanced and res stays NS_OK, so the stored unit is also reported as unconsumed -- confirm callers expect this
     295                 :             }
     296                 :           } else {
     297               0 :             if( NS_IS_LOW_SURROGATE(unicode) )
     298                 :             {
     299               0 :               if(NS_IS_HIGH_SURROGATE(mSurrogateHigh)) { // a high surrogate was stored by an earlier break
     300               0 :                 if(EncodeSurrogate(mSurrogateHigh, aSrc[0], aDest)) {
     301               0 :                   iDestLength += aOutLen;
     302               0 :                   aDest += aOutLen;
     303                 :                 } else {
     304                 :                   // EncodeSurrogate refused the stored-high / current-low pair
     305               0 :                   res = NS_ERROR_UENC_NOMAPPING;
     306               0 :                   iSrcLength++;   // include length of the unmapped character
     307               0 :                   break;
     308                 :                 }
     309                 :               } else {
     310                 :                 // low surrogate with no stored high surrogate to pair it with
     311               0 :                 res = NS_ERROR_UENC_NOMAPPING;
     312               0 :                 iSrcLength++;   // include length of the unmapped character
     313               0 :                 break;
     314                 :               }
     315                 :             } else {
     316               0 :               if(Try4BytesEncoder(unicode, aDest, &aOutLen))
     317                 :               {
     318               0 :                 NS_ASSERTION((aOutLen == 4), "we should always generate 4 bytes here");
     319               0 :                 iDestLength += aOutLen;
     320               0 :                 aDest += aOutLen;
     321                 :               } else {
     322               0 :                 res = NS_ERROR_UENC_NOMAPPING;
     323               0 :                 iSrcLength++;   // include length of the unmapped character
     324               0 :                 break;
     325                 :               }
     326                 :             }
     327                 :           }
     328                 :         }
     329                 :       } 
     330                 :     }
     331             338 :     iSrcLength++ ; // each input unit counts as one, whatever its output size
     332             338 :     mSurrogateHigh = 0; // a completed pass clears any stored high surrogate; the break paths above skip this
     333             338 :     aSrc++;
     334             338 :     if ( iDestLength >= (*aDestLength) && (iSrcLength < *aSrcLength) ) // output full while input remains
     335                 :     {
     336               0 :       res = NS_OK_UENC_MOREOUTPUT;
     337               0 :       break;
     338                 :     }
     339                 :   }
     340                 : // afterwhileloop: every break lands here; report progress back through the in/out lengths
     341               4 :   *aDestLength = iDestLength;
     342               4 :   *aSrcLength = iSrcLength;
     343               4 :   return res;
     344                 : }

Generated by: LCOV version 1.7