LCOV - code coverage report
Current view: directory - intl/uconv/ucvcn - nsHZToUnicode.cpp (source / functions) Found Hit Coverage
Test: app.info Lines: 63 50 79.4 %
Date: 2012-06-02 Functions: 2 2 100.0 %

       1                 : /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2                 : /* ***** BEGIN LICENSE BLOCK *****
       3                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       4                 :  *
       5                 :  * The contents of this file are subject to the Mozilla Public License Version
       6                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       7                 :  * the License. You may obtain a copy of the License at
       8                 :  * http://www.mozilla.org/MPL/
       9                 :  *
      10                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      11                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      12                 :  * for the specific language governing rights and limitations under the
      13                 :  * License.
      14                 :  *
      15                 :  * The Original Code is mozilla.org code.
      16                 :  *
      17                 :  * The Initial Developer of the Original Code is
      18                 :  * Netscape Communications Corporation.
      19                 :  * Portions created by the Initial Developer are Copyright (C) 1998
      20                 :  * the Initial Developer. All Rights Reserved.
      21                 :  *
      22                 :  * Contributor(s):
      23                 :  *
      24                 :  * Alternatively, the contents of this file may be used under the terms of
      25                 :  * either of the GNU General Public License Version 2 or later (the "GPL"),
      26                 :  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      27                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      28                 :  * of those above. If you wish to allow use of your version of this file only
      29                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      30                 :  * use your version of this file under the terms of the MPL, indicate your
      31                 :  * decision by deleting the provisions above and replace them with the notice
      32                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      33                 :  * the provisions above, a recipient may use your version of this file under
      34                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      35                 :  *
      36                 :  * ***** END LICENSE BLOCK ***** */
      37                 : /**
      38                 :  * A character set converter from HZ to Unicode.
      39                 :  * 
      40                 :  *
      41                 :  * @created         08/Sept/1999
      42                 :  * @author  Yueheng Xu, Yueheng.Xu@intel.com
      43                 :  *
      44                 :  * Note: in this HZ-GB-2312 converter, we accept a string composed of 7-bit HZ 
      45                 :  *       encoded Chinese chars, as defined in RFC1843 available at 
      46                 :  *       http://www.cis.ohio-state.edu/htbin/rfc/rfc1843.html
      47                 :  *       and RFC1842 available at http://www.cis.ohio-state.edu/htbin/rfc/rfc1842.html.
      48                 :  *        
      49                 :  *       Earlier versions of the converter said:
      50                 :  *        "In an effort to match the similar extended capability of Microsoft 
      51                 :  *         Internet Explorer 5.0. We also accept the 8-bit GB encoded chars
      52                 :  *         mixed in a HZ string. 
      53                 :  *         But this should not be a recommended practice for HTML authors."
      54                 :  *       However, testing in current versions of IE shows that it only accepts
      55                 :  *       8-bit characters when the converter is in GB state, and when in ASCII
      56                 :  *       state each single 8-bit character is converted to U+FFFD
      57                 :  *
      58                 :  *       The order of conversion is as follows: first, convert 8-bit GB codes; then
      59                 :  *       consume HZ escape sequences such as '~{', '~}' and '~~'; then, depending on the
      60                 :  *       current state of the string (ASCII by default), convert each 7-bit char as an
      61                 :  *       ASCII character, or each pair of 7-bit chars into a Chinese character.
      62                 :  */
      63                 : 
      64                 : 
      65                 : 
      66                 : #include "nsUCvCnDll.h"
      67                 : #include "nsHZToUnicode.h"
      68                 : #include "gbku.h"
      69                 : 
      70                 : //----------------------------------------------------------------------
      71                 : // Class nsHZToUnicode [implementation]
      72                 : 
      73                 : //----------------------------------------------------------------------
      74                 : // Subclassing of nsBufferDecoderSupport class [implementation]
      75                 : 
      76                 : #define HZ_STATE_GB     1  // mode selected by '~{': bytes are decoded in pairs as GB characters
      77                 : #define HZ_STATE_ASCII  2  // mode selected by '~}' (also the initial mode): bytes are decoded singly
      78                 : #define HZ_STATE_ODD_BYTE_FLAG 0x80  // bit set in mHZState while the first byte of a two-byte unit is pending
      79                 : #define HZLEAD1 '~'  // introduces an HZ escape sequence
      80                 : #define HZLEAD2 '{'  // '~{' switches to GB mode
      81                 : #define HZLEAD3 '}'  // '~}' switches to ASCII mode
      82                 : #define HZ_ODD_BYTE_STATE (mHZState & (HZ_STATE_ODD_BYTE_FLAG))  // non-zero when a first byte is pending
      83                 : #define HZ_ENCODING_STATE (mHZState & ~(HZ_STATE_ODD_BYTE_FLAG))  // mHZState without the pending-byte flag
      84                 : 
      85               8 : nsHZToUnicode::nsHZToUnicode() : nsBufferDecoderSupport(1)  // start in ASCII mode with nothing pending
      86                 : {
      87               8 :   mHZState = HZ_STATE_ASCII;    // per HZ spec, default to ASCII state; pending-byte flag is clear
      88               8 :   mRunLength = 0;               // characters emitted since the last '~{' or '~}' switch
      89               8 :   mOddByte = 0;                 // no first byte carried over from an earlier ConvertNoBuff() call
      90               8 : }
      91                 : 
      92                 : // Overrides ConvertNoBuff(): decodes HZ bytes from aSrc into aDest, keeping the mode and any pending byte across calls.
      93           65160 : NS_IMETHODIMP nsHZToUnicode::ConvertNoBuff(
      94                 :   const char* aSrc,        // in: HZ-encoded input bytes
      95                 :   PRInt32 * aSrcLength,    // in: number of bytes at aSrc; out: number of bytes consumed
      96                 :   PRUnichar *aDest,        // out: buffer receiving the decoded characters
      97                 :   PRInt32 * aDestLength)   // in: capacity of aDest; out: number of characters written
      98                 : {
      99           65160 :   PRInt32 i=0;
     100           65160 :   PRInt32 iSrcLength = *aSrcLength;
     101           65160 :   PRInt32 iDestlen = 0;
     102           65160 :   *aSrcLength=0;
     103           65160 :   nsresult res = NS_OK;          // becomes NS_OK_UDEC_MOREOUTPUT if aDest fills up before the input ends
     104           65160 :   char oddByte = mOddByte;       // first byte of a pair left pending by the previous call, if any
     105                 : 
     106          260620 :   for (i=0; i<iSrcLength; i++) {
     107          195460 :     if (iDestlen >= (*aDestLength)) {
     108               0 :       res = NS_OK_UDEC_MOREOUTPUT;
     109               0 :       break;
     110                 :     }
     111                 : 
     112          195460 :     char srcByte = *aSrc++;
     113          195460 :     (*aSrcLength)++;
     114                 :     
     115          195460 :     if (!HZ_ODD_BYTE_STATE) {    // no pending byte: srcByte starts a new unit
     116          194830 :       if (srcByte == HZLEAD1 || 
     117                 :           (HZ_ENCODING_STATE == HZ_STATE_GB && 
     118                 :            (UINT8_IN_RANGE(0x21, srcByte, 0x7E) ||
     119                 :             UINT8_IN_RANGE(0x81, srcByte, 0xFE)))) {
     120             630 :         oddByte = srcByte;       // hold it until the second byte arrives
     121             630 :         mHZState |= HZ_STATE_ODD_BYTE_FLAG;
     122                 :       } else {
     123                 :         *aDest++ = (srcByte & 0x80) ? UCS2_NO_MAPPING :
     124          194200 :                                       CAST_CHAR_TO_UNICHAR(srcByte);
     125          194200 :         iDestlen++;
     126                 :       }
     127                 :     } else {
     128             630 :       if (oddByte & 0x80) {
     129                 :         // Accept legal 8-bit GB 2312-80 sequences in GB mode only
     130               0 :         NS_ASSERTION(HZ_ENCODING_STATE == HZ_STATE_GB,
     131                 :                      "Invalid lead byte in ASCII mode");                    
     132                 :         *aDest++ = (UINT8_IN_RANGE(0x81, oddByte, 0xFE) &&
     133                 :                     UINT8_IN_RANGE(0x40, srcByte, 0xFE)) ?
     134               0 :                      mUtil.GBKCharToUnicode(oddByte, srcByte) : UCS2_NO_MAPPING;
     135               0 :         mRunLength++;
     136               0 :         iDestlen++;
     137                 :       // otherwise, it is a 7-bit byte 
     138                 :       // The source will be an ASCII or a 7-bit HZ code depending on oddByte
     139             630 :       } else if (oddByte == HZLEAD1) { // if it is lead by '~'
     140             538 :         switch (srcByte) {
     141                 :           case HZLEAD2: 
     142                 :             // we got a '~{'
     143                 :             // we are switching to HZ state
     144              16 :             mHZState = HZ_STATE_GB;
     145              16 :             mRunLength = 0;
     146              16 :             break;
     147                 : 
     148                 :           case HZLEAD3: 
     149                 :             // we got a '~}'
     150                 :             // we are switching to ASCII state
     151              17 :             mHZState = HZ_STATE_ASCII;
     152              17 :             if (mRunLength == 0) {   // nothing was emitted since the last switch: flag the empty run
     153               1 :               *aDest++ = UCS2_NO_MAPPING;
     154               1 :               iDestlen++;
     155                 :             }
     156              17 :             mRunLength = 0;
     157              17 :             break;
     158                 : 
     159                 :           case HZLEAD1: 
     160                 :             // we got a '~~', process like an ASCII, but no state change
     161               0 :             *aDest++ = CAST_CHAR_TO_UNICHAR(srcByte);
     162               0 :             iDestlen++;
     163               0 :             mRunLength++;
     164               0 :             break;
     165                 : 
     166                 :           default:
     167                 :             // Undefined ESC sequence '~X': treat as an error if X is a
     168                 :             // printable character or we are in ASCII mode, and resynchronize
     169                 :             // on the second character.
     170                 :             // N.B. For compatibility with other implementations, we treat '~\n'
     171                 :             // as an illegal sequence even though RFC1843 permits it, and for
     172                 :             // the same reason we pass through control characters including '\n'
     173                 :             // and ' ' even in GB mode.
     174             505 :             if (srcByte > 0x20 || HZ_ENCODING_STATE == HZ_STATE_ASCII) {
     175             505 :               *aDest++ = UCS2_NO_MAPPING;
     176             505 :               iDestlen++;        // count only a character that was actually written to aDest
     177                 :             }
     178             505 :             aSrc--;              // un-read srcByte so the next iteration decodes it afresh
     179             505 :             (*aSrcLength)--;
     180                 :             i--;                 // the re-read byte needs its own iteration, else the final input byte is never processed
     181             505 :             break;
     182                 :         }
     183              92 :       } else if (HZ_ENCODING_STATE == HZ_STATE_GB) {
     184                 :         *aDest++ = (UINT8_IN_RANGE(0x21, oddByte, 0x7E) &&
     185                 :                     UINT8_IN_RANGE(0x21, srcByte, 0x7E)) ?
     186              92 :                      mUtil.GBKCharToUnicode(oddByte|0x80, srcByte|0x80) :
     187             184 :                      UCS2_NO_MAPPING;
     188              92 :         mRunLength++;
     189              92 :         iDestlen++;
     190                 :       } else {
     191               0 :         NS_NOTREACHED("2-byte sequence that we don't know how to handle");
     192               0 :         *aDest++ = UCS2_NO_MAPPING;
     193               0 :         iDestlen++;
     194                 :       }
     195             630 :       oddByte = 0;
     196             630 :       mHZState &= ~HZ_STATE_ODD_BYTE_FLAG;
     197                 :     }
     198                 :   } // for loop
     199           65160 :   mOddByte = HZ_ODD_BYTE_STATE ? oddByte : 0;   // keep a still-pending first byte for the next call
     200           65160 :   *aDestLength = iDestlen;
     201           65160 :   return res;
     202                 : }
     203                 : 
     204                 : 

Generated by: LCOV version 1.7