1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is Mozilla Communicator client code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 1998
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : *
24 : * Alternatively, the contents of this file may be used under the terms of
25 : * either of the GNU General Public License Version 2 or later (the "GPL"),
26 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 : * in which case the provisions of the GPL or the LGPL are applicable instead
28 : * of those above. If you wish to allow use of your version of this file only
29 : * under the terms of either the GPL or the LGPL, and not to allow others to
30 : * use your version of this file under the terms of the MPL, indicate your
31 : * decision by deleting the provisions above and replace them with the notice
32 : * and other provisions required by the GPL or the LGPL. If you do not delete
33 : * the provisions above, a recipient may use your version of this file under
34 : * the terms of any one of the MPL, the GPL or the LGPL.
35 : *
36 : * ***** END LICENSE BLOCK ***** */
37 :
38 : #include "nsGBKConvUtil.h"
39 : #include "gbku.h"
40 : #include "nsCRT.h"
41 : #define MAX_GBK_LENGTH 24066 /* (0xfe-0x80)*(0xfe-0x3f) */
42 : //--------------------------------------------------------------------
43 : // nsGBKConvUtil
44 : //--------------------------------------------------------------------
45 :
46 : static bool gInitToGBKTable = false;
47 : static const PRUnichar gGBKToUnicodeTable[MAX_GBK_LENGTH] = {
48 : #include "cp936map.h"
49 : };
50 : static PRUint16 gUnicodeToGBKTable[0xA000-0x4e00];
51 :
52 128 : bool nsGBKConvUtil::UnicodeToGBKChar(
53 : PRUnichar aChar, bool aToGL, char*
54 : aOutByte1, char* aOutByte2)
55 : {
56 128 : NS_ASSERTION(gInitToGBKTable, "gGBKToUnicodeTable is not init yet. need to call InitToGBKTable first");
57 128 : bool found=false;
58 128 : *aOutByte1 = *aOutByte2 = 0;
59 128 : if(UNICHAR_IN_RANGE(0xd800, aChar, 0xdfff))
60 : {
61 : // surrogate is not in here
62 0 : return false;
63 : }
64 128 : if(UNICHAR_IN_RANGE(0x4e00, aChar, 0x9FFF))
65 : {
66 128 : PRUint16 item = gUnicodeToGBKTable[aChar - 0x4e00];
67 128 : if(item != 0)
68 : {
69 128 : *aOutByte1 = item >> 8;
70 128 : *aOutByte2 = item & 0x00FF;
71 128 : found = true;
72 : } else {
73 0 : return false;
74 128 : }
75 : } else {
76 : // ugly linear search
77 0 : for( PRInt32 i = 0; i < MAX_GBK_LENGTH; i++ )
78 : {
79 0 : if( aChar == gGBKToUnicodeTable[i])
80 : {
81 0 : *aOutByte1 = (i / 0x00BF + 0x0081) ;
82 0 : *aOutByte2 = (i % 0x00BF + 0x0040) ;
83 0 : found = true;
84 0 : break;
85 : }
86 : }
87 : }
88 128 : if(! found)
89 0 : return false;
90 :
91 128 : if(aToGL) {
92 : // to GL, we only return if it is in the range
93 2 : if(UINT8_IN_RANGE(0xA1, *aOutByte1, 0xFE) &&
94 : UINT8_IN_RANGE(0xA1, *aOutByte2, 0xFE))
95 : {
96 : // mask them to GL
97 2 : *aOutByte1 &= 0x7F;
98 2 : *aOutByte2 &= 0x7F;
99 : } else {
100 : // if it does not fit into 0xa1-0xfe 0xa1-0xfe range that mean
101 : // it is not a GB2312 character, we cannot map to GL
102 0 : *aOutByte1 = 0x00;
103 0 : *aOutByte2 = 0x00;
104 0 : return false;
105 : }
106 : }
107 128 : return true;
108 : }
109 40664 : PRUnichar nsGBKConvUtil::GBKCharToUnicode(char aByte1, char aByte2)
110 : {
111 40664 : NS_ASSERTION(UINT8_IN_RANGE(0x81,aByte1, 0xFE), "first byte out of range");
112 40664 : NS_ASSERTION(UINT8_IN_RANGE(0x40,aByte2, 0xFE), "second byte out of range");
113 :
114 40664 : PRUint8 i1 = (PRUint8)aByte1;
115 40664 : PRUint8 i2 = (PRUint8)aByte2;
116 40664 : PRUint16 idx = (i1 - 0x0081) * 0x00bf + i2 - 0x0040 ;
117 :
118 40664 : NS_ASSERTION(idx < MAX_GBK_LENGTH, "ARB");
119 : // play it safe- add if statement here ot protect ARB
120 : // probably not necessary
121 40664 : if(idx < MAX_GBK_LENGTH)
122 40664 : return gGBKToUnicodeTable[ idx ];
123 : else
124 0 : return UCS2_NO_MAPPING;
125 : }
126 18 : void nsGBKConvUtil::InitToGBKTable()
127 : {
128 18 : if ( gInitToGBKTable )
129 10 : return;
130 :
131 : PRUnichar unicode;
132 : PRUnichar i;
133 : // zap it to zero first
134 8 : memset(gUnicodeToGBKTable,0, sizeof(gUnicodeToGBKTable));
135 :
136 192536 : for ( i=0; i<MAX_GBK_LENGTH; i++ )
137 : {
138 192528 : unicode = gGBKToUnicodeTable[i];
139 : // to reduce size of gUnicodeToGBKTable, we only do direct unicode to GB
140 : // table mapping between unicode 0x4E00 and 0xA000. Others by searching
141 : // gGBKToUnicodeTable. There is a trade off between memory usage and speed.
142 192528 : if(UNICHAR_IN_RANGE(0x4e00, unicode, 0x9fff))
143 : {
144 167216 : unicode -= 0x4E00;
145 167216 : gUnicodeToGBKTable[unicode] = (( i / 0x00BF + 0x0081) << 8) |
146 167216 : ( i % 0x00BF+ 0x0040);
147 : }
148 : }
149 8 : gInitToGBKTable = true;
150 : }
|