1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is Mozilla Communicator client code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 1998
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : *
24 : * Alternatively, the contents of this file may be used under the terms of
25 : * either of the GNU General Public License Version 2 or later (the "GPL"),
26 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 : * in which case the provisions of the GPL or the LGPL are applicable instead
28 : * of those above. If you wish to allow use of your version of this file only
29 : * under the terms of either the GPL or the LGPL, and not to allow others to
30 : * use your version of this file under the terms of the MPL, indicate your
31 : * decision by deleting the provisions above and replace them with the notice
32 : * and other provisions required by the GPL or the LGPL. If you do not delete
33 : * the provisions above, a recipient may use your version of this file under
34 : * the terms of any one of the MPL, the GPL or the LGPL.
35 : *
36 : * ***** END LICENSE BLOCK ***** */
37 :
38 : #include "nsUnicodeToISO2022JP.h"
39 : #include "nsIComponentManager.h"
40 : #include "nsUCVJADll.h"
41 : #include "nsUnicodeEncodeHelper.h"
42 :
43 : //----------------------------------------------------------------------
44 : // Global functions and data [declaration]
45 :
46 : // Basic mapping from Hankaku to Zenkaku
47 : // Nigori and Maru are taken care of outside this basic mapping
48 : static const PRUnichar gBasicMapping[0x40] =
49 : {
50 : // 0xff60
51 : 0xff60,0x3002,0x300c,0x300d,0x3001,0x30fb,0x30f2,0x30a1,
52 : // 0xff68
53 : 0x30a3,0x30a5,0x30a7,0x30a9,0x30e3,0x30e5,0x30e7,0x30c3,
54 : // 0xff70
55 : 0x30fc,0x30a2,0x30a4,0x30a6,0x30a8,0x30aa,0x30ab,0x30ad,
56 : // 0xff78
57 : 0x30af,0x30b1,0x30b3,0x30b5,0x30b7,0x30b9,0x30bb,0x30bd,
58 : // 0xff80
59 : 0x30bf,0x30c1,0x30c4,0x30c6,0x30c8,0x30ca,0x30cb,0x30cc,
60 : // 0xff88
61 : 0x30cd,0x30ce,0x30cf,0x30d2,0x30d5,0x30d8,0x30db,0x30de,
62 : // 0xff90
63 : 0x30df,0x30e0,0x30e1,0x30e2,0x30e4,0x30e6,0x30e8,0x30e9,
64 : // 0xff98
65 : 0x30ea,0x30eb,0x30ec,0x30ed,0x30ef,0x30f3,0x309b,0x309c
66 : };
67 :
// Amount added to a basic fullwidth code to obtain its Nigori / Maru form.
#define NIGORI_MODIFIER 1
#define MARU_MODIFIER 2

// The two halfwidth modifier marks themselves.
#define IS_NIGORI(u) ((u) == 0xff9e)
#define IS_MARU(u) ((u) == 0xff9f)

// True when the code point lies in the halfwidth range 0xff61 .. 0xff9f.
#define IS_HANKAKU(u) (((u) >= 0xff61) && ((u) <= 0xff9f))

// Can this halfwidth character be followed by a Maru mark?
// (range 0xff8a .. 0xff8e)
#define NEED_TO_CHECK_MARU(u) (((u) >= 0xff8a) && ((u) <= 0xff8e))

// Can this halfwidth character be followed by a Nigori mark?
// (range 0xff76 .. 0xff84, plus every character that can take a Maru)
#define NEED_TO_CHECK_NIGORI(u) \
  ((((u) >= 0xff76) && ((u) <= 0xff84)) || NEED_TO_CHECK_MARU(u))
80 :
81 : static const PRUint16 g_ufAsciiMapping [] = {
82 : 0x0001, 0x0004, 0x0005, 0x0008, 0x0000, 0x0000, 0x007F, 0x0000
83 : };
84 :
85 : #define SIZE_OF_TABLES 5
86 : static const PRUint16 * g_ufMappingTables[SIZE_OF_TABLES] = {
87 : g_ufAsciiMapping, // ASCII ISOREG 6
88 : g_uf0201GLMapping, // JIS X 0201-1976 ISOREG 14
89 : g_uf0208Mapping, // JIS X 0208-1983 ISOREG 87
90 : g_uf0208extMapping, // JIS X 0208 - cp932 ext
91 : g_uf0208Mapping, // JIS X 0208-1978 ISOREG 42
92 : };
93 :
94 : static const uScanClassID g_ufScanClassIDs[SIZE_OF_TABLES] = {
95 : u1ByteCharset, // ASCII ISOREG 6
96 : u1ByteCharset, // JIS X 0201-1976 ISOREG 14
97 : u2BytesCharset, // JIS X 0208-1983 ISOREG 87
98 : u2BytesCharset, // JIS X 0208- cp932 ext
99 : u2BytesCharset, // JIS X 0208-1978 ISOREG 42
100 : };
101 : #define JIS_X_208_INDEX 2
102 :
103 : //----------------------------------------------------------------------
104 : // Class nsUnicodeToISO2022JP [implementation]
105 :
106 : // worst case max length:
107 : // 1 2 3 4 5 6 7 8
108 : // ESC $ B XX XX ESC ( B
109 9 : nsUnicodeToISO2022JP::nsUnicodeToISO2022JP()
110 9 : : nsEncoderSupport(8)
111 : {
112 9 : Reset();
113 9 : }
114 :
115 18 : nsUnicodeToISO2022JP::~nsUnicodeToISO2022JP()
116 : {
117 36 : }
118 :
119 33 : nsresult nsUnicodeToISO2022JP::ChangeCharset(PRInt32 aCharset,
120 : char * aDest,
121 : PRInt32 * aDestLength)
122 : {
123 : // both 2 and 3 generate the same escape sequence. 2 is for
124 : // the standard JISx0208 table, and 3 is for theCP932 extensions
125 : // therefore, we treat them as the same one.
126 33 : if(((2 == aCharset) && ( 3 == mCharset)) ||
127 : ((3 == aCharset) && ( 2 == mCharset)) )
128 : {
129 0 : mCharset = aCharset;
130 : }
131 :
132 33 : if(aCharset == mCharset)
133 : {
134 17 : *aDestLength = 0;
135 17 : return NS_OK;
136 : }
137 :
138 16 : if (*aDestLength < 3) {
139 0 : *aDestLength = 0;
140 0 : return NS_OK_UENC_MOREOUTPUT;
141 : }
142 :
143 16 : switch (aCharset) {
144 : case 0: // ASCII ISOREG 6
145 8 : aDest[0] = 0x1b;
146 8 : aDest[1] = '(';
147 8 : aDest[2] = 'B';
148 8 : break;
149 : case 1: // JIS X 0201-1976 ("Roman" set) ISOREG 14
150 0 : aDest[0] = 0x1b;
151 0 : aDest[1] = '(';
152 0 : aDest[2] = 'J';
153 0 : break;
154 : case 2: // JIS X 0208-1983 ISOREG 87
155 : case 3: // JIS X 0208-1983
156 : // we currently use this for CP932 ext
157 8 : aDest[0] = 0x1b;
158 8 : aDest[1] = '$';
159 8 : aDest[2] = 'B';
160 8 : break;
161 : case 4: // JIS X 0201-1978 ISOREG 87-
162 : // we currently do not have a diff mapping for it.
163 0 : aDest[0] = 0x1b;
164 0 : aDest[1] = '$';
165 0 : aDest[2] = '@';
166 0 : break;
167 : }
168 :
169 16 : mCharset = aCharset;
170 16 : *aDestLength = 3;
171 16 : return NS_OK;
172 : }
173 :
174 6 : nsresult nsUnicodeToISO2022JP::ConvertHankaku(const PRUnichar * aSrc,
175 : PRInt32 * aSrcLength,
176 : char * aDest,
177 : PRInt32 * aDestLength)
178 : {
179 6 : nsresult res = NS_OK;
180 :
181 6 : const PRUnichar * src = aSrc;
182 6 : const PRUnichar * srcEnd = aSrc + *aSrcLength;
183 6 : char * dest = aDest;
184 6 : char * destEnd = aDest + *aDestLength;
185 : PRUnichar srcChar, tempChar;
186 : PRInt32 bcr, bcw;
187 :
188 6 : bcw = destEnd - dest;
189 6 : res = ChangeCharset(JIS_X_208_INDEX, dest, &bcw);
190 6 : dest += bcw;
191 6 : if (res != NS_OK) {
192 0 : return res;
193 : }
194 :
195 112 : while (src < srcEnd) {
196 103 : srcChar = *src;
197 103 : if (!IS_HANKAKU(srcChar)) {
198 3 : break;
199 : }
200 100 : ++src;
201 100 : tempChar = gBasicMapping[(srcChar) - 0xff60];
202 :
203 100 : if (src < srcEnd) {
204 : // if the character could take a modifier, and the next char
205 : // is a modifier, modify it and eat one PRUnichar
206 99 : if (NEED_TO_CHECK_NIGORI(srcChar) && IS_NIGORI(*src)) {
207 23 : tempChar += NIGORI_MODIFIER;
208 23 : ++src;
209 76 : } else if (NEED_TO_CHECK_MARU(srcChar) && IS_MARU(*src)) {
210 5 : tempChar += MARU_MODIFIER;
211 5 : ++src;
212 : }
213 : }
214 100 : bcr = 1;
215 100 : bcw = destEnd - dest;
216 : res = nsUnicodeEncodeHelper::ConvertByTable(
217 : &tempChar, &bcr, dest, &bcw, g_ufScanClassIDs[JIS_X_208_INDEX],
218 100 : nsnull, (uMappingTable *) g_ufMappingTables[JIS_X_208_INDEX]);
219 100 : dest += bcw;
220 100 : if (res != NS_OK)
221 0 : break;
222 : }
223 6 : *aDestLength = dest - aDest;
224 6 : *aSrcLength = src - aSrc;
225 6 : return res;
226 : }
227 :
228 : //----------------------------------------------------------------------
229 : // Subclassing of nsEncoderSupport class [implementation]
230 :
231 30 : NS_IMETHODIMP nsUnicodeToISO2022JP::ConvertNoBuffNoErr(
232 : const PRUnichar * aSrc,
233 : PRInt32 * aSrcLength,
234 : char * aDest,
235 : PRInt32 * aDestLength)
236 : {
237 30 : nsresult res = NS_OK;
238 :
239 30 : const PRUnichar * src = aSrc;
240 30 : const PRUnichar * srcEnd = aSrc + *aSrcLength;
241 30 : char * dest = aDest;
242 30 : char * destEnd = aDest + *aDestLength;
243 : PRInt32 bcr, bcw;
244 : PRInt32 i;
245 :
246 85 : while (src < srcEnd) {
247 129 : for (i=0; i< SIZE_OF_TABLES ; i++) {
248 112 : bcr = 1;
249 112 : bcw = destEnd - dest;
250 : res = nsUnicodeEncodeHelper::ConvertByTable(src, &bcr, dest, &bcw,
251 : g_ufScanClassIDs[i], nsnull,
252 112 : (uMappingTable *) g_ufMappingTables[i]);
253 112 : if (res != NS_ERROR_UENC_NOMAPPING) break;
254 : }
255 :
256 36 : if ( i>= SIZE_OF_TABLES) {
257 17 : if (IS_HANKAKU(*src)) {
258 6 : bcr = srcEnd - src;
259 6 : bcw = destEnd - dest;
260 6 : res = ConvertHankaku(src, &bcr, dest, &bcw);
261 6 : dest += bcw;
262 6 : src += bcr;
263 6 : if (res == NS_OK) continue;
264 : } else {
265 11 : res = NS_ERROR_UENC_NOMAPPING;
266 11 : src++;
267 : }
268 : }
269 30 : if (res != NS_OK) break;
270 :
271 19 : bcw = destEnd - dest;
272 19 : res = ChangeCharset(i, dest, &bcw);
273 19 : dest += bcw;
274 19 : if (res != NS_OK) break;
275 :
276 19 : bcr = srcEnd - src;
277 19 : bcw = destEnd - dest;
278 : res = nsUnicodeEncodeHelper::ConvertByTable(src, &bcr, dest, &bcw,
279 : g_ufScanClassIDs[i], nsnull,
280 19 : (uMappingTable *) g_ufMappingTables[i]);
281 19 : src += bcr;
282 19 : dest += bcw;
283 :
284 19 : if ((res != NS_OK) && (res != NS_ERROR_UENC_NOMAPPING)) break;
285 19 : if (res == NS_ERROR_UENC_NOMAPPING) src--;
286 : }
287 :
288 30 : *aSrcLength = src - aSrc;
289 30 : *aDestLength = dest - aDest;
290 30 : return res;
291 : }
292 :
293 8 : NS_IMETHODIMP nsUnicodeToISO2022JP::FinishNoBuff(char * aDest,
294 : PRInt32 * aDestLength)
295 : {
296 8 : ChangeCharset(0, aDest, aDestLength);
297 8 : return NS_OK;
298 : }
299 :
300 9 : NS_IMETHODIMP nsUnicodeToISO2022JP::Reset()
301 : {
302 9 : mCharset = 0;
303 9 : return nsEncoderSupport::Reset();
304 : }
|