1 : /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is mozilla.org code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 1998
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : * Jungshik Shin <jshin@mailaps.org>
24 : *
25 : * Alternatively, the contents of this file may be used under the terms of
26 : * either of the GNU General Public License Version 2 or later (the "GPL"),
27 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 : * in which case the provisions of the GPL or the LGPL are applicable instead
29 : * of those above. If you wish to allow use of your version of this file only
30 : * under the terms of either the GPL or the LGPL, and not to allow others to
31 : * use your version of this file under the terms of the MPL, indicate your
32 : * decision by deleting the provisions above and replace them with the notice
33 : * and other provisions required by the GPL or the LGPL. If you do not delete
34 : * the provisions above, a recipient may use your version of this file under
35 : * the terms of any one of the MPL, the GPL or the LGPL.
36 : *
37 : * ***** END LICENSE BLOCK ***** */
38 : #include "nsISO2022KRToUnicode.h"
39 : #include "nsUCSupport.h"
40 : #include "nsICharsetConverterManager.h"
41 : #include "nsIServiceManager.h"
42 :
// Class ID of the charset converter manager service. Convert() passes it to
// do_GetService() to obtain the nsICharsetConverterManager from which the
// EUC-KR delegate decoder is requested.
43 : static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
44 :
45 11 : NS_IMETHODIMP nsISO2022KRToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen, PRUnichar * aDest, PRInt32 * aDestLen)
46 : {
47 11 : const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
48 11 : const unsigned char* src =(unsigned char*) aSrc;
49 11 : PRUnichar* destEnd = aDest + *aDestLen;
50 11 : PRUnichar* dest = aDest;
51 298 : while((src < srcEnd))
52 : {
53 : // if LF/CR, return to US-ASCII unconditionally.
54 276 : if ( *src == 0x0a || *src == 0x0d )
55 6 : mState = mState_Init;
56 :
57 276 : switch(mState)
58 : {
59 : case mState_Init:
60 11 : if(0x1b == *src) {
61 4 : mLastLegalState = mState_ASCII;
62 4 : mState = mState_ESC;
63 4 : break;
64 : }
65 7 : mState = mState_ASCII;
66 : // fall through
67 :
68 : case mState_ASCII:
69 99 : if(0x0e == *src) { // Shift-Out
70 29 : mState = mState_KSX1001_1992;
71 29 : mRunLength = 0;
72 : }
73 70 : else if(*src & 0x80) {
74 0 : if (CHECK_OVERRUN(dest, destEnd, 1))
75 0 : goto error1;
76 0 : *dest++ = 0xFFFD;
77 : }
78 : else {
79 70 : if (CHECK_OVERRUN(dest, destEnd, 1))
80 0 : goto error1;
81 70 : *dest++ = (PRUnichar) *src;
82 : }
83 99 : break;
84 :
85 : case mState_ESC:
86 4 : if('$' == *src) {
87 4 : mState = mState_ESC_24;
88 : }
89 : else {
90 0 : if (CHECK_OVERRUN(dest, destEnd, 2))
91 0 : goto error1;
92 0 : *dest++ = (PRUnichar) 0x1b;
93 0 : *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
94 0 : mState = mLastLegalState;
95 : }
96 4 : break;
97 :
98 : case mState_ESC_24: // ESC $
99 4 : if(')' == *src) {
100 4 : mState = mState_ESC_24_29;
101 : }
102 : else {
103 0 : if (CHECK_OVERRUN(dest, destEnd, 3))
104 0 : goto error1;
105 0 : *dest++ = (PRUnichar) 0x1b;
106 0 : *dest++ = (PRUnichar) '$';
107 0 : *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
108 0 : mState = mLastLegalState;
109 : }
110 4 : break;
111 :
112 : case mState_ESC_24_29: // ESC $ )
113 4 : mState = mLastLegalState;
114 4 : if('C' == *src) {
115 4 : mState = mState_ASCII;
116 4 : mRunLength = 0;
117 : }
118 : else {
119 0 : if (CHECK_OVERRUN(dest, destEnd, 4))
120 0 : goto error1;
121 0 : *dest++ = (PRUnichar) 0x1b;
122 0 : *dest++ = (PRUnichar) '$';
123 0 : *dest++ = (PRUnichar) ')';
124 0 : *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
125 0 : mState = mLastLegalState;
126 : }
127 4 : break;
128 :
129 : case mState_KSX1001_1992:
130 91 : if (0x20 < (PRUint8) *src && (PRUint8) *src < 0x7f) {
131 70 : mData = (PRUint8) *src;
132 70 : mState = mState_KSX1001_1992_2ndbyte;
133 : }
134 21 : else if (0x0f == *src) { // Shift-In (SI)
135 21 : mState = mState_ASCII;
136 21 : if (mRunLength == 0) {
137 1 : if (CHECK_OVERRUN(dest, destEnd, 1))
138 0 : goto error1;
139 1 : *dest++ = 0xFFFD;
140 : }
141 21 : mRunLength = 0;
142 : }
143 0 : else if ((PRUint8) *src == 0x20 || (PRUint8) *src == 0x09) {
144 : // Allow space and tab between SO and SI (i.e. in Hangul segment)
145 0 : if (CHECK_OVERRUN(dest, destEnd, 1))
146 0 : goto error1;
147 0 : mState = mState_KSX1001_1992;
148 0 : *dest++ = (PRUnichar) *src;
149 0 : ++mRunLength;
150 : }
151 : else { // Everything else is invalid.
152 0 : if (CHECK_OVERRUN(dest, destEnd, 1))
153 0 : goto error1;
154 0 : *dest++ = 0xFFFD;
155 : }
156 91 : break;
157 :
158 : case mState_KSX1001_1992_2ndbyte:
159 70 : if ( 0x20 < (PRUint8) *src && (PRUint8) *src < 0x7f ) {
160 70 : if (!mEUCKRDecoder) {
161 : // creating a delegate converter (EUC-KR)
162 : nsresult rv;
163 : nsCOMPtr<nsICharsetConverterManager> ccm =
164 8 : do_GetService(kCharsetConverterManagerCID, &rv);
165 4 : if (NS_SUCCEEDED(rv)) {
166 4 : rv = ccm->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder);
167 : }
168 : }
169 :
170 70 : if (!mEUCKRDecoder) {// failed creating a delegate converter
171 0 : *dest++ = 0xFFFD;
172 : }
173 : else {
174 70 : if (CHECK_OVERRUN(dest, destEnd, 1))
175 0 : goto error1;
176 : unsigned char ksx[2];
177 : PRUnichar uni;
178 70 : PRInt32 ksxLen = 2, uniLen = 1;
179 : // mData is the original 1st byte.
180 : // *src is the present 2nd byte.
181 : // Put 2 bytes (one character) to ksx[] with EUC-KR encoding.
182 70 : ksx[0] = mData | 0x80;
183 70 : ksx[1] = *src | 0x80;
184 : // Convert EUC-KR to unicode.
185 70 : mEUCKRDecoder->Convert((const char *)ksx, &ksxLen, &uni, &uniLen);
186 70 : *dest++ = uni;
187 70 : ++mRunLength;
188 : }
189 70 : mState = mState_KSX1001_1992;
190 : }
191 : else { // Invalid
192 0 : if ( 0x0f == *src ) { // Shift-In (SI)
193 0 : mState = mState_ASCII;
194 : }
195 : else {
196 0 : mState = mState_KSX1001_1992;
197 : }
198 0 : if (CHECK_OVERRUN(dest, destEnd, 1))
199 0 : goto error1;
200 0 : *dest++ = 0xFFFD;
201 : }
202 70 : break;
203 :
204 : case mState_ERROR:
205 0 : mState = mLastLegalState;
206 0 : if (CHECK_OVERRUN(dest, destEnd, 1))
207 0 : goto error1;
208 0 : *dest++ = 0xFFFD;
209 0 : break;
210 :
211 : } // switch
212 276 : src++;
213 : }
214 11 : *aDestLen = dest - aDest;
215 11 : return NS_OK;
216 :
217 : error1:
218 0 : *aDestLen = dest-aDest;
219 0 : *aSrcLen = src-(unsigned char*)aSrc;
220 0 : return NS_OK_UDEC_MOREOUTPUT;
221 : }
222 :
|