1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is mozilla.org code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 1998
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : * Pierre Phaneuf <pp@ludusdesign.com>
24 : *
25 : * Alternatively, the contents of this file may be used under the terms of
26 : * either of the GNU General Public License Version 2 or later (the "GPL"),
27 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 : * in which case the provisions of the GPL or the LGPL are applicable instead
29 : * of those above. If you wish to allow use of your version of this file only
30 : * under the terms of either the GPL or the LGPL, and not to allow others to
31 : * use your version of this file under the terms of the MPL, indicate your
32 : * decision by deleting the provisions above and replace them with the notice
33 : * and other provisions required by the GPL or the LGPL. If you do not delete
34 : * the provisions above, a recipient may use your version of this file under
35 : * the terms of any one of the MPL, the GPL or the LGPL.
36 : *
37 : * ***** END LICENSE BLOCK ***** */
38 :
39 :
40 : #include "prmem.h"
41 : #include "prprf.h"
42 : #include "nsIServiceManager.h"
43 : #include "nsIComponentManager.h"
44 : #include "nsICharsetConverterManager.h"
45 : #include "nsSaveAsCharset.h"
46 : #include "nsCRT.h"
47 : #include "nsUnicharUtils.h"
48 : #include "nsCompressedCharMap.h"
49 : #include "nsReadableUtils.h"
50 : #include "nsWhitespaceTokenizer.h"
51 :
52 : //
53 : // nsISupports methods
54 : //
// Macro-generated nsISupports plumbing for nsSaveAsCharset, naming
// nsISaveAsCharset as the single interface the class implements
// (presumably AddRef/Release/QueryInterface -- confirm against the macro's
// definition, which is not part of this file).
55 34 : NS_IMPL_ISUPPORTS1(nsSaveAsCharset, nsISaveAsCharset)
56 :
57 : //
58 : // nsSaveAsCharset
59 : //
60 2 : nsSaveAsCharset::nsSaveAsCharset()
61 : {
62 2 : mAttribute = attr_htmlTextDefault;
63 2 : mEntityVersion = 0;
64 2 : mCharsetListIndex = -1;
65 2 : }
66 :
67 4 : nsSaveAsCharset::~nsSaveAsCharset()
68 : {
69 8 : }
70 :
71 : NS_IMETHODIMP
72 2 : nsSaveAsCharset::Init(const char *charset, PRUint32 attr, PRUint32 entityVersion)
73 : {
74 2 : nsresult rv = NS_OK;
75 :
76 2 : mAttribute = attr;
77 2 : mEntityVersion = entityVersion;
78 :
79 2 : rv = SetupCharsetList(charset);
80 2 : NS_ENSURE_SUCCESS(rv, rv);
81 :
82 : // set up unicode encoder
83 2 : rv = SetupUnicodeEncoder(GetNextCharset());
84 2 : NS_ENSURE_SUCCESS(rv, rv);
85 :
86 : // set up entity converter
87 2 : if (attr_EntityNone != MASK_ENTITY(mAttribute) && !mEntityConverter)
88 2 : mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID, &rv);
89 :
90 2 : return rv;
91 : }
92 :
93 : NS_IMETHODIMP
94 11 : nsSaveAsCharset::Convert(const PRUnichar *inString, char **_retval)
95 : {
96 11 : if (nsnull == _retval)
97 0 : return NS_ERROR_NULL_POINTER;
98 11 : if (nsnull == inString)
99 0 : return NS_ERROR_NULL_POINTER;
100 11 : if (0 == *inString)
101 0 : return NS_ERROR_ILLEGAL_VALUE;
102 11 : nsresult rv = NS_OK;
103 :
104 11 : NS_ASSERTION(mEncoder, "need to call Init() before Convert()");
105 11 : NS_ENSURE_TRUE(mEncoder, NS_ERROR_FAILURE);
106 :
107 11 : *_retval = nsnull;
108 :
109 : // make sure to start from the first charset in the list
110 11 : if (mCharsetListIndex > 0) {
111 0 : mCharsetListIndex = -1;
112 0 : rv = SetupUnicodeEncoder(GetNextCharset());
113 0 : NS_ENSURE_SUCCESS(rv, rv);
114 : }
115 :
116 11 : do {
117 : // fallback to the next charset in the list if the last conversion failed by an unmapped character
118 11 : if (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv) {
119 0 : const char * charset = GetNextCharset();
120 0 : if (!charset)
121 0 : break;
122 0 : rv = SetupUnicodeEncoder(charset);
123 0 : NS_ENSURE_SUCCESS(rv, rv);
124 0 : PR_FREEIF(*_retval);
125 : }
126 :
127 11 : if (attr_EntityBeforeCharsetConv == MASK_ENTITY(mAttribute)) {
128 0 : NS_ASSERTION(mEntityConverter, "need to call Init() before Convert()");
129 0 : NS_ENSURE_TRUE(mEntityConverter, NS_ERROR_FAILURE);
130 0 : PRUnichar *entity = nsnull;
131 : // do the entity conversion first
132 0 : rv = mEntityConverter->ConvertToEntities(inString, mEntityVersion, &entity);
133 0 : if(NS_SUCCEEDED(rv)) {
134 0 : rv = DoCharsetConversion(entity, _retval);
135 0 : nsMemory::Free(entity);
136 : }
137 : }
138 : else
139 11 : rv = DoCharsetConversion(inString, _retval);
140 :
141 : } while (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv);
142 :
143 11 : return rv;
144 : }
145 :
146 : NS_IMETHODIMP
147 0 : nsSaveAsCharset::GetCharset(char * *aCharset)
148 : {
149 0 : NS_ENSURE_ARG(aCharset);
150 0 : NS_ASSERTION(mCharsetListIndex >= 0, "need to call Init() first");
151 0 : NS_ENSURE_TRUE(mCharsetListIndex >= 0, NS_ERROR_FAILURE);
152 :
153 0 : const char* charset = mCharsetList[mCharsetListIndex].get();
154 0 : if (!charset) {
155 0 : *aCharset = nsnull;
156 0 : NS_ASSERTION(charset, "make sure to call Init() with non empty charset list");
157 0 : return NS_ERROR_FAILURE;
158 : }
159 :
160 0 : *aCharset = nsCRT::strdup(charset);
161 0 : return (*aCharset) ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
162 : }
163 :
164 : /////////////////////////////////////////////////////////////////////////////////////////
165 :
166 : // do the fallback, reallocate the buffer if necessary
167 : // need to pass destination buffer info (size, current position and estimation of rest of the conversion)
168 : NS_IMETHODIMP
169 36 : nsSaveAsCharset::HandleFallBack(PRUint32 character, char **outString, PRInt32 *bufferLength,
170 : PRInt32 *currentPos, PRInt32 estimatedLength)
171 : {
172 36 : if((nsnull == outString ) || (nsnull == bufferLength) ||(nsnull ==currentPos))
173 0 : return NS_ERROR_NULL_POINTER;
174 : char fallbackStr[256];
175 36 : nsresult rv = DoConversionFallBack(character, fallbackStr, 256);
176 36 : if (NS_SUCCEEDED(rv)) {
177 36 : PRInt32 tempLen = (PRInt32) PL_strlen(fallbackStr);
178 :
179 : // reallocate if the buffer is not large enough
180 36 : if ((tempLen + estimatedLength) >= (*bufferLength - *currentPos)) {
181 0 : char *temp = (char *) PR_Realloc(*outString, *bufferLength + tempLen);
182 0 : if (NULL != temp) {
183 : // adjust length/pointer after realloc
184 0 : *bufferLength += tempLen;
185 0 : *outString = temp;
186 : } else {
187 0 : *outString = NULL;
188 0 : *bufferLength =0;
189 0 : return NS_ERROR_OUT_OF_MEMORY;
190 : }
191 : }
192 36 : memcpy((*outString + *currentPos), fallbackStr, tempLen);
193 36 : *currentPos += tempLen;
194 : }
195 36 : return rv;
196 : }
197 :
198 : NS_IMETHODIMP
199 11 : nsSaveAsCharset::DoCharsetConversion(const PRUnichar *inString, char **outString)
200 : {
201 11 : if(nsnull == outString )
202 0 : return NS_ERROR_NULL_POINTER;
203 11 : NS_ASSERTION(outString, "invalid input");
204 :
205 11 : *outString = NULL;
206 :
207 : nsresult rv;
208 11 : PRInt32 inStringLength = nsCRT::strlen(inString); // original input string length
209 : PRInt32 bufferLength; // allocated buffer length
210 11 : PRInt32 srcLength = inStringLength;
211 : PRInt32 dstLength;
212 11 : char *dstPtr = NULL;
213 : PRInt32 pos1, pos2;
214 11 : nsresult saveResult = NS_OK; // to remember NS_ERROR_UENC_NOMAPPING
215 :
216 : // estimate and allocate the target buffer (reserve extra memory for fallback)
217 11 : rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength);
218 11 : if (NS_FAILED(rv)) return rv;
219 :
220 11 : bufferLength = dstLength + 512; // reserve 512 byte for fallback.
221 11 : dstPtr = (char *) PR_Malloc(bufferLength);
222 11 : if (NULL == dstPtr) return NS_ERROR_OUT_OF_MEMORY;
223 :
224 :
225 58 : for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) {
226 : // convert from unicode
227 42 : dstLength = bufferLength - pos2;
228 42 : rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength);
229 :
230 42 : pos1 += srcLength ? srcLength : 1;
231 42 : pos2 += dstLength;
232 42 : dstPtr[pos2] = '\0';
233 :
234 : // break: this is usually the case (no error) OR unrecoverable error
235 42 : if (NS_ERROR_UENC_NOMAPPING != rv) break;
236 :
237 : // remember this happened and reset the result
238 36 : saveResult = rv;
239 36 : rv = NS_OK;
240 :
241 : // finish encoder, give it a chance to write extra data like escape sequences
242 36 : dstLength = bufferLength - pos2;
243 36 : rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
244 36 : if (NS_SUCCEEDED(rv)) {
245 36 : pos2 += dstLength;
246 36 : dstPtr[pos2] = '\0';
247 : }
248 :
249 36 : srcLength = inStringLength - pos1;
250 :
251 : // do the fallback
252 36 : if (!ATTR_NO_FALLBACK(mAttribute)) {
253 : PRUint32 unMappedChar;
254 37 : if (NS_IS_HIGH_SURROGATE(inString[pos1-1]) &&
255 1 : inStringLength > pos1 && NS_IS_LOW_SURROGATE(inString[pos1])) {
256 1 : unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]);
257 1 : pos1++;
258 : } else {
259 35 : unMappedChar = inString[pos1-1];
260 : }
261 :
262 36 : rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength);
263 36 : if (NS_FAILED(rv))
264 0 : break;
265 :
266 36 : rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength);
267 36 : if (NS_FAILED(rv))
268 0 : break;
269 36 : dstPtr[pos2] = '\0';
270 : }
271 : }
272 :
273 11 : if (NS_SUCCEEDED(rv)) {
274 : // finish encoder, give it a chance to write extra data like escape sequences
275 11 : dstLength = bufferLength - pos2;
276 11 : rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
277 11 : if (NS_SUCCEEDED(rv)) {
278 11 : pos2 += dstLength;
279 11 : dstPtr[pos2] = '\0';
280 : }
281 : }
282 :
283 11 : if (NS_FAILED(rv)) {
284 0 : PR_FREEIF(dstPtr);
285 0 : return rv;
286 : }
287 :
288 11 : *outString = dstPtr; // set the result string
289 :
290 : // set error code so that the caller can do own fall back
291 11 : if (NS_ERROR_UENC_NOMAPPING == saveResult) {
292 10 : rv = NS_ERROR_UENC_NOMAPPING;
293 : }
294 :
295 11 : return rv;
296 : }
297 :
298 : NS_IMETHODIMP
299 36 : nsSaveAsCharset::DoConversionFallBack(PRUint32 inUCS4, char *outString, PRInt32 bufferLength)
300 : {
301 36 : NS_ASSERTION(outString, "invalid input");
302 36 : if(nsnull == outString )
303 0 : return NS_ERROR_NULL_POINTER;
304 :
305 36 : *outString = '\0';
306 :
307 36 : nsresult rv = NS_OK;
308 :
309 36 : if (ATTR_NO_FALLBACK(mAttribute)) {
310 0 : return NS_OK;
311 : }
312 36 : if (attr_EntityAfterCharsetConv == MASK_ENTITY(mAttribute)) {
313 36 : char *entity = NULL;
314 36 : rv = mEntityConverter->ConvertUTF32ToEntity(inUCS4, mEntityVersion, &entity);
315 36 : if (NS_SUCCEEDED(rv)) {
316 36 : if (NULL == entity || (PRInt32)strlen(entity) > bufferLength) {
317 0 : return NS_ERROR_OUT_OF_MEMORY;
318 : }
319 36 : PL_strcpy(outString, entity);
320 36 : nsMemory::Free(entity);
321 36 : return rv;
322 : }
323 : }
324 :
325 0 : switch (MASK_FALLBACK(mAttribute)) {
326 : case attr_FallbackQuestionMark:
327 0 : if(bufferLength>=2) {
328 0 : *outString++='?';
329 0 : *outString='\0';
330 0 : rv = NS_OK;
331 : } else {
332 0 : rv = NS_ERROR_FAILURE;
333 : }
334 0 : break;
335 : case attr_FallbackEscapeU:
336 0 : if (inUCS4 & 0xff0000)
337 0 : rv = (PR_snprintf(outString, bufferLength, "\\u%.6x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
338 : else
339 0 : rv = (PR_snprintf(outString, bufferLength, "\\u%.4x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
340 0 : break;
341 : case attr_FallbackDecimalNCR:
342 0 : rv = ( PR_snprintf(outString, bufferLength, "&#%u;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
343 0 : break;
344 : case attr_FallbackHexNCR:
345 0 : rv = (PR_snprintf(outString, bufferLength, "&#x%x;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
346 0 : break;
347 : case attr_FallbackNone:
348 0 : rv = NS_OK;
349 0 : break;
350 : default:
351 0 : rv = NS_ERROR_ILLEGAL_VALUE;
352 0 : break;
353 : }
354 :
355 0 : return rv;
356 : }
357 :
358 2 : nsresult nsSaveAsCharset::SetupUnicodeEncoder(const char* charset)
359 : {
360 2 : NS_ENSURE_ARG(charset);
361 : nsresult rv;
362 :
363 : // set up unicode encoder
364 4 : nsCOMPtr <nsICharsetConverterManager> ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
365 2 : NS_ENSURE_SUCCESS(rv, rv);
366 :
367 2 : return ccm->GetUnicodeEncoder(charset, getter_AddRefs(mEncoder));
368 : }
369 :
370 2 : nsresult nsSaveAsCharset::SetupCharsetList(const char *charsetList)
371 : {
372 2 : NS_ENSURE_ARG(charsetList);
373 :
374 2 : NS_ASSERTION(charsetList[0], "charsetList should not be empty");
375 2 : if (!charsetList[0])
376 0 : return NS_ERROR_INVALID_ARG;
377 :
378 2 : if (mCharsetListIndex >= 0) {
379 0 : mCharsetList.Clear();
380 0 : mCharsetListIndex = -1;
381 : }
382 :
383 2 : nsCWhitespaceTokenizer tokenizer = nsDependentCString(charsetList);
384 6 : while (tokenizer.hasMoreTokens()) {
385 2 : ParseString(tokenizer.nextToken(), ',', mCharsetList);
386 : }
387 :
388 2 : return NS_OK;
389 : }
390 :
391 2 : const char * nsSaveAsCharset::GetNextCharset()
392 : {
393 2 : if ((mCharsetListIndex + 1) >= PRInt32(mCharsetList.Length()))
394 0 : return nsnull;
395 :
396 : // bump the index and return the next charset
397 2 : return mCharsetList[++mCharsetListIndex].get();
398 : }
|