1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim:expandtab:shiftwidth=2:tabstop=4:
3 : */
4 : /* ***** BEGIN LICENSE BLOCK *****
5 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 : *
7 : * The contents of this file are subject to the Mozilla Public License Version
8 : * 1.1 (the "License"); you may not use this file except in compliance with
9 : * the License. You may obtain a copy of the License at
10 : * http://www.mozilla.org/MPL/
11 : *
12 : * Software distributed under the License is distributed on an "AS IS" basis,
13 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 : * for the specific language governing rights and limitations under the
15 : * License.
16 : *
17 : * The Original Code is mozilla.org code.
18 : *
19 : * The Initial Developer of the Original Code is
20 : * Netscape Communications Corporation.
21 : * Portions created by the Initial Developer are Copyright (C) 1998
22 : * the Initial Developer. All Rights Reserved.
23 : *
24 : * Contributor(s):
25 : * rhp@netscape.com
26 : * Jungshik Shin <jshin@mailaps.org>
27 : * John G Myers <jgmyers@netscape.com>
28 : * Takayuki Tei <taka@netscape.com>
29 : *
30 : * Alternatively, the contents of this file may be used under the terms of
31 : * either the GNU General Public License Version 2 or later (the "GPL"), or
32 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
33 : * in which case the provisions of the GPL or the LGPL are applicable instead
34 : * of those above. If you wish to allow use of your version of this file only
35 : * under the terms of either the GPL or the LGPL, and not to allow others to
36 : * use your version of this file under the terms of the MPL, indicate your
37 : * decision by deleting the provisions above and replace them with the notice
38 : * and other provisions required by the GPL or the LGPL. If you do not delete
39 : * the provisions above, a recipient may use your version of this file under
40 : * the terms of any one of the MPL, the GPL or the LGPL.
41 : *
42 : * ***** END LICENSE BLOCK ***** */
43 :
44 : #include <string.h>
45 : #include "prtypes.h"
46 : #include "prmem.h"
47 : #include "prprf.h"
48 : #include "plstr.h"
49 : #include "plbase64.h"
50 : #include "nsCRT.h"
51 : #include "nsMemory.h"
52 : #include "nsCOMPtr.h"
53 : #include "nsEscape.h"
54 : #include "nsIUTF8ConverterService.h"
55 : #include "nsUConvCID.h"
56 : #include "nsIServiceManager.h"
57 : #include "nsMIMEHeaderParamImpl.h"
58 : #include "nsReadableUtils.h"
59 : #include "nsNativeCharsetUtils.h"
60 : #include "nsNetError.h"
61 :
62 : // static functions declared below are moved from mailnews/mime/src/comi18n.cpp
63 :
64 : static char *DecodeQ(const char *, PRUint32);
65 : static bool Is7bitNonAsciiString(const char *, PRUint32);
66 : static void CopyRawHeader(const char *, PRUint32, const char *, nsACString &);
67 : static nsresult DecodeRFC2047Str(const char *, const char *, bool, nsACString&);
68 :
// Evaluates to true when the charset name |cset| starts (ignoring case) with
// "ISO-2022", "HZ-GB" or "UTF-7", i.e. when it names an encoding that carries
// non-ASCII text using 7-bit octets only.
// XXX UTF-7 is very unlikely to show up in a message header, but in theory
// it is possible.
#define IS_7BIT_NON_ASCII_CHARSET(cset)                 \
    (nsCRT::strncasecmp((cset), "ISO-2022", 8) == 0 ||  \
     nsCRT::strncasecmp((cset), "HZ-GB", 5) == 0 ||     \
     nsCRT::strncasecmp((cset), "UTF-7", 5) == 0)
75 :
// Registers nsIMIMEHeaderParam as the one interface implemented by
// nsMIMEHeaderParamImpl.
// NOTE(review): presumably expands to the AddRef/Release/QueryInterface
// boilerplate -- confirm against the macro's definition.
76 234 : NS_IMPL_ISUPPORTS1(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam)
77 :
78 : NS_IMETHODIMP
79 140 : nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal,
80 : const char *aParamName,
81 : const nsACString& aFallbackCharset,
82 : bool aTryLocaleCharset,
83 : char **aLang, nsAString& aResult)
84 : {
85 : return DoGetParameter(aHeaderVal, aParamName, RFC_2231_DECODING,
86 140 : aFallbackCharset, aTryLocaleCharset, aLang, aResult);
87 : }
88 :
89 : NS_IMETHODIMP
90 97 : nsMIMEHeaderParamImpl::GetParameter5987(const nsACString& aHeaderVal,
91 : const char *aParamName,
92 : const nsACString& aFallbackCharset,
93 : bool aTryLocaleCharset,
94 : char **aLang, nsAString& aResult)
95 : {
96 : return DoGetParameter(aHeaderVal, aParamName, RFC_5987_DECODING,
97 97 : aFallbackCharset, aTryLocaleCharset, aLang, aResult);
98 : }
99 :
100 : // XXX : aTryLocaleCharset is not yet effective.
101 : nsresult
102 237 : nsMIMEHeaderParamImpl::DoGetParameter(const nsACString& aHeaderVal,
103 : const char *aParamName,
104 : ParamDecoding aDecoding,
105 : const nsACString& aFallbackCharset,
106 : bool aTryLocaleCharset,
107 : char **aLang, nsAString& aResult)
108 : {
109 237 : aResult.Truncate();
110 : nsresult rv;
111 :
112 : // get parameter (decode RFC 2231/5987 when applicable, as specified by
113 : // aDecoding (5987 being a subset of 2231) and return charset.)
114 474 : nsXPIDLCString med;
115 474 : nsXPIDLCString charset;
116 237 : rv = DoParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName,
117 237 : aDecoding, getter_Copies(charset), aLang,
118 474 : getter_Copies(med));
119 237 : if (NS_FAILED(rv))
120 32 : return rv;
121 :
122 : // convert to UTF-8 after charset conversion and RFC 2047 decoding
123 : // if necessary.
124 :
125 410 : nsCAutoString str1;
126 205 : rv = DecodeParameter(med, charset.get(), nsnull, false, str1);
127 205 : NS_ENSURE_SUCCESS(rv, rv);
128 :
129 205 : if (!aFallbackCharset.IsEmpty())
130 : {
131 410 : nsCAutoString str2;
132 : nsCOMPtr<nsIUTF8ConverterService>
133 410 : cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
134 1025 : if (cvtUTF8 &&
135 820 : NS_SUCCEEDED(cvtUTF8->ConvertStringToUTF8(str1,
136 : PromiseFlatCString(aFallbackCharset).get(), false, str2))) {
137 203 : CopyUTF8toUTF16(str2, aResult);
138 203 : return NS_OK;
139 : }
140 : }
141 :
142 2 : if (IsUTF8(str1)) {
143 0 : CopyUTF8toUTF16(str1, aResult);
144 0 : return NS_OK;
145 : }
146 :
147 2 : if (aTryLocaleCharset && !NS_IsNativeUTF8())
148 0 : return NS_CopyNativeToUnicode(str1, aResult);
149 :
150 2 : CopyASCIItoUTF16(str1, aResult);
151 2 : return NS_OK;
152 : }
153 :
// Undo backslash escaping inside a quoted-string.
// |src| is rewritten in place: every backslash that is followed by another
// character is dropped and the character after it is kept verbatim. A lone
// backslash at the very end of the string is left alone. The result is never
// longer than the input and is always NUL-terminated.
void RemoveQuotedStringEscapes(char *src)
{
  const char *readPos = src;
  char *writePos = src;

  while (*readPos)
  {
    const bool isEscape = (readPos[0] == '\\') && (readPos[1] != '\0');
    if (isEscape)
      ++readPos;  // drop the backslash, keep what it escaped

    *writePos++ = *readPos++;
  }

  *writePos = '\0';
}
171 :
172 : // moved almost verbatim from mimehdrs.cpp
173 : // char *
174 : // MimeHeaders_get_parameter (const char *header_value, const char *parm_name,
175 : // char **charset, char **language)
176 : //
177 : // The format of these header lines is
178 : // <token> [ ';' <token> '=' <token-or-quoted-string> ]*
179 : NS_IMETHODIMP
180 0 : nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue,
181 : const char *aParamName,
182 : char **aCharset,
183 : char **aLang,
184 : char **aResult)
185 : {
186 : return DoParameterInternal(aHeaderValue, aParamName, RFC_2231_DECODING,
187 0 : aCharset, aLang, aResult);
188 : }
189 :
190 :
191 : nsresult
192 237 : nsMIMEHeaderParamImpl::DoParameterInternal(const char *aHeaderValue,
193 : const char *aParamName,
194 : ParamDecoding aDecoding,
195 : char **aCharset,
196 : char **aLang,
197 : char **aResult)
198 : {
199 :
200 237 : if (!aHeaderValue || !*aHeaderValue || !aResult)
201 12 : return NS_ERROR_INVALID_ARG;
202 :
203 225 : *aResult = nsnull;
204 :
205 225 : if (aCharset) *aCharset = nsnull;
206 225 : if (aLang) *aLang = nsnull;
207 :
208 225 : const char *str = aHeaderValue;
209 :
210 : // skip leading white space.
211 225 : for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
212 : ;
213 225 : const char *start = str;
214 :
215 : // aParamName is empty. return the first (possibly) _unnamed_ 'parameter'
216 : // For instance, return 'inline' in the following case:
217 : // Content-Disposition: inline; filename=.....
218 225 : if (!aParamName || !*aParamName)
219 : {
220 115 : for (; *str && *str != ';' && !nsCRT::IsAsciiSpace(*str); ++str)
221 : ;
222 115 : if (str == start)
223 2 : return NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY;
224 :
225 113 : *aResult = (char *) nsMemory::Clone(start, (str - start) + 1);
226 113 : NS_ENSURE_TRUE(*aResult, NS_ERROR_OUT_OF_MEMORY);
227 113 : (*aResult)[str - start] = '\0'; // null-terminate
228 113 : return NS_OK;
229 : }
230 :
231 : /* Skip forward to first ';' */
232 110 : for (; *str && *str != ';' && *str != ','; ++str)
233 : ;
234 110 : if (*str)
235 102 : str++;
236 : /* Skip over following whitespace */
237 110 : for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
238 : ;
239 :
240 : // Some broken http servers just specify parameters
241 : // like 'filename' without specifying disposition
242 : // method. Rewind to the first non-white-space
243 : // character.
244 :
245 110 : if (!*str)
246 10 : str = start;
247 :
248 : // RFC2231 - The legitimate parm format can be:
249 : // A. title=ThisIsTitle
250 : // B. title*=us-ascii'en-us'This%20is%20wierd.
251 : // C. title*0*=us-ascii'en'This%20is%20wierd.%20We
252 : // title*1*=have%20to%20support%20this.
253 : // title*2="Else..."
254 : // D. title*0="Hey, what you think you are doing?"
255 : // title*1="There is no charset and lang info."
256 : // RFC5987: only A and B
257 :
258 110 : PRInt32 paramLen = strlen(aParamName);
259 :
260 110 : bool haveCaseAValue = false;
261 110 : PRInt32 nextContinuation = 0; // next value in series, or -1 if error
262 :
263 462 : while (*str) {
264 274 : const char *tokenStart = str;
265 274 : const char *tokenEnd = 0;
266 274 : const char *valueStart = str;
267 274 : const char *valueEnd = 0;
268 274 : bool seenEquals = false;
269 :
270 274 : NS_ASSERTION(!nsCRT::IsAsciiSpace(*str), "should be after whitespace.");
271 :
272 : // Skip forward to the end of this token.
273 274 : for (; *str && !nsCRT::IsAsciiSpace(*str) && *str != '=' && *str != ';'; str++)
274 : ;
275 274 : tokenEnd = str;
276 :
277 : // Skip over whitespace, '=', and whitespace
278 274 : while (nsCRT::IsAsciiSpace(*str)) ++str;
279 274 : if (*str == '=') {
280 264 : ++str;
281 264 : seenEquals = true;
282 : }
283 274 : while (nsCRT::IsAsciiSpace(*str)) ++str;
284 :
285 274 : bool needUnquote = false;
286 :
287 274 : if (*str != '"')
288 : {
289 : // The value is a token, not a quoted string.
290 260 : valueStart = str;
291 1948 : for (valueEnd = str;
292 1688 : *valueEnd && !nsCRT::IsAsciiSpace (*valueEnd) && *valueEnd != ';';
293 : valueEnd++)
294 : ;
295 260 : str = valueEnd;
296 : }
297 : else
298 : {
299 : // The value is a quoted string.
300 14 : needUnquote = true;
301 :
302 14 : ++str;
303 14 : valueStart = str;
304 54 : for (valueEnd = str; *valueEnd; ++valueEnd)
305 : {
306 52 : if (*valueEnd == '\\')
307 6 : ++valueEnd;
308 46 : else if (*valueEnd == '"')
309 12 : break;
310 : }
311 14 : str = valueEnd;
312 : // *valueEnd != null means that *valueEnd is quote character.
313 14 : if (*valueEnd)
314 12 : str++;
315 : }
316 :
317 : // See if this is the simplest case (case A above),
318 : // a 'single' line value with no charset and lang.
319 : // If so, copy it and return.
320 339 : if (tokenEnd - tokenStart == paramLen &&
321 : seenEquals &&
322 65 : !nsCRT::strncasecmp(tokenStart, aParamName, paramLen))
323 : {
324 65 : if (*aResult)
325 : {
326 : // either seen earlier caseA value already--we prefer first--or caseA
327 : // came after a continuation: either way, prefer other value
328 3 : goto increment_str;
329 : }
330 : // if the parameter spans across multiple lines we have to strip out the
331 : // line continuation -- jht 4/29/98
332 124 : nsCAutoString tempStr(valueStart, valueEnd - valueStart);
333 62 : tempStr.StripChars("\r\n");
334 62 : char *res = ToNewCString(tempStr);
335 62 : NS_ENSURE_TRUE(res, NS_ERROR_OUT_OF_MEMORY);
336 :
337 62 : if (needUnquote)
338 10 : RemoveQuotedStringEscapes(res);
339 :
340 62 : *aResult = res;
341 :
342 124 : haveCaseAValue = true;
343 : // keep going, we may find a RFC 2231/5987 encoded alternative
344 : }
345 : // case B, C, and D
346 611 : else if (tokenEnd - tokenStart > paramLen &&
347 204 : !nsCRT::strncasecmp(tokenStart, aParamName, paramLen) &&
348 : seenEquals &&
349 198 : *(tokenStart + paramLen) == '*')
350 : {
351 198 : const char *cp = tokenStart + paramLen + 1; // 1st char past '*'
352 198 : bool needUnescape = *(tokenEnd - 1) == '*';
353 :
354 198 : bool caseB = (tokenEnd - tokenStart) == paramLen + 1;
355 198 : bool caseCorDStart = (*cp == '0') && needUnescape;
356 198 : bool acceptContinuations = (aDecoding != RFC_5987_DECODING);
357 :
358 : // CaseB and start of CaseC: requires charset and optional language
359 : // in quotes (quotes required even if lang is blank)
360 198 : if (caseB || (caseCorDStart && acceptContinuations))
361 : {
362 39 : if (caseCorDStart) {
363 7 : if (nextContinuation++ != 0)
364 : {
365 : // error: already started a continuation. Skip future
366 : // continuations and return whatever initial parts were in order.
367 1 : nextContinuation = -1;
368 1 : goto increment_str;
369 : }
370 : }
371 : // look for single quotation mark(')
372 38 : const char *sQuote1 = PL_strchr(valueStart, 0x27);
373 38 : const char *sQuote2 = (char *) (sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nsnull);
374 :
375 : // Two single quotation marks must be present even in
376 : // absence of charset and lang.
377 38 : if (!sQuote1 || !sQuote2)
378 8 : NS_WARNING("Mandatory two single quotes are missing in header parameter\n");
379 38 : if (aCharset && sQuote1 > valueStart && sQuote1 < valueEnd)
380 : {
381 32 : *aCharset = (char *) nsMemory::Clone(valueStart, sQuote1 - valueStart + 1);
382 32 : if (*aCharset)
383 32 : *(*aCharset + (sQuote1 - valueStart)) = 0;
384 : }
385 38 : if (aLang && sQuote1 && sQuote2 && sQuote2 > sQuote1 + 1 &&
386 : sQuote2 < valueEnd)
387 : {
388 0 : *aLang = (char *) nsMemory::Clone(sQuote1 + 1, sQuote2 - (sQuote1 + 1) + 1);
389 0 : if (*aLang)
390 0 : *(*aLang + (sQuote2 - (sQuote1 + 1))) = 0;
391 : }
392 :
393 : // Be generous and handle gracefully when required
394 : // single quotes are absent.
395 38 : if (sQuote1)
396 : {
397 34 : if(!sQuote2)
398 4 : sQuote2 = sQuote1;
399 : }
400 : else
401 4 : sQuote2 = valueStart - 1;
402 :
403 38 : if (sQuote2 && sQuote2 + 1 < valueEnd)
404 : {
405 38 : if (*aResult)
406 : {
407 : // caseA value already read, or caseC/D value already read
408 : // but we're now reading caseB: either way, drop old value
409 14 : nsMemory::Free(*aResult);
410 14 : haveCaseAValue = false;
411 : }
412 38 : *aResult = (char *) nsMemory::Alloc(valueEnd - (sQuote2 + 1) + 1);
413 38 : if (*aResult)
414 : {
415 38 : memcpy(*aResult, sQuote2 + 1, valueEnd - (sQuote2 + 1));
416 38 : *(*aResult + (valueEnd - (sQuote2 + 1))) = 0;
417 38 : if (needUnescape)
418 : {
419 38 : nsUnescape(*aResult);
420 38 : if (caseB)
421 32 : return NS_OK; // caseB wins over everything else
422 : }
423 : }
424 6 : }
425 : } // end of if-block : title*0*= or title*=
426 : // caseD: a line of multiline param with no need for unescaping : title*[0-9]=
427 : // or 2nd or later lines of a caseC param : title*[1-9]*=
428 159 : else if (acceptContinuations && nsCRT::IsAsciiDigit(PRUnichar(*cp)))
429 : {
430 76 : PRInt32 nextSegment = atoi(cp);
431 : // no leading zeros allowed except for ... position 0
432 76 : bool broken = nextSegment > 0 && *cp == '0';
433 :
434 76 : if (broken || nextSegment != nextContinuation++)
435 : {
436 : // error: gap in continuation or unneccessary leading 0.
437 : // Skip future continuations and return whatever initial parts were
438 : // in order.
439 17 : nextContinuation = -1;
440 17 : goto increment_str;
441 : }
442 59 : if (haveCaseAValue && *aResult)
443 : {
444 : // drop caseA value
445 4 : nsMemory::Free(*aResult);
446 4 : *aResult = 0;
447 4 : haveCaseAValue = false;
448 : }
449 59 : PRInt32 len = 0;
450 59 : if (*aResult) // 2nd or later lines of multiline parameter
451 : {
452 49 : len = strlen(*aResult);
453 49 : char *ns = (char *) nsMemory::Realloc(*aResult, len + (valueEnd - valueStart) + 1);
454 49 : if (!ns)
455 : {
456 0 : nsMemory::Free(*aResult);
457 : }
458 49 : *aResult = ns;
459 : }
460 : else
461 : {
462 10 : NS_ASSERTION(*cp == '0', "Not first value in continuation"); // must be; 1st line : title*0=
463 10 : *aResult = (char *) nsMemory::Alloc(valueEnd - valueStart + 1);
464 : }
465 59 : if (*aResult)
466 : {
467 : // append a partial value
468 59 : memcpy(*aResult + len, valueStart, valueEnd - valueStart);
469 59 : *(*aResult + len + (valueEnd - valueStart)) = 0;
470 59 : if (needUnescape)
471 3 : nsUnescape(*aResult + len);
472 : }
473 : else
474 0 : return NS_ERROR_OUT_OF_MEMORY;
475 : } // end of if-block : title*[0-9]= or title*[1-9]*=
476 : }
477 :
478 : // str now points after the end of the value.
479 : // skip over whitespace, ';', whitespace.
480 : increment_str:
481 242 : while (nsCRT::IsAsciiSpace(*str)) ++str;
482 242 : if (*str == ';') ++str;
483 242 : while (nsCRT::IsAsciiSpace(*str)) ++str;
484 : }
485 :
486 78 : if (*aResult)
487 60 : return NS_OK;
488 : else
489 18 : return NS_ERROR_INVALID_ARG; // aParameter not found !!
490 : }
491 :
492 :
493 : NS_IMETHODIMP
494 173 : nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal,
495 : const char* aDefaultCharset,
496 : bool aOverrideCharset,
497 : bool aEatContinuations,
498 : nsACString& aResult)
499 : {
500 173 : aResult.Truncate();
501 173 : if (!aHeaderVal)
502 0 : return NS_ERROR_INVALID_ARG;
503 173 : if (!*aHeaderVal)
504 7 : return NS_OK;
505 :
506 :
507 : // If aHeaderVal is RFC 2047 encoded or is not a UTF-8 string but
508 : // aDefaultCharset is specified, decodes RFC 2047 encoding and converts
509 : // to UTF-8. Otherwise, just strips away CRLF.
510 332 : if (PL_strstr(aHeaderVal, "=?") ||
511 166 : (aDefaultCharset && (!IsUTF8(nsDependentCString(aHeaderVal)) ||
512 0 : Is7bitNonAsciiString(aHeaderVal, PL_strlen(aHeaderVal))))) {
513 0 : DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult);
514 498 : } else if (aEatContinuations &&
515 332 : (PL_strchr(aHeaderVal, '\n') || PL_strchr(aHeaderVal, '\r'))) {
516 0 : aResult = aHeaderVal;
517 : } else {
518 166 : aEatContinuations = false;
519 166 : aResult = aHeaderVal;
520 : }
521 :
522 166 : if (aEatContinuations) {
523 0 : nsCAutoString temp(aResult);
524 0 : temp.ReplaceSubstring("\n\t", " ");
525 0 : temp.ReplaceSubstring("\r\t", " ");
526 0 : temp.StripChars("\r\n");
527 0 : aResult = temp;
528 : }
529 :
530 166 : return NS_OK;
531 : }
532 :
533 : NS_IMETHODIMP
534 205 : nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
535 : const char* aCharset,
536 : const char* aDefaultCharset,
537 : bool aOverrideCharset,
538 : nsACString& aResult)
539 : {
540 205 : aResult.Truncate();
541 : // If aCharset is given, aParamValue was obtained from RFC2231/5987
542 : // encoding and we're pretty sure that it's in aCharset.
543 205 : if (aCharset && *aCharset)
544 : {
545 64 : nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
546 32 : if (cvtUTF8)
547 : // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
548 32 : return cvtUTF8->ConvertStringToUTF8(aParamValue, aCharset,
549 32 : IS_7BIT_NON_ASCII_CHARSET(aCharset), aResult);
550 : }
551 :
552 346 : const nsAFlatCString& param = PromiseFlatCString(aParamValue);
553 346 : nsCAutoString unQuoted;
554 173 : nsACString::const_iterator s, e;
555 173 : param.BeginReading(s);
556 173 : param.EndReading(e);
557 :
558 : // strip '\' when used to quote CR, LF, '"' and '\'
559 1702 : for ( ; s != e; ++s) {
560 1529 : if ((*s == '\\')) {
561 0 : if (++s == e) {
562 0 : --s; // '\' is at the end. move back and append '\'.
563 : }
564 0 : else if (*s != nsCRT::CR && *s != nsCRT::LF && *s != '"' && *s != '\\') {
565 0 : --s; // '\' is not foll. by CR,LF,'"','\'. move back and append '\'
566 : }
567 : // else : skip '\' and append the quoted character.
568 : }
569 1529 : unQuoted.Append(*s);
570 : }
571 :
572 173 : aResult = unQuoted;
573 :
574 346 : nsCAutoString decoded;
575 :
576 : // Try RFC 2047 encoding, instead.
577 : nsresult rv = DecodeRFC2047Header(unQuoted.get(), aDefaultCharset,
578 173 : aOverrideCharset, true, decoded);
579 :
580 173 : if (NS_SUCCEEDED(rv) && !decoded.IsEmpty())
581 166 : aResult = decoded;
582 :
583 173 : return rv;
584 : }
585 :
586 : #define ISHEXCHAR(c) \
587 : ((0x30 <= PRUint8(c) && PRUint8(c) <= 0x39) || \
588 : (0x41 <= PRUint8(c) && PRUint8(c) <= 0x46) || \
589 : (0x61 <= PRUint8(c) && PRUint8(c) <= 0x66))
590 :
591 : // Decode Q encoding (RFC 2047).
592 : // static
593 0 : char *DecodeQ(const char *in, PRUint32 length)
594 : {
595 0 : char *out, *dest = 0;
596 :
597 0 : out = dest = (char *)PR_Calloc(length + 1, sizeof(char));
598 0 : if (dest == nsnull)
599 0 : return nsnull;
600 0 : while (length > 0) {
601 0 : PRUintn c = 0;
602 0 : switch (*in) {
603 : case '=':
604 : // check if |in| in the form of '=hh' where h is [0-9a-fA-F].
605 0 : if (length < 3 || !ISHEXCHAR(in[1]) || !ISHEXCHAR(in[2]))
606 : goto badsyntax;
607 0 : PR_sscanf(in + 1, "%2X", &c);
608 0 : *out++ = (char) c;
609 0 : in += 3;
610 0 : length -= 3;
611 0 : break;
612 :
613 : case '_':
614 0 : *out++ = ' ';
615 0 : in++;
616 0 : length--;
617 0 : break;
618 :
619 : default:
620 0 : if (*in & 0x80) goto badsyntax;
621 0 : *out++ = *in++;
622 0 : length--;
623 : }
624 : }
625 0 : *out++ = '\0';
626 :
627 0 : for (out = dest; *out ; ++out) {
628 0 : if (*out == '\t')
629 0 : *out = ' ';
630 : }
631 :
632 0 : return dest;
633 :
634 : badsyntax:
635 0 : PR_Free(dest);
636 0 : return nsnull;
637 : }
638 :
639 : // check if input is HZ (a 7bit encoding for simplified Chinese : RFC 1842))
640 : // or has ESC which may be an indication that it's in one of many ISO
641 : // 2022 7bit encodings (e.g. ISO-2022-JP(-2)/CN : see RFC 1468, 1922, 1554).
642 : // static
643 0 : bool Is7bitNonAsciiString(const char *input, PRUint32 len)
644 : {
645 : PRInt32 c;
646 :
647 : enum { hz_initial, // No HZ seen yet
648 : hz_escaped, // Inside an HZ ~{ escape sequence
649 : hz_seen, // Have seen at least one complete HZ sequence
650 : hz_notpresent // Have seen something that is not legal HZ
651 : } hz_state;
652 :
653 0 : hz_state = hz_initial;
654 0 : while (len) {
655 0 : c = PRUint8(*input++);
656 0 : len--;
657 0 : if (c & 0x80) return false;
658 0 : if (c == 0x1B) return true;
659 0 : if (c == '~') {
660 0 : switch (hz_state) {
661 : case hz_initial:
662 : case hz_seen:
663 0 : if (*input == '{') {
664 0 : hz_state = hz_escaped;
665 0 : } else if (*input == '~') {
666 : // ~~ is the HZ encoding of ~. Skip over second ~ as well
667 0 : hz_state = hz_seen;
668 0 : input++;
669 0 : len--;
670 : } else {
671 0 : hz_state = hz_notpresent;
672 : }
673 0 : break;
674 :
675 : case hz_escaped:
676 0 : if (*input == '}') hz_state = hz_seen;
677 0 : break;
678 : default:
679 0 : break;
680 : }
681 : }
682 : }
683 0 : return hz_state == hz_seen;
684 : }
685 :
686 : #define REPLACEMENT_CHAR "\357\277\275" // EF BF BD (UTF-8 encoding of U+FFFD)
687 :
688 : // copy 'raw' sequences of octets in aInput to aOutput.
689 : // If aDefaultCharset is specified, the input is assumed to be in the
690 : // charset and converted to UTF-8. Otherwise, a blind copy is made.
691 : // If aDefaultCharset is specified, but the conversion to UTF-8
692 : // is not successful, each octet is replaced by Unicode replacement
693 : // chars. *aOutput is advanced by the number of output octets.
694 : // static
695 0 : void CopyRawHeader(const char *aInput, PRUint32 aLen,
696 : const char *aDefaultCharset, nsACString &aOutput)
697 : {
698 : PRInt32 c;
699 :
700 : // If aDefaultCharset is not specified, make a blind copy.
701 0 : if (!aDefaultCharset || !*aDefaultCharset) {
702 0 : aOutput.Append(aInput, aLen);
703 0 : return;
704 : }
705 :
706 : // Copy as long as it's US-ASCII. An ESC may indicate ISO 2022
707 : // A ~ may indicate it is HZ
708 0 : while (aLen && (c = PRUint8(*aInput++)) != 0x1B && c != '~' && !(c & 0x80)) {
709 0 : aOutput.Append(char(c));
710 0 : aLen--;
711 : }
712 0 : if (!aLen) {
713 0 : return;
714 : }
715 0 : aInput--;
716 :
717 : // skip ASCIIness/UTF8ness test if aInput is supected to be a 7bit non-ascii
718 : // string and aDefaultCharset is a 7bit non-ascii charset.
719 : bool skipCheck = (c == 0x1B || c == '~') &&
720 0 : IS_7BIT_NON_ASCII_CHARSET(aDefaultCharset);
721 :
722 : // If not UTF-8, treat as default charset
723 : nsCOMPtr<nsIUTF8ConverterService>
724 0 : cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
725 0 : nsCAutoString utf8Text;
726 0 : if (cvtUTF8 &&
727 0 : NS_SUCCEEDED(
728 : cvtUTF8->ConvertStringToUTF8(Substring(aInput, aInput + aLen),
729 : aDefaultCharset, skipCheck, utf8Text))) {
730 0 : aOutput.Append(utf8Text);
731 : } else { // replace each octet with Unicode replacement char in UTF-8.
732 0 : for (PRUint32 i = 0; i < aLen; i++) {
733 0 : c = PRUint8(*aInput++);
734 0 : if (c & 0x80)
735 0 : aOutput.Append(REPLACEMENT_CHAR);
736 : else
737 0 : aOutput.Append(char(c));
738 : }
739 : }
740 : }
741 :
// Characters that make the charset part of an encoded-word invalid; the
// charset scan in DecodeRFC2047Str() bails out when it meets one of them.
742 : static const char especials[] = "()<>@,;:\\\"/[]?.=";
743 :
744 : // |decode_mime_part2_str| taken from comi18n.c
745 : // Decode RFC2047-encoded words in the input and convert the result to UTF-8.
746 : // If aOverrideCharset is true, charset in RFC2047-encoded words is
747 : // ignored and aDefaultCharset is assumed, instead. aDefaultCharset
748 : // is also used to convert raw octets (without RFC 2047 encoding) to UTF-8.
//
// Text outside encoded-words goes through CopyRawHeader(). A candidate that
// fails any syntax check is emitted literally (see the badsyntax label).
// An encoded-word whose charset conversion fails contributes a single
// U+FFFD. TABs in the final result are turned into spaces. The function
// always returns NS_OK.
749 : //static
750 0 : nsresult DecodeRFC2047Str(const char *aHeader, const char *aDefaultCharset,
751 : bool aOverrideCharset, nsACString &aResult)
752 : {
// p: current "=?" introducer (and, after "p += 2", the start of the charset);
// q: scan pointer, later the encoding letter; r: the '?' closing the payload.
753 : const char *p, *q, *r;
754 : char *decodedText;
755 : const char *begin; // tracking pointer for where we are in the input buffer
756 0 : PRInt32 isLastEncodedWord = 0;
757 : const char *charsetStart, *charsetEnd;
758 : char charset[80];
759 :
760 : // initialize charset name to an empty string
761 0 : charset[0] = '\0';
762 :
763 0 : begin = aHeader;
764 :
765 : // To avoid buffer realloc, if possible, set capacity in advance. No
766 : // matter what, more than 3x expansion can never happen for all charsets
767 : // supported by Mozilla. SCSU/BCSU with the sliding window set to a
768 : // non-BMP block may be exceptions, but Mozilla does not support them.
769 : // Nor does any known mail/news program use them. Even if one did, we're
770 : // safe because we don't use a raw *char any more.
771 0 : aResult.SetCapacity(3 * strlen(aHeader));
772 :
773 0 : while ((p = PL_strstr(begin, "=?")) != 0) {
// If the previous item was an encoded-word, the gap before this one is
// copied only when it holds something other than whitespace; a
// whitespace-only gap is left uncopied (begin is not advanced).
774 0 : if (isLastEncodedWord) {
775 : // See if it's all whitespace.
776 0 : for (q = begin; q < p; ++q) {
777 0 : if (!PL_strchr(" \t\r\n", *q)) break;
778 : }
779 : }
780 :
781 0 : if (!isLastEncodedWord || q < p) {
782 : // copy the part before the encoded-word
783 0 : CopyRawHeader(begin, p - begin, aDefaultCharset, aResult);
784 0 : begin = p;
785 : }
786 :
// From here on p points just past the "=?" introducer.
787 0 : p += 2;
788 :
789 : // Get charset info
790 0 : charsetStart = p;
791 0 : charsetEnd = 0;
// The "*q <= ' '" test also stops the scan at the terminating NUL.
792 0 : for (q = p; *q != '?'; q++) {
793 0 : if (*q <= ' ' || PL_strchr(especials, *q)) {
794 0 : goto badsyntax;
795 : }
796 :
797 : // RFC 2231 section 5
// The charset name ends at the first '*'; what follows it up to '?' is
// not copied into |charset|.
798 0 : if (!charsetEnd && *q == '*') {
799 0 : charsetEnd = q;
800 : }
801 : }
802 0 : if (!charsetEnd) {
803 0 : charsetEnd = q;
804 : }
805 :
806 : // Check for too-long charset name
807 0 : if (PRUint32(charsetEnd - charsetStart) >= sizeof(charset))
808 0 : goto badsyntax;
809 :
810 0 : memcpy(charset, charsetStart, charsetEnd - charsetStart);
811 0 : charset[charsetEnd - charsetStart] = 0;
812 :
// q now moves onto the encoding letter: Q/q or B/b, followed by '?'.
813 0 : q++;
814 0 : if (*q != 'Q' && *q != 'q' && *q != 'B' && *q != 'b')
815 0 : goto badsyntax;
816 :
817 0 : if (q[1] != '?')
818 0 : goto badsyntax;
819 :
// Find the '?' that closes the payload; control characters (including the
// terminating NUL) are a syntax error.
820 0 : r = q;
821 0 : for (r = q + 2; *r != '?'; r++) {
822 0 : if (*r < ' ') goto badsyntax;
823 : }
824 0 : if (r[1] != '=')
825 0 : goto badsyntax;
826 0 : else if (r == q + 2) {
827 : // it's empty, skip
828 0 : begin = r + 2;
829 0 : isLastEncodedWord = 1;
830 0 : continue;
831 : }
832 :
833 0 : if(*q == 'Q' || *q == 'q')
834 0 : decodedText = DecodeQ(q + 2, r - (q + 2));
835 : else {
836 : // bug 227290. ignore an extraneous '=' at the end.
837 : // (# of characters in B-encoded part has to be a multiple of 4)
838 0 : PRInt32 n = r - (q + 2);
839 0 : n -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0;
840 0 : decodedText = PL_Base64Decode(q + 2, n, nsnull);
841 : }
842 :
843 0 : if (decodedText == nsnull)
844 0 : goto badsyntax;
845 :
846 : // Override charset if requested. Never override labeled UTF-8.
847 : // Use default charset instead of UNKNOWN-8BIT
848 0 : if ((aOverrideCharset && 0 != nsCRT::strcasecmp(charset, "UTF-8")) ||
849 0 : (aDefaultCharset && 0 == nsCRT::strcasecmp(charset, "UNKNOWN-8BIT"))) {
850 0 : PL_strncpy(charset, aDefaultCharset, sizeof(charset) - 1);
851 0 : charset[sizeof(charset) - 1] = '\0';
852 : }
853 :
854 : {
855 : nsCOMPtr<nsIUTF8ConverterService>
856 0 : cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
857 0 : nsCAutoString utf8Text;
858 : // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
859 0 : if (cvtUTF8 &&
860 0 : NS_SUCCEEDED(
861 : cvtUTF8->ConvertStringToUTF8(nsDependentCString(decodedText),
862 : charset, IS_7BIT_NON_ASCII_CHARSET(charset), utf8Text))) {
863 0 : aResult.Append(utf8Text);
864 : } else {
865 0 : aResult.Append(REPLACEMENT_CHAR);
866 : }
867 : }
868 0 : PR_Free(decodedText);
869 0 : begin = r + 2;
870 0 : isLastEncodedWord = 1;
871 0 : continue;
872 :
873 : badsyntax:
874 : // copy the part before the encoded-word
// Every jump here happens after "p += 2", so this appends everything from
// begin up to and including the "=?" literally and resumes right after it.
875 0 : aResult.Append(begin, p - begin);
876 0 : begin = p;
877 0 : isLastEncodedWord = 0;
878 : }
879 :
880 : // put the tail back
881 0 : CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult);
882 :
883 0 : nsCAutoString tempStr(aResult);
884 0 : tempStr.ReplaceChar('\t', ' ');
885 0 : aResult = tempStr;
886 :
887 0 : return NS_OK;
888 : }
889 :
|