LCOV - code coverage report
Current view: directory - netwerk/mime - nsMIMEHeaderParamImpl.cpp (source / functions) Found Hit Coverage
Test: app.info Lines: 369 207 56.1 %
Date: 2012-06-02 Functions: 15 10 66.7 %

       1                 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2                 : /* vim:expandtab:shiftwidth=2:tabstop=4:
       3                 :  */
       4                 : /* ***** BEGIN LICENSE BLOCK *****
       5                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       6                 :  *
       7                 :  * The contents of this file are subject to the Mozilla Public License Version
       8                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       9                 :  * the License. You may obtain a copy of the License at
      10                 :  * http://www.mozilla.org/MPL/
      11                 :  *
      12                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      13                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      14                 :  * for the specific language governing rights and limitations under the
      15                 :  * License.
      16                 :  *
      17                 :  * The Original Code is mozilla.org code.
      18                 :  *
      19                 :  * The Initial Developer of the Original Code is
      20                 :  * Netscape Communications Corporation.
      21                 :  * Portions created by the Initial Developer are Copyright (C) 1998
      22                 :  * the Initial Developer. All Rights Reserved.
      23                 :  *
      24                 :  * Contributor(s):
      25                 :  *   rhp@netscape.com
      26                 :  *   Jungshik Shin <jshin@mailaps.org>
      27                 :  *   John G Myers   <jgmyers@netscape.com>
      28                 :  *   Takayuki Tei   <taka@netscape.com>
      29                 :  *
      30                 :  * Alternatively, the contents of this file may be used under the terms of
      31                 :  * either the GNU General Public License Version 2 or later (the "GPL"), or
      32                 :  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      33                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      34                 :  * of those above. If you wish to allow use of your version of this file only
      35                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      36                 :  * use your version of this file under the terms of the MPL, indicate your
      37                 :  * decision by deleting the provisions above and replace them with the notice
      38                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      39                 :  * the provisions above, a recipient may use your version of this file under
      40                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      41                 :  *
      42                 :  * ***** END LICENSE BLOCK ***** */
      43                 : 
      44                 : #include <string.h>
      45                 : #include "prtypes.h"
      46                 : #include "prmem.h"
      47                 : #include "prprf.h"
      48                 : #include "plstr.h"
      49                 : #include "plbase64.h"
      50                 : #include "nsCRT.h"
      51                 : #include "nsMemory.h"
      52                 : #include "nsCOMPtr.h"
      53                 : #include "nsEscape.h"
      54                 : #include "nsIUTF8ConverterService.h"
      55                 : #include "nsUConvCID.h"
      56                 : #include "nsIServiceManager.h"
      57                 : #include "nsMIMEHeaderParamImpl.h"
      58                 : #include "nsReadableUtils.h"
      59                 : #include "nsNativeCharsetUtils.h"
      60                 : #include "nsNetError.h"
      61                 : 
// static functions declared below are moved from mailnews/mime/src/comi18n.cpp
// These are forward declarations only; the definitions are not part of this
// section of the file (presumably they follow later -- confirm).
  
static char *DecodeQ(const char *, PRUint32);
static bool Is7bitNonAsciiString(const char *, PRUint32);
static void CopyRawHeader(const char *, PRUint32, const char *, nsACString &);
static nsresult DecodeRFC2047Str(const char *, const char *, bool, nsACString&);

// XXX The chance of UTF-7 being used in the message header is really
// low, but in theory it's possible. 
// Evaluates to true when |cset| begins with one of the prefixes "ISO-2022",
// "HZ-GB" or "UTF-7" (only the first 8/5/5 characters are compared, through
// nsCRT::strncasecmp, so e.g. any "ISO-2022-*" name matches).
// |cset| is evaluated up to three times, so it must be free of side effects.
#define IS_7BIT_NON_ASCII_CHARSET(cset)            \
    (!nsCRT::strncasecmp((cset), "ISO-2022", 8) || \
     !nsCRT::strncasecmp((cset), "HZ-GB", 5)    || \
     !nsCRT::strncasecmp((cset), "UTF-7", 5))   

// NOTE(review): presumably expands to the nsISupports boilerplate for a class
// exposing the single interface nsIMIMEHeaderParam -- confirm against the
// macro's definition.
NS_IMPL_ISUPPORTS1(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam)
      77                 : 
      78                 : NS_IMETHODIMP 
      79             140 : nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal, 
      80                 :                                     const char *aParamName,
      81                 :                                     const nsACString& aFallbackCharset, 
      82                 :                                     bool aTryLocaleCharset, 
      83                 :                                     char **aLang, nsAString& aResult)
      84                 : {
      85                 :   return DoGetParameter(aHeaderVal, aParamName, RFC_2231_DECODING,
      86             140 :                         aFallbackCharset, aTryLocaleCharset, aLang, aResult);
      87                 : }
      88                 : 
      89                 : NS_IMETHODIMP 
      90              97 : nsMIMEHeaderParamImpl::GetParameter5987(const nsACString& aHeaderVal, 
      91                 :                                         const char *aParamName,
      92                 :                                         const nsACString& aFallbackCharset, 
      93                 :                                         bool aTryLocaleCharset, 
      94                 :                                         char **aLang, nsAString& aResult)
      95                 : {
      96                 :   return DoGetParameter(aHeaderVal, aParamName, RFC_5987_DECODING,
      97              97 :                         aFallbackCharset, aTryLocaleCharset, aLang, aResult);
      98                 : }
      99                 : 
     100                 : // XXX : aTryLocaleCharset is not yet effective.
     101                 : nsresult 
     102             237 : nsMIMEHeaderParamImpl::DoGetParameter(const nsACString& aHeaderVal, 
     103                 :                                       const char *aParamName,
     104                 :                                       ParamDecoding aDecoding,
     105                 :                                       const nsACString& aFallbackCharset, 
     106                 :                                       bool aTryLocaleCharset, 
     107                 :                                       char **aLang, nsAString& aResult)
     108                 : {
     109             237 :     aResult.Truncate();
     110                 :     nsresult rv;
     111                 : 
     112                 :     // get parameter (decode RFC 2231/5987 when applicable, as specified by
     113                 :     // aDecoding (5987 being a subset of 2231) and return charset.)
     114             474 :     nsXPIDLCString med;
     115             474 :     nsXPIDLCString charset;
     116             237 :     rv = DoParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName, 
     117             237 :                              aDecoding, getter_Copies(charset), aLang, 
     118             474 :                              getter_Copies(med));
     119             237 :     if (NS_FAILED(rv))
     120              32 :         return rv; 
     121                 : 
     122                 :     // convert to UTF-8 after charset conversion and RFC 2047 decoding 
     123                 :     // if necessary.
     124                 :     
     125             410 :     nsCAutoString str1;
     126             205 :     rv = DecodeParameter(med, charset.get(), nsnull, false, str1);
     127             205 :     NS_ENSURE_SUCCESS(rv, rv);
     128                 : 
     129             205 :     if (!aFallbackCharset.IsEmpty())
     130                 :     {
     131             410 :         nsCAutoString str2;
     132                 :         nsCOMPtr<nsIUTF8ConverterService> 
     133             410 :           cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
     134            1025 :         if (cvtUTF8 &&
     135             820 :             NS_SUCCEEDED(cvtUTF8->ConvertStringToUTF8(str1, 
     136                 :                 PromiseFlatCString(aFallbackCharset).get(), false, str2))) {
     137             203 :           CopyUTF8toUTF16(str2, aResult);
     138             203 :           return NS_OK;
     139                 :         }
     140                 :     }
     141                 : 
     142               2 :     if (IsUTF8(str1)) {
     143               0 :       CopyUTF8toUTF16(str1, aResult);
     144               0 :       return NS_OK;
     145                 :     }
     146                 : 
     147               2 :     if (aTryLocaleCharset && !NS_IsNativeUTF8()) 
     148               0 :       return NS_CopyNativeToUnicode(str1, aResult);
     149                 : 
     150               2 :     CopyASCIItoUTF16(str1, aResult);
     151               2 :     return NS_OK;
     152                 : }
     153                 : 
     154                 : // remove backslash-encoded sequences from quoted-strings
     155                 : // modifies string in place, potentially shortening it
     156              10 : void RemoveQuotedStringEscapes(char *src)
     157                 : {
     158              10 :   char *dst = src;
     159                 : 
     160              20 :   for (char *c = src; *c; ++c)
     161                 :   {
     162              10 :     if (c[0] == '\\' && c[1])
     163                 :     {
     164                 :       // skip backslash if not at end
     165               6 :       ++c;
     166                 :     }
     167              10 :     *dst++ = *c;
     168                 :   }
     169              10 :   *dst = 0;
     170              10 : }
     171                 : 
     172                 : // moved almost verbatim from mimehdrs.cpp
     173                 : // char *
     174                 : // MimeHeaders_get_parameter (const char *header_value, const char *parm_name,
     175                 : //                            char **charset, char **language)
     176                 : //
     177                 : // The format of these header lines  is
     178                 : // <token> [ ';' <token> '=' <token-or-quoted-string> ]*
     179                 : NS_IMETHODIMP 
     180               0 : nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue, 
     181                 :                                             const char *aParamName,
     182                 :                                             char **aCharset,
     183                 :                                             char **aLang,
     184                 :                                             char **aResult)
     185                 : {
     186                 :   return DoParameterInternal(aHeaderValue, aParamName, RFC_2231_DECODING,
     187               0 :                              aCharset, aLang, aResult);
     188                 : }
     189                 : 
     190                 : 
     191                 : nsresult 
     192             237 : nsMIMEHeaderParamImpl::DoParameterInternal(const char *aHeaderValue, 
     193                 :                                            const char *aParamName,
     194                 :                                            ParamDecoding aDecoding,
     195                 :                                            char **aCharset,
     196                 :                                            char **aLang,
     197                 :                                            char **aResult)
     198                 : {
     199                 : 
     200             237 :   if (!aHeaderValue ||  !*aHeaderValue || !aResult)
     201              12 :     return NS_ERROR_INVALID_ARG;
     202                 : 
     203             225 :   *aResult = nsnull;
     204                 : 
     205             225 :   if (aCharset) *aCharset = nsnull;
     206             225 :   if (aLang) *aLang = nsnull;
     207                 : 
     208             225 :   const char *str = aHeaderValue;
     209                 : 
     210                 :   // skip leading white space.
     211             225 :   for (; *str &&  nsCRT::IsAsciiSpace(*str); ++str)
     212                 :     ;
     213             225 :   const char *start = str;
     214                 :   
     215                 :   // aParamName is empty. return the first (possibly) _unnamed_ 'parameter'
     216                 :   // For instance, return 'inline' in the following case:
     217                 :   // Content-Disposition: inline; filename=.....
     218             225 :   if (!aParamName || !*aParamName) 
     219                 :     {
     220             115 :       for (; *str && *str != ';' && !nsCRT::IsAsciiSpace(*str); ++str)
     221                 :         ;
     222             115 :       if (str == start)
     223               2 :         return NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY;
     224                 : 
     225             113 :       *aResult = (char *) nsMemory::Clone(start, (str - start) + 1);
     226             113 :       NS_ENSURE_TRUE(*aResult, NS_ERROR_OUT_OF_MEMORY);
     227             113 :       (*aResult)[str - start] = '\0';  // null-terminate
     228             113 :       return NS_OK;
     229                 :     }
     230                 : 
     231                 :   /* Skip forward to first ';' */
     232             110 :   for (; *str && *str != ';' && *str != ','; ++str)
     233                 :     ;
     234             110 :   if (*str)
     235             102 :     str++;
     236                 :   /* Skip over following whitespace */
     237             110 :   for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
     238                 :     ;
     239                 : 
     240                 :   // Some broken http servers just specify parameters
     241                 :   // like 'filename' without specifying disposition
     242                 :   // method. Rewind to the first non-white-space
     243                 :   // character.
     244                 :   
     245             110 :   if (!*str)
     246              10 :     str = start;
     247                 : 
     248                 :   // RFC2231 - The legitimate parm format can be:
     249                 :   // A. title=ThisIsTitle 
     250                 :   // B. title*=us-ascii'en-us'This%20is%20wierd.
     251                 :   // C. title*0*=us-ascii'en'This%20is%20wierd.%20We
     252                 :   //    title*1*=have%20to%20support%20this.
     253                 :   //    title*2="Else..."
     254                 :   // D. title*0="Hey, what you think you are doing?"
     255                 :   //    title*1="There is no charset and lang info."
     256                 :   // RFC5987: only A and B
     257                 :   
     258             110 :   PRInt32 paramLen = strlen(aParamName);
     259                 : 
     260             110 :   bool haveCaseAValue = false;
     261             110 :   PRInt32 nextContinuation = 0; // next value in series, or -1 if error
     262                 : 
     263             462 :   while (*str) {
     264             274 :     const char *tokenStart = str;
     265             274 :     const char *tokenEnd = 0;
     266             274 :     const char *valueStart = str;
     267             274 :     const char *valueEnd = 0;
     268             274 :     bool seenEquals = false;
     269                 : 
     270             274 :     NS_ASSERTION(!nsCRT::IsAsciiSpace(*str), "should be after whitespace.");
     271                 : 
     272                 :     // Skip forward to the end of this token. 
     273             274 :     for (; *str && !nsCRT::IsAsciiSpace(*str) && *str != '=' && *str != ';'; str++)
     274                 :       ;
     275             274 :     tokenEnd = str;
     276                 : 
     277                 :     // Skip over whitespace, '=', and whitespace
     278             274 :     while (nsCRT::IsAsciiSpace(*str)) ++str;
     279             274 :     if (*str == '=') {
     280             264 :       ++str;
     281             264 :       seenEquals = true;
     282                 :     }
     283             274 :     while (nsCRT::IsAsciiSpace(*str)) ++str;
     284                 : 
     285             274 :     bool needUnquote = false;
     286                 :     
     287             274 :     if (*str != '"')
     288                 :     {
     289                 :       // The value is a token, not a quoted string.
     290             260 :       valueStart = str;
     291            1948 :       for (valueEnd = str;
     292            1688 :            *valueEnd && !nsCRT::IsAsciiSpace (*valueEnd) && *valueEnd != ';';
     293                 :            valueEnd++)
     294                 :         ;
     295             260 :       str = valueEnd;
     296                 :     }
     297                 :     else
     298                 :     {
     299                 :       // The value is a quoted string.
     300              14 :       needUnquote = true;
     301                 :       
     302              14 :       ++str;
     303              14 :       valueStart = str;
     304              54 :       for (valueEnd = str; *valueEnd; ++valueEnd)
     305                 :       {
     306              52 :         if (*valueEnd == '\\')
     307               6 :           ++valueEnd;
     308              46 :         else if (*valueEnd == '"')
     309              12 :           break;
     310                 :       }
     311              14 :       str = valueEnd;
     312                 :       // *valueEnd != null means that *valueEnd is quote character.
     313              14 :       if (*valueEnd)
     314              12 :         str++;
     315                 :     }
     316                 : 
     317                 :     // See if this is the simplest case (case A above),
     318                 :     // a 'single' line value with no charset and lang.
     319                 :     // If so, copy it and return.
     320             339 :     if (tokenEnd - tokenStart == paramLen &&
     321                 :         seenEquals &&
     322              65 :         !nsCRT::strncasecmp(tokenStart, aParamName, paramLen))
     323                 :     {
     324              65 :       if (*aResult)
     325                 :       {
     326                 :         // either seen earlier caseA value already--we prefer first--or caseA
     327                 :         // came after a continuation: either way, prefer other value
     328               3 :         goto increment_str;
     329                 :       }
     330                 :       // if the parameter spans across multiple lines we have to strip out the
     331                 :       //     line continuation -- jht 4/29/98 
     332             124 :       nsCAutoString tempStr(valueStart, valueEnd - valueStart);
     333              62 :       tempStr.StripChars("\r\n");
     334              62 :       char *res = ToNewCString(tempStr);
     335              62 :       NS_ENSURE_TRUE(res, NS_ERROR_OUT_OF_MEMORY);
     336                 :       
     337              62 :       if (needUnquote)
     338              10 :         RemoveQuotedStringEscapes(res);
     339                 :             
     340              62 :       *aResult = res;
     341                 :       
     342             124 :       haveCaseAValue = true;
     343                 :       // keep going, we may find a RFC 2231/5987 encoded alternative
     344                 :     }
     345                 :     // case B, C, and D
     346             611 :     else if (tokenEnd - tokenStart > paramLen &&
     347             204 :              !nsCRT::strncasecmp(tokenStart, aParamName, paramLen) &&
     348                 :              seenEquals &&
     349             198 :              *(tokenStart + paramLen) == '*')
     350                 :     {
     351             198 :       const char *cp = tokenStart + paramLen + 1; // 1st char past '*'
     352             198 :       bool needUnescape = *(tokenEnd - 1) == '*';
     353                 : 
     354             198 :       bool caseB = (tokenEnd - tokenStart) == paramLen + 1;
     355             198 :       bool caseCorDStart = (*cp == '0') && needUnescape;
     356             198 :       bool acceptContinuations = (aDecoding != RFC_5987_DECODING);
     357                 :  
     358                 :       // CaseB and start of CaseC: requires charset and optional language
     359                 :       // in quotes (quotes required even if lang is blank)
     360             198 :       if (caseB || (caseCorDStart && acceptContinuations))
     361                 :       {
     362              39 :         if (caseCorDStart) {
     363               7 :           if (nextContinuation++ != 0)
     364                 :           {
     365                 :             // error: already started a continuation.  Skip future
     366                 :             // continuations and return whatever initial parts were in order.
     367               1 :             nextContinuation = -1;
     368               1 :             goto increment_str;
     369                 :           }
     370                 :         }
     371                 :         // look for single quotation mark(')
     372              38 :         const char *sQuote1 = PL_strchr(valueStart, 0x27);
     373              38 :         const char *sQuote2 = (char *) (sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nsnull);
     374                 : 
     375                 :         // Two single quotation marks must be present even in
     376                 :         // absence of charset and lang. 
     377              38 :         if (!sQuote1 || !sQuote2)
     378               8 :           NS_WARNING("Mandatory two single quotes are missing in header parameter\n");
     379              38 :         if (aCharset && sQuote1 > valueStart && sQuote1 < valueEnd)
     380                 :         {
     381              32 :           *aCharset = (char *) nsMemory::Clone(valueStart, sQuote1 - valueStart + 1);
     382              32 :           if (*aCharset) 
     383              32 :             *(*aCharset + (sQuote1 - valueStart)) = 0;
     384                 :         }
     385              38 :         if (aLang && sQuote1 && sQuote2 && sQuote2 > sQuote1 + 1 &&
     386                 :             sQuote2 < valueEnd)
     387                 :         {
     388               0 :           *aLang = (char *) nsMemory::Clone(sQuote1 + 1, sQuote2 - (sQuote1 + 1) + 1);
     389               0 :           if (*aLang) 
     390               0 :             *(*aLang + (sQuote2 - (sQuote1 + 1))) = 0;
     391                 :         }
     392                 : 
     393                 :         // Be generous and handle gracefully when required 
     394                 :         // single quotes are absent.
     395              38 :         if (sQuote1)
     396                 :         {
     397              34 :           if(!sQuote2)
     398               4 :             sQuote2 = sQuote1;
     399                 :         }
     400                 :         else
     401               4 :           sQuote2 = valueStart - 1;
     402                 : 
     403              38 :         if (sQuote2 && sQuote2 + 1 < valueEnd)
     404                 :         {
     405              38 :           if (*aResult)
     406                 :           {
     407                 :             // caseA value already read, or caseC/D value already read
     408                 :             // but we're now reading caseB: either way, drop old value
     409              14 :             nsMemory::Free(*aResult);
     410              14 :             haveCaseAValue = false;
     411                 :           }
     412              38 :           *aResult = (char *) nsMemory::Alloc(valueEnd - (sQuote2 + 1) + 1);
     413              38 :           if (*aResult)
     414                 :           {
     415              38 :             memcpy(*aResult, sQuote2 + 1, valueEnd - (sQuote2 + 1));
     416              38 :             *(*aResult + (valueEnd - (sQuote2 + 1))) = 0;
     417              38 :             if (needUnescape)
     418                 :             {
     419              38 :               nsUnescape(*aResult);
     420              38 :               if (caseB)
     421              32 :                 return NS_OK; // caseB wins over everything else
     422                 :             }
     423                 :           }
     424               6 :         }
     425                 :       }  // end of if-block :  title*0*=  or  title*= 
     426                 :       // caseD: a line of multiline param with no need for unescaping : title*[0-9]=
     427                 :       // or 2nd or later lines of a caseC param : title*[1-9]*= 
     428             159 :       else if (acceptContinuations && nsCRT::IsAsciiDigit(PRUnichar(*cp)))
     429                 :       {
     430              76 :         PRInt32 nextSegment = atoi(cp);
     431                 :         // no leading zeros allowed except for ... position 0
     432              76 :         bool broken = nextSegment > 0 && *cp == '0';
     433                 :           
     434              76 :         if (broken || nextSegment != nextContinuation++)
     435                 :         {
     436                 :           // error: gap in continuation or unneccessary leading 0.
     437                 :           // Skip future continuations and return whatever initial parts were
     438                 :           // in order.
     439              17 :           nextContinuation = -1;
     440              17 :           goto increment_str;
     441                 :         }
     442              59 :         if (haveCaseAValue && *aResult) 
     443                 :         {
     444                 :           // drop caseA value
     445               4 :           nsMemory::Free(*aResult);
     446               4 :           *aResult = 0;
     447               4 :           haveCaseAValue = false;
     448                 :         }
     449              59 :         PRInt32 len = 0;
     450              59 :         if (*aResult) // 2nd or later lines of multiline parameter
     451                 :         {
     452              49 :           len = strlen(*aResult);
     453              49 :           char *ns = (char *) nsMemory::Realloc(*aResult, len + (valueEnd - valueStart) + 1);
     454              49 :           if (!ns)
     455                 :           {
     456               0 :             nsMemory::Free(*aResult);
     457                 :           }
     458              49 :           *aResult = ns;
     459                 :         }
     460                 :         else 
     461                 :         {
     462              10 :           NS_ASSERTION(*cp == '0', "Not first value in continuation"); // must be; 1st line :  title*0=
     463              10 :           *aResult = (char *) nsMemory::Alloc(valueEnd - valueStart + 1);
     464                 :         }
     465              59 :         if (*aResult)
     466                 :         {
     467                 :           // append a partial value
     468              59 :           memcpy(*aResult + len, valueStart, valueEnd - valueStart);
     469              59 :           *(*aResult + len + (valueEnd - valueStart)) = 0;
     470              59 :           if (needUnescape)
     471               3 :             nsUnescape(*aResult + len);
     472                 :         }
     473                 :         else 
     474               0 :           return NS_ERROR_OUT_OF_MEMORY;
     475                 :       } // end of if-block :  title*[0-9]= or title*[1-9]*=
     476                 :     }
     477                 : 
     478                 :     // str now points after the end of the value.
     479                 :     //   skip over whitespace, ';', whitespace.
     480                 : increment_str:      
     481             242 :     while (nsCRT::IsAsciiSpace(*str)) ++str;
     482             242 :     if (*str == ';') ++str;
     483             242 :     while (nsCRT::IsAsciiSpace(*str)) ++str;
     484                 :   }
     485                 : 
     486              78 :   if (*aResult) 
     487              60 :     return NS_OK;
     488                 :   else
     489              18 :     return NS_ERROR_INVALID_ARG; // aParameter not found !!
     490                 : }
     491                 : 
     492                 : 
     493                 : NS_IMETHODIMP
     494             173 : nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal, 
     495                 :                                            const char* aDefaultCharset, 
     496                 :                                            bool aOverrideCharset, 
     497                 :                                            bool aEatContinuations,
     498                 :                                            nsACString& aResult)
     499                 : {
     500             173 :   aResult.Truncate();
     501             173 :   if (!aHeaderVal)
     502               0 :     return NS_ERROR_INVALID_ARG;
     503             173 :   if (!*aHeaderVal)
     504               7 :     return NS_OK;
     505                 : 
     506                 : 
     507                 :   // If aHeaderVal is RFC 2047 encoded or is not a UTF-8 string  but
     508                 :   // aDefaultCharset is specified, decodes RFC 2047 encoding and converts
     509                 :   // to UTF-8. Otherwise, just strips away CRLF. 
     510             332 :   if (PL_strstr(aHeaderVal, "=?") || 
     511             166 :       (aDefaultCharset && (!IsUTF8(nsDependentCString(aHeaderVal)) || 
     512               0 :       Is7bitNonAsciiString(aHeaderVal, PL_strlen(aHeaderVal))))) {
     513               0 :     DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult);
     514             498 :   } else if (aEatContinuations && 
     515             332 :              (PL_strchr(aHeaderVal, '\n') || PL_strchr(aHeaderVal, '\r'))) {
     516               0 :     aResult = aHeaderVal;
     517                 :   } else {
     518             166 :     aEatContinuations = false;
     519             166 :     aResult = aHeaderVal;
     520                 :   }
     521                 : 
     522             166 :   if (aEatContinuations) {
     523               0 :     nsCAutoString temp(aResult);
     524               0 :     temp.ReplaceSubstring("\n\t", " ");
     525               0 :     temp.ReplaceSubstring("\r\t", " ");
     526               0 :     temp.StripChars("\r\n");
     527               0 :     aResult = temp;
     528                 :   }
     529                 : 
     530             166 :   return NS_OK;
     531                 : }
     532                 : 
     533                 : NS_IMETHODIMP 
     534             205 : nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
     535                 :                                        const char* aCharset,
     536                 :                                        const char* aDefaultCharset,
     537                 :                                        bool aOverrideCharset, 
     538                 :                                        nsACString& aResult)
     539                 : {
     540             205 :   aResult.Truncate();
     541                 :   // If aCharset is given, aParamValue was obtained from RFC2231/5987 
     542                 :   // encoding and we're pretty sure that it's in aCharset.
     543             205 :   if (aCharset && *aCharset)
     544                 :   {
     545              64 :     nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
     546              32 :     if (cvtUTF8)
     547                 :       // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
     548              32 :       return cvtUTF8->ConvertStringToUTF8(aParamValue, aCharset,
     549              32 :           IS_7BIT_NON_ASCII_CHARSET(aCharset), aResult);
     550                 :   }
     551                 : 
     552             346 :   const nsAFlatCString& param = PromiseFlatCString(aParamValue);
     553             346 :   nsCAutoString unQuoted;
     554             173 :   nsACString::const_iterator s, e;
     555             173 :   param.BeginReading(s);
     556             173 :   param.EndReading(e);
     557                 : 
     558                 :   // strip '\' when used to quote CR, LF, '"' and '\'
     559            1702 :   for ( ; s != e; ++s) {
     560            1529 :     if ((*s == '\\')) {
     561               0 :       if (++s == e) {
     562               0 :         --s; // '\' is at the end. move back and append '\'.
     563                 :       }
     564               0 :       else if (*s != nsCRT::CR && *s != nsCRT::LF && *s != '"' && *s != '\\') {
     565               0 :         --s; // '\' is not foll. by CR,LF,'"','\'. move back and append '\'
     566                 :       }
     567                 :       // else : skip '\' and append the quoted character.
     568                 :     }
     569            1529 :     unQuoted.Append(*s);
     570                 :   }
     571                 : 
     572             173 :   aResult = unQuoted;
     573                 :   
     574             346 :   nsCAutoString decoded;
     575                 : 
     576                 :   // Try RFC 2047 encoding, instead.
     577                 :   nsresult rv = DecodeRFC2047Header(unQuoted.get(), aDefaultCharset, 
     578             173 :                                     aOverrideCharset, true, decoded);
     579                 :   
     580             173 :   if (NS_SUCCEEDED(rv) && !decoded.IsEmpty())
     581             166 :     aResult = decoded;
     582                 :   
     583             173 :   return rv;
     584                 : }
     585                 : 
     586                 : #define ISHEXCHAR(c) \
     587                 :         ((0x30 <= PRUint8(c) && PRUint8(c) <= 0x39)  ||  \
     588                 :          (0x41 <= PRUint8(c) && PRUint8(c) <= 0x46)  ||  \
     589                 :          (0x61 <= PRUint8(c) && PRUint8(c) <= 0x66))
     590                 : 
     591                 : // Decode Q encoding (RFC 2047).
     592                 : // static
     593               0 : char *DecodeQ(const char *in, PRUint32 length)
     594                 : {
     595               0 :   char *out, *dest = 0;
     596                 : 
     597               0 :   out = dest = (char *)PR_Calloc(length + 1, sizeof(char));
     598               0 :   if (dest == nsnull)
     599               0 :     return nsnull;
     600               0 :   while (length > 0) {
     601               0 :     PRUintn c = 0;
     602               0 :     switch (*in) {
     603                 :     case '=':
     604                 :       // check if |in| in the form of '=hh'  where h is [0-9a-fA-F].
     605               0 :       if (length < 3 || !ISHEXCHAR(in[1]) || !ISHEXCHAR(in[2]))
     606                 :         goto badsyntax;
     607               0 :       PR_sscanf(in + 1, "%2X", &c);
     608               0 :       *out++ = (char) c;
     609               0 :       in += 3;
     610               0 :       length -= 3;
     611               0 :       break;
     612                 : 
     613                 :     case '_':
     614               0 :       *out++ = ' ';
     615               0 :       in++;
     616               0 :       length--;
     617               0 :       break;
     618                 : 
     619                 :     default:
     620               0 :       if (*in & 0x80) goto badsyntax;
     621               0 :       *out++ = *in++;
     622               0 :       length--;
     623                 :     }
     624                 :   }
     625               0 :   *out++ = '\0';
     626                 : 
     627               0 :   for (out = dest; *out ; ++out) {
     628               0 :     if (*out == '\t')
     629               0 :       *out = ' ';
     630                 :   }
     631                 : 
     632               0 :   return dest;
     633                 : 
     634                 :  badsyntax:
     635               0 :   PR_Free(dest);
     636               0 :   return nsnull;
     637                 : }
     638                 : 
     639                 : // Check if input is HZ (a 7bit encoding for simplified Chinese: RFC 1842)
     640                 : // or has an ESC, which may be an indication that it's in one of the many
     641                 : // ISO 2022 7bit encodings (e.g. ISO-2022-JP(-2)/CN: see RFC 1468, 1922, 1554).
     642                 : // static
     643               0 : bool Is7bitNonAsciiString(const char *input, PRUint32 len)
     644                 : {
     645                 :   PRInt32 c;
     646                 : 
     647                 :   enum { hz_initial, // No HZ seen yet
     648                 :          hz_escaped, // Inside an HZ ~{ escape sequence 
     649                 :          hz_seen, // Have seen at least one complete HZ sequence 
     650                 :          hz_notpresent // Have seen something that is not legal HZ
     651                 :   } hz_state;
     652                 : 
     653               0 :   hz_state = hz_initial;
     654               0 :   while (len) {
     655               0 :     c = PRUint8(*input++);
     656               0 :     len--;
     657               0 :     if (c & 0x80) return false;
     658               0 :     if (c == 0x1B) return true;
     659               0 :     if (c == '~') {
     660               0 :       switch (hz_state) {
     661                 :       case hz_initial:
     662                 :       case hz_seen:
     663               0 :         if (*input == '{') {
     664               0 :           hz_state = hz_escaped;
     665               0 :         } else if (*input == '~') {
     666                 :           // ~~ is the HZ encoding of ~.  Skip over second ~ as well
     667               0 :           hz_state = hz_seen;
     668               0 :           input++;
     669               0 :           len--;
     670                 :         } else {
     671               0 :           hz_state = hz_notpresent;
     672                 :         }
     673               0 :         break;
     674                 : 
     675                 :       case hz_escaped:
     676               0 :         if (*input == '}') hz_state = hz_seen;
     677               0 :         break;
     678                 :       default:
     679               0 :         break;
     680                 :       }
     681                 :     }
     682                 :   }
     683               0 :   return hz_state == hz_seen;
     684                 : }
     685                 : 
     686                 : #define REPLACEMENT_CHAR "\357\277\275" // EF BF BD (UTF-8 encoding of U+FFFD)
     687                 : 
     688                 : // Copy 'raw' sequences of octets in aInput to aOutput.
     689                 : // If aDefaultCharset is specified, the input is assumed to be in that
     690                 : // charset and converted to UTF-8. Otherwise, a blind copy is made.
     691                 : // If aDefaultCharset is specified, but the conversion to UTF-8
     692                 : // is not successful, each non-ASCII octet (high bit set) is replaced by
     693                 : // the Unicode replacement char. The output is appended to aOutput.
     694                 : // static
     695               0 : void CopyRawHeader(const char *aInput, PRUint32 aLen, 
     696                 :                    const char *aDefaultCharset, nsACString &aOutput)
     697                 : {
     698                 :   PRInt32 c;
     699                 : 
     700                 :   // If aDefaultCharset is not specified, make a blind copy.
     701               0 :   if (!aDefaultCharset || !*aDefaultCharset) {
     702               0 :     aOutput.Append(aInput, aLen);
     703               0 :     return;
     704                 :   }
     705                 : 
     706                 :   // Copy as long as it's US-ASCII.  An ESC may indicate ISO 2022
     707                 :   // A ~ may indicate it is HZ
     708               0 :   while (aLen && (c = PRUint8(*aInput++)) != 0x1B && c != '~' && !(c & 0x80)) {
     709               0 :     aOutput.Append(char(c));
     710               0 :     aLen--;
     711                 :   }
     712               0 :   if (!aLen) {
     713               0 :     return;
     714                 :   }
     715               0 :   aInput--;
     716                 : 
     717                 :   // skip ASCIIness/UTF8ness test if aInput is suspected to be a 7bit non-ascii
     718                 :   // string and aDefaultCharset is a 7bit non-ascii charset.
     719                 :   bool skipCheck = (c == 0x1B || c == '~') && 
     720               0 :                      IS_7BIT_NON_ASCII_CHARSET(aDefaultCharset);
     721                 : 
     722                 :   // If not UTF-8, treat as default charset
     723                 :   nsCOMPtr<nsIUTF8ConverterService> 
     724               0 :     cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
     725               0 :   nsCAutoString utf8Text;
     726               0 :   if (cvtUTF8 &&
     727               0 :       NS_SUCCEEDED(
     728                 :       cvtUTF8->ConvertStringToUTF8(Substring(aInput, aInput + aLen), 
     729                 :       aDefaultCharset, skipCheck, utf8Text))) {
     730               0 :     aOutput.Append(utf8Text);
     731                 :   } else { // replace each octet with Unicode replacement char in UTF-8.
     732               0 :     for (PRUint32 i = 0; i < aLen; i++) {
     733               0 :       c = PRUint8(*aInput++);
     734               0 :       if (c & 0x80)
     735               0 :         aOutput.Append(REPLACEMENT_CHAR);
     736                 :       else
     737               0 :         aOutput.Append(char(c));
     738                 :     }
     739                 :   }
     740                 : }
     741                 : 
     742                 : static const char especials[] = "()<>@,;:\\\"/[]?.=";
     743                 : 
     744                 : // |decode_mime_part2_str| taken from comi18n.c
     745                 : // Decode RFC2047-encoded words in the input and convert the result to UTF-8.
     746                 : // If aOverrideCharset is true, charset in RFC2047-encoded words is 
     747                 : // ignored and aDefaultCharset is assumed, instead. aDefaultCharset
     748                 : // is also used to convert raw octets (without RFC 2047 encoding) to UTF-8.
     749                 : //static
     750               0 : nsresult DecodeRFC2047Str(const char *aHeader, const char *aDefaultCharset, 
     751                 :                           bool aOverrideCharset, nsACString &aResult)
     752                 : {
     753                 :   const char *p, *q, *r;
     754                 :   char *decodedText;
     755                 :   const char *begin; // tracking pointer for where we are in the input buffer
     756               0 :   PRInt32 isLastEncodedWord = 0;
     757                 :   const char *charsetStart, *charsetEnd;
     758                 :   char charset[80];
     759                 : 
     760                 :   // initialize charset name to an empty string
     761               0 :   charset[0] = '\0';
     762                 : 
     763               0 :   begin = aHeader;
     764                 : 
     765                 :   // To avoid buffer realloc, if possible, set capacity in advance. No
     766                 :   // matter what, more than 3x expansion can never happen for any charset
     767                 :   // supported by Mozilla. SCSU/BCSU with the sliding window set to a
     768                 :   // non-BMP block may be exceptions, but Mozilla does not support them,
     769                 :   // nor does any known mail/news program use them. Even if one did, we'd
     770                 :   // be safe because we don't use a raw char* any more.
     771               0 :   aResult.SetCapacity(3 * strlen(aHeader));
     772                 : 
     773               0 :   while ((p = PL_strstr(begin, "=?")) != 0) {
     774               0 :     if (isLastEncodedWord) {
     775                 :       // See if it's all whitespace.
     776               0 :       for (q = begin; q < p; ++q) {
     777               0 :         if (!PL_strchr(" \t\r\n", *q)) break;
     778                 :       }
     779                 :     }
     780                 : 
     781               0 :     if (!isLastEncodedWord || q < p) {
     782                 :       // copy the part before the encoded-word
     783               0 :       CopyRawHeader(begin, p - begin, aDefaultCharset, aResult);
     784               0 :       begin = p;
     785                 :     }
     786                 : 
     787               0 :     p += 2;
     788                 : 
     789                 :     // Get charset info
     790               0 :     charsetStart = p;
     791               0 :     charsetEnd = 0;
     792               0 :     for (q = p; *q != '?'; q++) {
     793               0 :       if (*q <= ' ' || PL_strchr(especials, *q)) {
     794               0 :         goto badsyntax;
     795                 :       }
     796                 : 
     797                 :       // RFC 2231 section 5
     798               0 :       if (!charsetEnd && *q == '*') {
     799               0 :         charsetEnd = q; 
     800                 :       }
     801                 :     }
     802               0 :     if (!charsetEnd) {
     803               0 :       charsetEnd = q;
     804                 :     }
     805                 : 
     806                 :     // Check for too-long charset name
     807               0 :     if (PRUint32(charsetEnd - charsetStart) >= sizeof(charset)) 
     808               0 :       goto badsyntax;
     809                 :     
     810               0 :     memcpy(charset, charsetStart, charsetEnd - charsetStart);
     811               0 :     charset[charsetEnd - charsetStart] = 0;
     812                 : 
     813               0 :     q++;
     814               0 :     if (*q != 'Q' && *q != 'q' && *q != 'B' && *q != 'b')
     815               0 :       goto badsyntax;
     816                 : 
     817               0 :     if (q[1] != '?')
     818               0 :       goto badsyntax;
     819                 : 
     820               0 :     r = q;
     821               0 :     for (r = q + 2; *r != '?'; r++) {
     822               0 :       if (*r < ' ') goto badsyntax;
     823                 :     }
     824               0 :     if (r[1] != '=')
     825               0 :         goto badsyntax;
     826               0 :     else if (r == q + 2) {
     827                 :         // it's empty, skip
     828               0 :         begin = r + 2;
     829               0 :         isLastEncodedWord = 1;
     830               0 :         continue;
     831                 :     }
     832                 : 
     833               0 :     if(*q == 'Q' || *q == 'q')
     834               0 :       decodedText = DecodeQ(q + 2, r - (q + 2));
     835                 :     else {
     836                 :       // bug 227290. ignore an extraneous '=' at the end.
     837                 :       // (# of characters in B-encoded part has to be a multiple of 4)
     838               0 :       PRInt32 n = r - (q + 2);
     839               0 :       n -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0;
     840               0 :       decodedText = PL_Base64Decode(q + 2, n, nsnull);
     841                 :     }
     842                 : 
     843               0 :     if (decodedText == nsnull)
     844               0 :       goto badsyntax;
     845                 : 
     846                 :     // Override charset if requested.  Never override labeled UTF-8.
     847                 :     // Use default charset instead of UNKNOWN-8BIT
     848               0 :     if ((aOverrideCharset && 0 != nsCRT::strcasecmp(charset, "UTF-8")) ||
     849               0 :         (aDefaultCharset && 0 == nsCRT::strcasecmp(charset, "UNKNOWN-8BIT"))) {
     850               0 :       PL_strncpy(charset, aDefaultCharset, sizeof(charset) - 1);
     851               0 :       charset[sizeof(charset) - 1] = '\0';
     852                 :     }
     853                 : 
     854                 :     {
     855                 :       nsCOMPtr<nsIUTF8ConverterService> 
     856               0 :         cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
     857               0 :       nsCAutoString utf8Text;
     858                 :       // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
     859               0 :       if (cvtUTF8 &&
     860               0 :           NS_SUCCEEDED(
     861                 :             cvtUTF8->ConvertStringToUTF8(nsDependentCString(decodedText),
     862                 :             charset, IS_7BIT_NON_ASCII_CHARSET(charset), utf8Text))) {
     863               0 :         aResult.Append(utf8Text);
     864                 :       } else {
     865               0 :         aResult.Append(REPLACEMENT_CHAR);
     866                 :       }
     867                 :     }
     868               0 :     PR_Free(decodedText);
     869               0 :     begin = r + 2;
     870               0 :     isLastEncodedWord = 1;
     871               0 :     continue;
     872                 : 
     873                 :   badsyntax:
     874                 :     // copy the part before the encoded-word
     875               0 :     aResult.Append(begin, p - begin);
     876               0 :     begin = p;
     877               0 :     isLastEncodedWord = 0;
     878                 :   }
     879                 : 
     880                 :   // put the tail back
     881               0 :   CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult);
     882                 : 
     883               0 :   nsCAutoString tempStr(aResult);
     884               0 :   tempStr.ReplaceChar('\t', ' ');
     885               0 :   aResult = tempStr;
     886                 : 
     887               0 :   return NS_OK;
     888                 : }
     889                 : 

Generated by: LCOV version 1.7