LCOV - code coverage report
Current view: directory - intl/unicharutil/src - nsSaveAsCharset.cpp (source / functions)
Test: app.info
Date: 2012-06-02
Lines: 193 found, 123 hit, 63.7 % coverage
Functions: 15 found, 14 hit, 93.3 % coverage

       1                 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2                 : /* ***** BEGIN LICENSE BLOCK *****
       3                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       4                 :  *
       5                 :  * The contents of this file are subject to the Mozilla Public License Version
       6                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       7                 :  * the License. You may obtain a copy of the License at
       8                 :  * http://www.mozilla.org/MPL/
       9                 :  *
      10                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      11                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      12                 :  * for the specific language governing rights and limitations under the
      13                 :  * License.
      14                 :  *
      15                 :  * The Original Code is mozilla.org code.
      16                 :  *
      17                 :  * The Initial Developer of the Original Code is
      18                 :  * Netscape Communications Corporation.
      19                 :  * Portions created by the Initial Developer are Copyright (C) 1998
      20                 :  * the Initial Developer. All Rights Reserved.
      21                 :  *
      22                 :  * Contributor(s):
      23                 :  *   Pierre Phaneuf <pp@ludusdesign.com>
      24                 :  *
      25                 :  * Alternatively, the contents of this file may be used under the terms of
      26                 :  * either of the GNU General Public License Version 2 or later (the "GPL"),
      27                 :  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      28                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      29                 :  * of those above. If you wish to allow use of your version of this file only
      30                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      31                 :  * use your version of this file under the terms of the MPL, indicate your
      32                 :  * decision by deleting the provisions above and replace them with the notice
      33                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      34                 :  * the provisions above, a recipient may use your version of this file under
      35                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      36                 :  *
      37                 :  * ***** END LICENSE BLOCK ***** */
      38                 : 
      39                 : 
      40                 : #include "prmem.h"
      41                 : #include "prprf.h"
      42                 : #include "nsIServiceManager.h"
      43                 : #include "nsIComponentManager.h"
      44                 : #include "nsICharsetConverterManager.h"
      45                 : #include "nsSaveAsCharset.h"
      46                 : #include "nsCRT.h"
      47                 : #include "nsUnicharUtils.h"
      48                 : #include "nsCompressedCharMap.h"
      49                 : #include "nsReadableUtils.h"
      50                 : #include "nsWhitespaceTokenizer.h"
      51                 : 
      52                 : //
      53                 : // nsISupports methods (supplied by the macro below; nsISaveAsCharset is the one interface listed)
      54                 : //
      55              34 : NS_IMPL_ISUPPORTS1(nsSaveAsCharset, nsISaveAsCharset)
      56                 : 
      57                 : //
      58                 : // nsSaveAsCharset
      59                 : //
      60               2 : nsSaveAsCharset::nsSaveAsCharset()
      61                 : {  // constructor: sets the pre-Init() defaults; no charset list or encoder is set up here
      62               2 :   mAttribute = attr_htmlTextDefault;
      63               2 :   mEntityVersion = 0;
      64               2 :   mCharsetListIndex = -1;  // -1 marks "no charset selected yet"; GetNextCharset() pre-increments, so the first call yields index 0
      65               2 : }
      66                 : 
      67               4 : nsSaveAsCharset::~nsSaveAsCharset()
      68                 : {  // empty body: no explicit cleanup is performed here
      69               8 : }
      70                 : 
      71                 : NS_IMETHODIMP
      72               2 : nsSaveAsCharset::Init(const char *charset, PRUint32 attr, PRUint32 entityVersion)
      73                 : {  // Init: stores the attribute flags and entity version, parses the charset list, then creates the encoder and (if requested) the entity converter
      74               2 :   nsresult rv = NS_OK;
      75                 :   // attr and entityVersion are stored before anything is validated, so they stay changed even if a later step fails
      76               2 :   mAttribute = attr;
      77               2 :   mEntityVersion = entityVersion;
      78                 :   // charset is a list of names; SetupCharsetList rejects a null or empty string
      79               2 :   rv = SetupCharsetList(charset);
      80               2 :   NS_ENSURE_SUCCESS(rv, rv);
      81                 : 
      82                 :   // set up unicode encoder for the first charset in the list (GetNextCharset() moves mCharsetListIndex from -1 to 0)
      83               2 :   rv = SetupUnicodeEncoder(GetNextCharset());
      84               2 :   NS_ENSURE_SUCCESS(rv, rv);
      85                 : 
      86                 :   // set up entity converter, only when the attributes ask for entity conversion and no converter exists from an earlier Init()
      87               2 :   if (attr_EntityNone != MASK_ENTITY(mAttribute) && !mEntityConverter)
      88               2 :     mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID, &rv);
      89                 : 
      90               2 :   return rv;  // result of the last step: the encoder setup, or do_CreateInstance when the converter was created above
      91                 : }
      92                 : 
      93                 : NS_IMETHODIMP
      94              11 : nsSaveAsCharset::Convert(const PRUnichar *inString, char **_retval)
      95                 : {  // Convert: encodes inString with the current encoder; the output buffer placed in *_retval is allocated inside DoCharsetConversion (PR_Malloc)
      96              11 :   if (nsnull == _retval)
      97               0 :     return NS_ERROR_NULL_POINTER;
      98              11 :   if (nsnull == inString)
      99               0 :     return NS_ERROR_NULL_POINTER;
     100              11 :   if (0 == *inString)  // an empty input string is rejected, not converted to an empty result
     101               0 :     return NS_ERROR_ILLEGAL_VALUE;
     102              11 :   nsresult rv = NS_OK;
     103                 : 
     104              11 :   NS_ASSERTION(mEncoder, "need to call Init() before Convert()");
     105              11 :   NS_ENSURE_TRUE(mEncoder, NS_ERROR_FAILURE);
     106                 : 
     107              11 :   *_retval = nsnull;  // note: the early returns above leave *_retval untouched
     108                 : 
     109                 :   // make sure to start from the first charset in the list (an earlier Convert() may have advanced the index during charset fallback)
     110              11 :   if (mCharsetListIndex > 0) {
     111               0 :     mCharsetListIndex = -1;
     112               0 :     rv = SetupUnicodeEncoder(GetNextCharset());
     113               0 :     NS_ENSURE_SUCCESS(rv, rv);
     114                 :   }
     115                 : 
     116              11 :   do {
     117                 :     // fallback to the next charset in the list if the last conversion failed by an unmapped character (rv is NS_OK on the first pass, so this only runs on retries)
     118              11 :     if (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv) {
     119               0 :       const char * charset = GetNextCharset();
     120               0 :       if (!charset)
     121               0 :         break;  // list exhausted: leave with rv still NS_ERROR_UENC_NOMAPPING and *_retval holding the last attempt's output
     122               0 :       rv = SetupUnicodeEncoder(charset);
     123               0 :       NS_ENSURE_SUCCESS(rv, rv);  // NOTE(review): this early return happens before the PR_FREEIF below, so *_retval still points at the previous buffer while an error is returned - confirm callers handle that
     124               0 :       PR_FREEIF(*_retval);  // discard the previous charset's output before retrying
     125                 :     }
     126                 : 
     127              11 :     if (attr_EntityBeforeCharsetConv == MASK_ENTITY(mAttribute)) {
     128               0 :       NS_ASSERTION(mEntityConverter, "need to call Init() before Convert()");
     129               0 :       NS_ENSURE_TRUE(mEntityConverter, NS_ERROR_FAILURE);
     130               0 :       PRUnichar *entity = nsnull;
     131                 :       // do the entity conversion first, then encode the entity-converted text instead of inString
     132               0 :       rv = mEntityConverter->ConvertToEntities(inString, mEntityVersion, &entity);
     133               0 :       if(NS_SUCCEEDED(rv)) {
     134               0 :         rv = DoCharsetConversion(entity, _retval);
     135               0 :         nsMemory::Free(entity);  // the intermediate string is released whether or not the charset conversion succeeded
     136                 :       }
     137                 :     }
     138                 :     else
     139              11 :       rv = DoCharsetConversion(inString, _retval);
     140                 : 
     141                 :   } while (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv);  // retry only when charset fallback is enabled and a character was unmappable
     142                 : 
     143              11 :   return rv;  // may be NS_ERROR_UENC_NOMAPPING with *_retval still set: DoCharsetConversion hands back its buffer in that case
     144                 : }
     145                 : 
     146                 : NS_IMETHODIMP 
     147               0 : nsSaveAsCharset::GetCharset(char * *aCharset)
     148                 : {  // GetCharset: returns in *aCharset a newly duplicated copy of the charset name at the current list index
     149               0 :   NS_ENSURE_ARG(aCharset);
     150               0 :   NS_ASSERTION(mCharsetListIndex >= 0, "need to call Init() first");
     151               0 :   NS_ENSURE_TRUE(mCharsetListIndex >= 0, NS_ERROR_FAILURE);  // only the lower bound is checked here; the index is advanced solely by GetNextCharset(), which bounds-checks first
     152                 : 
     153               0 :   const char* charset = mCharsetList[mCharsetListIndex].get();
     154               0 :   if (!charset) {
     155               0 :     *aCharset = nsnull;
     156               0 :     NS_ASSERTION(charset, "make sure to call Init() with non empty charset list");
     157               0 :     return NS_ERROR_FAILURE;
     158                 :   }
     159                 : 
     160               0 :   *aCharset = nsCRT::strdup(charset);  // the copy is handed to the caller - presumably the caller must free it; confirm the matching deallocator
     161               0 :   return (*aCharset) ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
     162                 : }
     163                 : 
     164                 : /////////////////////////////////////////////////////////////////////////////////////////
     165                 : 
     166                 : // do the fallback: append the fallback text for an unmapped character at *currentPos, reallocate the buffer if necessary
     167                 : // need to pass destination buffer info (size, current position and estimation of rest of the conversion); *outString, *bufferLength and *currentPos are updated in place
     168                 : NS_IMETHODIMP
     169              36 : nsSaveAsCharset::HandleFallBack(PRUint32 character, char **outString, PRInt32 *bufferLength, 
     170                 :                                 PRInt32 *currentPos, PRInt32 estimatedLength)
     171                 : {
     172              36 :   if((nsnull == outString ) || (nsnull == bufferLength) ||(nsnull ==currentPos))
     173               0 :     return NS_ERROR_NULL_POINTER;
     174                 :   char fallbackStr[256];  // scratch space for the fallback text; its size is passed to DoConversionFallBack below
     175              36 :   nsresult rv = DoConversionFallBack(character, fallbackStr, 256);
     176              36 :   if (NS_SUCCEEDED(rv)) {
     177              36 :     PRInt32 tempLen = (PRInt32) PL_strlen(fallbackStr);
     178                 : 
     179                 :     // reallocate if the buffer is not large enough for the fallback text plus the estimated remaining output
     180              36 :     if ((tempLen + estimatedLength) >= (*bufferLength - *currentPos)) {
     181               0 :       char *temp = (char *) PR_Realloc(*outString, *bufferLength + tempLen);  // grows by tempLen only; estimatedLength takes part in the test above but not in the new size
     182               0 :       if (NULL != temp) {
     183                 :         // adjust length/pointer after realloc
     184               0 :         *bufferLength += tempLen;
     185               0 :         *outString = temp;
     186                 :       } else {
     187               0 :         *outString = NULL;  // NOTE(review): if PR_Realloc leaves the old block allocated on failure (as C realloc does - confirm), this drops the only pointer to it
     188               0 :         *bufferLength =0;
     189               0 :         return NS_ERROR_OUT_OF_MEMORY;
     190                 :       }
     191                 :     }
     192              36 :     memcpy((*outString + *currentPos), fallbackStr, tempLen);  // copies the text without its terminating NUL
     193              36 :     *currentPos += tempLen;
     194                 :   }
     195              36 :   return rv;  // a DoConversionFallBack failure is returned as-is, with the buffer and position untouched
     196                 : }
     197                 : 
     198                 : NS_IMETHODIMP
     199              11 : nsSaveAsCharset::DoCharsetConversion(const PRUnichar *inString, char **outString)
     200                 : {  // DoCharsetConversion: encodes inString with mEncoder into a PR_Malloc'ed buffer returned in *outString, substituting fallback text for unmappable characters
     201              11 :   if(nsnull == outString )
     202               0 :     return NS_ERROR_NULL_POINTER;
     203              11 :   NS_ASSERTION(outString, "invalid input");  // cannot fire: outString was already null-checked just above
     204                 : 
     205              11 :   *outString = NULL;
     206                 : 
     207                 :   nsresult rv;
     208              11 :   PRInt32 inStringLength = nsCRT::strlen(inString);   // original input string length
     209                 :   PRInt32 bufferLength;                               // allocated buffer length
     210              11 :   PRInt32 srcLength = inStringLength;
     211                 :   PRInt32 dstLength;
     212              11 :   char *dstPtr = NULL;
     213                 :   PRInt32 pos1, pos2;  // pos1 indexes inString (input consumed), pos2 indexes dstPtr (output written)
     214              11 :   nsresult saveResult = NS_OK;                         // to remember NS_ERROR_UENC_NOMAPPING
     215                 : 
     216                 :   // estimate and allocate the target buffer (reserve extra memory for fallback)
     217              11 :   rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength);
     218              11 :   if (NS_FAILED(rv)) return rv;
     219                 : 
     220              11 :   bufferLength = dstLength + 512; // reserve 512 byte for fallback.
     221              11 :   dstPtr = (char *) PR_Malloc(bufferLength);
     222              11 :   if (NULL == dstPtr) return NS_ERROR_OUT_OF_MEMORY;
     223                 : 
     224                 :   
     225              58 :   for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) {
     226                 :     // convert from unicode; srcLength/dstLength go in as available counts and are read back as the counts consumed/produced
     227              42 :     dstLength = bufferLength - pos2;
     228              42 :     rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength);
     229                 : 
     230              42 :     pos1 += srcLength ? srcLength : 1;  // always advance by at least one unit so the loop cannot stall
     231              42 :     pos2 += dstLength;
     232              42 :     dstPtr[pos2] = '\0';  // NOTE(review): pos2 would equal bufferLength if Convert filled all the remaining space; this relies on the 512-byte reserve - confirm
     233                 : 
     234                 :     // break: this is usually the case (no error) OR unrecoverable error
     235              42 :     if (NS_ERROR_UENC_NOMAPPING != rv) break;
     236                 : 
     237                 :     // remember this happened and reset the result
     238              36 :     saveResult = rv;
     239              36 :     rv = NS_OK;  // overwritten immediately by the Finish() call below
     240                 : 
     241                 :     // finish encoder, give it a chance to write extra data like escape sequences
     242              36 :     dstLength = bufferLength - pos2;
     243              36 :     rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
     244              36 :     if (NS_SUCCEEDED(rv)) {
     245              36 :       pos2 += dstLength;
     246              36 :       dstPtr[pos2] = '\0';
     247                 :     }
     248                 : 
     249              36 :     srcLength = inStringLength - pos1;  // input still to be converted on the next pass
     250                 : 
     251                 :     // do the fallback for the unmapped character, which is read from inString[pos1-1]
     252              36 :     if (!ATTR_NO_FALLBACK(mAttribute)) {
     253                 :       PRUint32 unMappedChar;
     254              37 :       if (NS_IS_HIGH_SURROGATE(inString[pos1-1]) && 
     255               1 :           inStringLength > pos1 && NS_IS_LOW_SURROGATE(inString[pos1])) {
     256               1 :         unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]);  // combine the surrogate pair into one code point
     257               1 :         pos1++;  // the low surrogate has been consumed as part of the pair
     258                 :       } else {
     259              35 :         unMappedChar = inString[pos1-1];
     260                 :       }
     261                 : 
     262              36 :       rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength);  // estimate for the rest of the input, used by HandleFallBack to decide whether to grow the buffer
     263              36 :       if (NS_FAILED(rv)) 
     264               0 :         break;
     265                 : 
     266              36 :       rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength);  // may reallocate dstPtr and update bufferLength/pos2
     267              36 :       if (NS_FAILED(rv)) 
     268               0 :         break;
     269              36 :       dstPtr[pos2] = '\0';  // HandleFallBack copies the text without a terminator
     270                 :     }
     271                 :   }
     272                 : 
     273              11 :   if (NS_SUCCEEDED(rv)) {
     274                 :     // finish encoder, give it a chance to write extra data like escape sequences
     275              11 :     dstLength = bufferLength - pos2;
     276              11 :     rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
     277              11 :     if (NS_SUCCEEDED(rv)) {
     278              11 :       pos2 += dstLength;
     279              11 :       dstPtr[pos2] = '\0';
     280                 :     }
     281                 :   }
     282                 : 
     283              11 :   if (NS_FAILED(rv)) {
     284               0 :     PR_FREEIF(dstPtr);  // on failure the buffer is released and *outString stays NULL
     285               0 :     return rv;
     286                 :   }
     287                 : 
     288              11 :   *outString = dstPtr;      // set the result string
     289                 : 
     290                 :   // set error code so that the caller can do own fall back; *outString stays set in this case
     291              11 :   if (NS_ERROR_UENC_NOMAPPING == saveResult) {
     292              10 :     rv = NS_ERROR_UENC_NOMAPPING;
     293                 :   }
     294                 : 
     295              11 :   return rv;
     296                 : }
     297                 : 
     298                 : NS_IMETHODIMP
     299              36 : nsSaveAsCharset::DoConversionFallBack(PRUint32 inUCS4, char *outString, PRInt32 bufferLength)
     300                 : {  // DoConversionFallBack: writes the NUL-terminated fallback text for code point inUCS4 into outString; bufferLength is the capacity of outString
     301              36 :   NS_ASSERTION(outString, "invalid input");
     302              36 :   if(nsnull == outString )
     303               0 :     return NS_ERROR_NULL_POINTER;
     304                 : 
     305              36 :   *outString = '\0';  // start from an empty string so paths that write nothing still return valid text
     306                 : 
     307              36 :   nsresult rv = NS_OK;
     308                 : 
     309              36 :   if (ATTR_NO_FALLBACK(mAttribute)) {
     310               0 :     return NS_OK;  // fallback disabled: succeed with the empty string
     311                 :   }
     312              36 :   if (attr_EntityAfterCharsetConv == MASK_ENTITY(mAttribute)) {  // try an entity first; mEntityConverter is used unchecked here (Init() creates it when entity conversion is requested)
     313              36 :     char *entity = NULL;
     314              36 :     rv = mEntityConverter->ConvertUTF32ToEntity(inUCS4, mEntityVersion, &entity);
     315              36 :     if (NS_SUCCEEDED(rv)) {
     316              36 :       if (NULL == entity || (PRInt32)strlen(entity) > bufferLength) {  // NOTE(review): strlen(entity) == bufferLength passes this test, yet the copy below also writes a NUL - looks like an off-by-one; confirm
     317               0 :         return NS_ERROR_OUT_OF_MEMORY;  // NOTE(review): a non-null entity is not freed on this return - possible leak; confirm
     318                 :       }
     319              36 :       PL_strcpy(outString, entity);
     320              36 :       nsMemory::Free(entity);
     321              36 :       return rv;
     322                 :     }
     323                 :   }
     324                 :   // reached when entity conversion was not requested or failed: use the fallback style selected in the attributes
     325               0 :   switch (MASK_FALLBACK(mAttribute)) {
     326                 :   case attr_FallbackQuestionMark:
     327               0 :     if(bufferLength>=2) {  // needs room for '?' plus the terminator
     328               0 :       *outString++='?';
     329               0 :       *outString='\0';
     330               0 :       rv = NS_OK;
     331                 :     } else {
     332               0 :       rv = NS_ERROR_FAILURE;
     333                 :     }
     334               0 :     break;
     335                 :   case attr_FallbackEscapeU:
     336               0 :     if (inUCS4 & 0xff0000)  // bits above 0xFFFF are set: print six hex digits instead of four
     337               0 :       rv = (PR_snprintf(outString, bufferLength, "\\u%.6x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
     338                 :     else
     339               0 :       rv = (PR_snprintf(outString, bufferLength, "\\u%.4x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
     340               0 :     break;
     341                 :   case attr_FallbackDecimalNCR:
     342               0 :     rv = ( PR_snprintf(outString, bufferLength, "&#%u;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
     343               0 :     break;
     344                 :   case attr_FallbackHexNCR:
     345               0 :     rv = (PR_snprintf(outString, bufferLength, "&#x%x;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
     346               0 :     break;
     347                 :   case attr_FallbackNone:
     348               0 :     rv = NS_OK;  // nothing is written: outString stays empty
     349               0 :     break;
     350                 :   default:
     351               0 :     rv = NS_ERROR_ILLEGAL_VALUE;  // unrecognized fallback selector in mAttribute
     352               0 :     break;
     353                 :   }
     354                 : 
     355               0 :         return rv;
     356                 : }
     357                 : 
     358               2 : nsresult nsSaveAsCharset::SetupUnicodeEncoder(const char* charset)
     359                 : {  // SetupUnicodeEncoder: fetches the charset converter manager service and asks it for an encoder for charset, stored in mEncoder
     360               2 :   NS_ENSURE_ARG(charset);  // callers pass GetNextCharset() directly, which returns null when the list is exhausted
     361                 :   nsresult rv;
     362                 : 
     363                 :   // set up unicode encoder
     364               4 :   nsCOMPtr <nsICharsetConverterManager> ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
     365               2 :   NS_ENSURE_SUCCESS(rv, rv);
     366                 : 
     367               2 :   return ccm->GetUnicodeEncoder(charset, getter_AddRefs(mEncoder));  // the result of the lookup is returned unchanged
     368                 : }
     369                 : 
     370               2 : nsresult nsSaveAsCharset::SetupCharsetList(const char *charsetList)
     371                 : {  // SetupCharsetList: splits charsetList into individual charset names stored in mCharsetList
     372               2 :   NS_ENSURE_ARG(charsetList);
     373                 : 
     374               2 :   NS_ASSERTION(charsetList[0], "charsetList should not be empty");
     375               2 :   if (!charsetList[0])
     376               0 :     return NS_ERROR_INVALID_ARG;
     377                 :   // a non-negative index means a charset was selected by an earlier Init(): drop the old list and start over
     378               2 :   if (mCharsetListIndex >= 0) {
     379               0 :     mCharsetList.Clear();
     380               0 :     mCharsetListIndex = -1;
     381                 :   }
     382                 :   // tokenize on whitespace first, then hand each token to ParseString with ',' as the delimiter (presumably appending the pieces to mCharsetList - confirm)
     383               2 :   nsCWhitespaceTokenizer tokenizer = nsDependentCString(charsetList);
     384               6 :   while (tokenizer.hasMoreTokens()) {
     385               2 :     ParseString(tokenizer.nextToken(), ',', mCharsetList);
     386                 :   }
     387                 : 
     388               2 :   return NS_OK;  // the ParseString result is not inspected
     389                 : }
     390                 : 
     391               2 : const char * nsSaveAsCharset::GetNextCharset()
     392                 : {  // GetNextCharset: advances mCharsetListIndex by one and returns that entry's name, or nsnull (index unchanged) when no entry is left
     393               2 :   if ((mCharsetListIndex + 1) >= PRInt32(mCharsetList.Length()))
     394               0 :     return nsnull;
     395                 : 
     396                 :   // bump the index and return the next charset; the pointer comes from the list element's own storage
     397               2 :   return mCharsetList[++mCharsetListIndex].get();
     398                 : }

Generated by: LCOV version 1.7