LCOV - code coverage report
Current view: directory - extensions/universalchardet/src/base - nsLatin1Prober.cpp (source / functions) Found Hit Coverage
Test: app.info Lines: 37 0 0.0 %
Date: 2012-06-02 Functions: 3 0 0.0 %

       1                 : /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2                 : /* ***** BEGIN LICENSE BLOCK *****
       3                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       4                 :  *
       5                 :  * The contents of this file are subject to the Mozilla Public License Version
       6                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       7                 :  * the License. You may obtain a copy of the License at
       8                 :  * http://www.mozilla.org/MPL/
       9                 :  *
      10                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      11                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      12                 :  * for the specific language governing rights and limitations under the
      13                 :  * License.
      14                 :  *
      15                 :  * The Original Code is Mozilla Universal charset detector code.
      16                 :  *
      17                 :  * The Initial Developer of the Original Code is
      18                 :  * Netscape Communications Corporation.
      19                 :  * Portions created by the Initial Developer are Copyright (C) 2001
      20                 :  * the Initial Developer. All Rights Reserved.
      21                 :  *
      22                 :  * Contributor(s):
      23                 :  *          Shy Shalom <shooshX@gmail.com>
      24                 :  *
      25                 :  * Alternatively, the contents of this file may be used under the terms of
      26                 :  * either the GNU General Public License Version 2 or later (the "GPL"), or
      27                 :  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      28                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      29                 :  * of those above. If you wish to allow use of your version of this file only
      30                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      31                 :  * use your version of this file under the terms of the MPL, indicate your
      32                 :  * decision by deleting the provisions above and replace them with the notice
      33                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      34                 :  * the provisions above, a recipient may use your version of this file under
      35                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      36                 :  *
      37                 :  * ***** END LICENSE BLOCK ***** */
      38                 : 
      39                 : #include "nsLatin1Prober.h"
      40                 : #include "prmem.h"
      41                 : #include <stdio.h>
      42                 : // Character classes; each value is a row/column index into Latin1ClassModel.
      43                 : #define UDF    0        // undefined
      44                 : #define OTH    1        // other
      45                 : #define ASC    2        // ascii capital letter
      46                 : #define ASS    3        // ascii small letter
      47                 : #define ACV    4        // accent capital vowel
      48                 : #define ACO    5        // accent capital other
      49                 : #define ASV    6        // accent small vowel
      50                 : #define ASO    7        // accent small other
      51                 : #define CLASS_NUM   8    // total number of classes
      52                 : // Lookup table: byte value (0x00 - 0xFF) -> character class (UDF ... ASO).
      53                 : static const unsigned char Latin1_CharToClass[] = 
      54                 : {
      55                 :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 00 - 07
      56                 :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 08 - 0F
      57                 :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 10 - 17
      58                 :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 18 - 1F
      59                 :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 20 - 27
      60                 :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 28 - 2F
      61                 :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 30 - 37
      62                 :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 38 - 3F
      63                 :   OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   // 40 - 47
      64                 :   ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   // 48 - 4F
      65                 :   ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   // 50 - 57
      66                 :   ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH,   // 58 - 5F
      67                 :   OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   // 60 - 67
      68                 :   ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   // 68 - 6F
      69                 :   ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   // 70 - 77
      70                 :   ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH,   // 78 - 7F
      71                 :   OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH,   // 80 - 87
      72                 :   OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF,   // 88 - 8F
      73                 :   UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 90 - 97
      74                 :   OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO,   // 98 - 9F
      75                 :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // A0 - A7
      76                 :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // A8 - AF
      77                 :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // B0 - B7
      78                 :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // B8 - BF
      79                 :   ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO,   // C0 - C7
      80                 :   ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV,   // C8 - CF
      81                 :   ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH,   // D0 - D7
      82                 :   ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO,   // D8 - DF
      83                 :   ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO,   // E0 - E7
      84                 :   ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV,   // E8 - EF
      85                 :   ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH,   // F0 - F7
      86                 :   ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO,   // F8 - FF
      87                 : };
      88                 : 
      89                 : // Likelihood of each class transition: row = previous character's class, column = current character's class.
      90                 : /* 0 : illegal 
      91                 :    1 : very unlikely 
      92                 :    2 : normal 
      93                 :    3 : very likely
      94                 : */
      95                 : static const unsigned char Latin1ClassModel[] = 
      96                 : {
      97                 : /*      UDF OTH ASC ASS ACV ACO ASV ASO  */
      98                 : /*UDF*/  0,  0,  0,  0,  0,  0,  0,  0,
      99                 : /*OTH*/  0,  3,  3,  3,  3,  3,  3,  3,
     100                 : /*ASC*/  0,  3,  3,  3,  3,  3,  3,  3, 
     101                 : /*ASS*/  0,  3,  3,  3,  1,  1,  3,  3,
     102                 : /*ACV*/  0,  3,  3,  3,  1,  2,  1,  2,
     103                 : /*ACO*/  0,  3,  3,  3,  3,  3,  3,  3, 
     104                 : /*ASV*/  0,  3,  1,  3,  1,  1,  1,  3, 
     105                 : /*ASO*/  0,  3,  1,  3,  1,  1,  3,  3,
     106                 : };
     107                 : // Restores the initial state: eDetecting, last class OTH, and every frequency counter set to zero.
     108               0 : void  nsLatin1Prober::Reset(void)
     109                 : {
     110               0 :   mState = eDetecting;
     111               0 :   mLastCharClass = OTH;
     112               0 :   for (int i = 0; i < FREQ_CAT_NUM; i++)
     113               0 :     mFreqCounter[i] = 0;
     114               0 : }
     115                 : // Runs aLen bytes of aBuf through the class-transition model, updating mFreqCounter, and returns mState.
     116                 : // mLastCharClass is not reset here, so the previous class carries over between calls until Reset().
     117               0 : nsProbingState nsLatin1Prober::HandleData(const char* aBuf, PRUint32 aLen)
     118                 : {
     119               0 :   char *newBuf1 = 0;
     120               0 :   PRUint32 newLen1 = 0;
     121                 : // If FilterWithEnglishLetters() returns false, scan the caller's buffer as-is instead.
     122               0 :   if (!FilterWithEnglishLetters(aBuf, aLen, &newBuf1, newLen1)) {
     123               0 :     newBuf1 = (char*)aBuf;
     124               0 :     newLen1 = aLen;
     125                 :   }
     126                 :   // Tally the likelihood category of each (previous class -> current class) transition.
     127                 :   unsigned char charClass;
     128                 :   unsigned char freq;
     129               0 :   for (PRUint32 i = 0; i < newLen1; i++)
     130                 :   {
     131               0 :     charClass = Latin1_CharToClass[(unsigned char)newBuf1[i]];
     132               0 :     freq = Latin1ClassModel[mLastCharClass*CLASS_NUM + charClass];
     133               0 :     if (freq == 0) {  // illegal transition: rule this prober out and stop scanning
     134               0 :       mState = eNotMe;
     135               0 :       break;
     136                 :     }
     137               0 :     mFreqCounter[freq]++;
     138               0 :     mLastCharClass = charClass;
     139                 :   }
     140                 : // Never free the caller's buffer; a distinct newBuf1 presumably came from the filter (TODO confirm ownership).
     141               0 :   if (newBuf1 != aBuf)
     142               0 :     PR_FREEIF(newBuf1);
     143                 : 
     144               0 :   return mState;
     145                 : }
     146                 : // Returns a confidence in [0, 0.5] derived from the transition counters, or 0.01 once mState is eNotMe.
     147               0 : float nsLatin1Prober::GetConfidence(void)
     148                 : {
     149               0 :   if (mState == eNotMe)
     150               0 :     return 0.01f;
     151                 :   // Sum all categories to get the number of transitions counted so far.
     152                 :   float confidence;
     153               0 :   PRUint32 total = 0;
     154               0 :   for (PRInt32 i = 0; i < FREQ_CAT_NUM; i++)
     155               0 :     total += mFreqCounter[i];
     156                 : // Share of "very likely" transitions (3), minus a 20x-weighted share of "very unlikely" ones (1).
     157               0 :   if(!total)
     158               0 :     confidence = 0.0f;
     159                 :   else
     160                 :   {
     161               0 :     confidence = mFreqCounter[3]*1.0f / total;
     162               0 :     confidence -= mFreqCounter[1]*20.0f/total;
     163                 :   }
     164                 : // The penalty term can push the score below zero; clamp it.
     165               0 :   if (confidence < 0.0f)
     166               0 :     confidence = 0.0f;
     167                 :   
     168                 :   // Halve the Latin1 confidence so that other, more accurate detectors
     169                 :   // can take priority.
     170               0 :   confidence *= 0.50f;
     171                 : 
     172               0 :   return confidence;
     173                 : }
     174                 : // Compiled only when DEBUG_chardet is defined: prints the current confidence and charset name to stdout.
     175                 : #ifdef DEBUG_chardet
     176                 : void  nsLatin1Prober::DumpStatus()
     177                 : {
     178                 :   printf(" Latin1Prober: %1.3f [%s]\r\n", GetConfidence(), GetCharSetName());
     179                 : }
     180                 : #endif
     181                 : 
     182                 : 

Generated by: LCOV version 1.7