1 : /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is mozilla.org code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 1998
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : * Pierre Phaneuf <pp@ludusdesign.com>
24 : *
25 : * Alternatively, the contents of this file may be used under the terms of
26 : * either of the GNU General Public License Version 2 or later (the "GPL"),
27 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 : * in which case the provisions of the GPL or the LGPL are applicable instead
29 : * of those above. If you wish to allow use of your version of this file only
30 : * under the terms of either the GPL or the LGPL, and not to allow others to
31 : * use your version of this file under the terms of the MPL, indicate your
32 : * decision by deleting the provisions above and replace them with the notice
33 : * and other provisions required by the GPL or the LGPL. If you do not delete
34 : * the provisions above, a recipient may use your version of this file under
35 : * the terms of any one of the MPL, the GPL or the LGPL.
36 : *
37 : * ***** END LICENSE BLOCK ***** */
38 : #include "nscore.h"
39 : #include "nsCyrillicProb.h"
40 : #include <stdio.h>
41 :
42 : #include "nsCOMPtr.h"
43 : #include "nsISupports.h"
44 : #include "nsICharsetDetector.h"
45 : #include "nsCharDetDll.h"
46 : #include "nsCyrillicDetector.h"
47 :
48 : //----------------------------------------------------------------------
49 : // Interface nsISupports [implementation]
50 0 : NS_IMPL_ISUPPORTS1(nsCyrXPCOMDetector, nsICharsetDetector)
51 0 : NS_IMPL_ISUPPORTS1(nsCyrXPCOMStringDetector, nsIStringCharsetDetector)
52 :
53 0 : void nsCyrillicDetector::HandleData(const char* aBuf, PRUint32 aLen)
54 : {
55 : PRUint8 cls;
56 : const char* b;
57 : PRUint32 i;
58 0 : if(mDone)
59 0 : return;
60 0 : for(i=0, b=aBuf;i<aLen;i++,b++)
61 : {
62 0 : for(PRUintn j=0;j<mItems;j++)
63 : {
64 0 : if( 0x80 & *b)
65 0 : cls = mCyrillicClass[j][(*b) & 0x7F];
66 : else
67 0 : cls = 0;
68 0 : NS_ASSERTION( cls <= 32 , "illegal character class");
69 0 : mProb[j] += gCyrillicProb[mLastCls[j]][cls];
70 0 : mLastCls[j] = cls;
71 : }
72 : }
73 : // We now only based on the first block we receive
74 0 : DataEnd();
75 : }
76 :
77 : //---------------------------------------------------------------------
78 : #define THRESHOLD_RATIO 1.5f
79 0 : void nsCyrillicDetector::DataEnd()
80 : {
81 0 : PRUint32 max=0;
82 0 : PRUint8 maxIdx=0;
83 : PRUint8 j;
84 0 : if(mDone)
85 0 : return;
86 0 : for(j=0;j<mItems;j++) {
87 0 : if(mProb[j] > max)
88 : {
89 0 : max = mProb[j];
90 0 : maxIdx= j;
91 : }
92 : }
93 :
94 0 : if( 0 == max ) // if we didn't get any 8 bits data
95 0 : return;
96 :
97 : #ifdef DEBUG
98 0 : for(j=0;j<mItems;j++)
99 0 : printf("Charset %s->\t%d\n", mCharsets[j], mProb[j]);
100 : #endif
101 0 : this->Report(mCharsets[maxIdx]);
102 0 : mDone = true;
103 : }
104 :
105 : //---------------------------------------------------------------------
106 0 : nsCyrXPCOMDetector:: nsCyrXPCOMDetector(PRUint8 aItems,
107 : const PRUint8 ** aCyrillicClass,
108 : const char **aCharsets)
109 0 : : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets)
110 : {
111 0 : mObserver = nsnull;
112 0 : }
113 :
114 : //---------------------------------------------------------------------
115 0 : nsCyrXPCOMDetector::~nsCyrXPCOMDetector()
116 : {
117 0 : }
118 :
119 : //---------------------------------------------------------------------
120 0 : NS_IMETHODIMP nsCyrXPCOMDetector::Init(
121 : nsICharsetDetectionObserver* aObserver)
122 : {
123 0 : NS_ASSERTION(mObserver == nsnull , "Init twice");
124 0 : if(nsnull == aObserver)
125 0 : return NS_ERROR_ILLEGAL_VALUE;
126 :
127 0 : mObserver = aObserver;
128 0 : return NS_OK;
129 : }
130 :
131 : //----------------------------------------------------------
132 0 : NS_IMETHODIMP nsCyrXPCOMDetector::DoIt(
133 : const char* aBuf, PRUint32 aLen, bool* oDontFeedMe)
134 : {
135 0 : NS_ASSERTION(mObserver != nsnull , "have not init yet");
136 :
137 0 : if((nsnull == aBuf) || (nsnull == oDontFeedMe))
138 0 : return NS_ERROR_ILLEGAL_VALUE;
139 :
140 0 : this->HandleData(aBuf, aLen);
141 0 : *oDontFeedMe = false;
142 0 : return NS_OK;
143 : }
144 :
145 : //----------------------------------------------------------
146 0 : NS_IMETHODIMP nsCyrXPCOMDetector::Done()
147 : {
148 0 : NS_ASSERTION(mObserver != nsnull , "have not init yet");
149 0 : this->DataEnd();
150 0 : return NS_OK;
151 : }
152 :
153 : //----------------------------------------------------------
154 0 : void nsCyrXPCOMDetector::Report(const char* aCharset)
155 : {
156 0 : NS_ASSERTION(mObserver != nsnull , "have not init yet");
157 0 : mObserver->Notify(aCharset, eBestAnswer);
158 0 : }
159 :
160 : //---------------------------------------------------------------------
161 0 : nsCyrXPCOMStringDetector:: nsCyrXPCOMStringDetector(PRUint8 aItems,
162 : const PRUint8 ** aCyrillicClass,
163 : const char **aCharsets)
164 0 : : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets)
165 : {
166 0 : }
167 :
168 : //---------------------------------------------------------------------
169 0 : nsCyrXPCOMStringDetector::~nsCyrXPCOMStringDetector()
170 : {
171 0 : }
172 :
173 : //---------------------------------------------------------------------
174 0 : void nsCyrXPCOMStringDetector::Report(const char *aCharset)
175 : {
176 0 : mResult = aCharset;
177 0 : }
178 :
179 : //---------------------------------------------------------------------
180 0 : NS_IMETHODIMP nsCyrXPCOMStringDetector::DoIt(const char* aBuf, PRUint32 aLen,
181 : const char** oCharset, nsDetectionConfident &oConf)
182 : {
183 0 : mResult = nsnull;
184 0 : mDone = false;
185 0 : this->HandleData(aBuf, aLen);
186 0 : this->DataEnd();
187 0 : *oCharset=mResult;
188 0 : oConf = eBestAnswer;
189 0 : return NS_OK;
190 : }
191 :
192 :
|