1 : /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is mozilla.org code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 1998
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : *
24 : * Alternatively, the contents of this file may be used under the terms of
25 : * either of the GNU General Public License Version 2 or later (the "GPL"),
26 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 : * in which case the provisions of the GPL or the LGPL are applicable instead
28 : * of those above. If you wish to allow use of your version of this file only
29 : * under the terms of either the GPL or the LGPL, and not to allow others to
30 : * use your version of this file under the terms of the MPL, indicate your
31 : * decision by deleting the provisions above and replace them with the notice
32 : * and other provisions required by the GPL or the LGPL. If you do not delete
33 : * the provisions above, a recipient may use your version of this file under
34 : * the terms of any one of the MPL, the GPL or the LGPL.
35 : *
36 : * ***** END LICENSE BLOCK ***** */
37 :
38 : #include "nsConverterInputStream.h"
39 : #include "nsIInputStream.h"
40 : #include "nsICharsetConverterManager.h"
41 : #include "nsIServiceManager.h"
42 :
// Default capacity requested for both the byte buffer and the unichar buffer
// when Init() is called with a non-positive aBufferSize.
43 : #define CONVERTER_BUFFER_SIZE 8192
44 :
// Lists the three interfaces this class exposes. NOTE(review): presumably this
// XPCOM macro generates the nsISupports boilerplate for them -- confirm against
// the macro definition.
45 11265 : NS_IMPL_ISUPPORTS3(nsConverterInputStream, nsIConverterInputStream,
46 : nsIUnicharInputStream, nsIUnicharLineInputStream)
47 :
// Class ID used by Init() to look up the charset converter manager service.
48 : static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
49 :
50 : NS_IMETHODIMP
51 453 : nsConverterInputStream::Init(nsIInputStream* aStream,
52 : const char *aCharset,
53 : PRInt32 aBufferSize,
54 : PRUnichar aReplacementChar)
55 : {
56 453 : if (!aCharset)
57 0 : aCharset = "UTF-8";
58 :
59 : nsresult rv;
60 :
61 453 : if (aBufferSize <=0) aBufferSize=CONVERTER_BUFFER_SIZE;
62 :
63 : // get the decoder
64 : nsCOMPtr<nsICharsetConverterManager> ccm =
65 906 : do_GetService(kCharsetConverterManagerCID, &rv);
66 453 : if (NS_FAILED(rv)) return rv;
67 :
68 453 : rv = ccm->GetUnicodeDecoder(aCharset ? aCharset : "ISO-8859-1", getter_AddRefs(mConverter));
69 453 : if (NS_FAILED(rv)) return rv;
70 :
71 : // set up our buffers
72 453 : rv = NS_NewByteBuffer(getter_AddRefs(mByteData), nsnull, aBufferSize);
73 453 : if (NS_FAILED(rv)) return rv;
74 :
75 453 : rv = NS_NewUnicharBuffer(getter_AddRefs(mUnicharData), nsnull, aBufferSize);
76 453 : if (NS_FAILED(rv)) return rv;
77 :
78 453 : mInput = aStream;
79 453 : mReplacementChar = aReplacementChar;
80 :
81 453 : return NS_OK;
82 : }
83 :
84 : NS_IMETHODIMP
85 483 : nsConverterInputStream::Close()
86 : {
87 483 : nsresult rv = mInput ? mInput->Close() : NS_OK;
88 483 : PR_FREEIF(mLineBuffer);
89 483 : mInput = nsnull;
90 483 : mConverter = nsnull;
91 483 : mByteData = nsnull;
92 483 : mUnicharData = nsnull;
93 483 : return rv;
94 : }
95 :
96 : NS_IMETHODIMP
97 1057 : nsConverterInputStream::Read(PRUnichar* aBuf,
98 : PRUint32 aCount,
99 : PRUint32 *aReadCount)
100 : {
101 1057 : NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
102 1057 : PRUint32 readCount = mUnicharDataLength - mUnicharDataOffset;
103 1057 : if (0 == readCount) {
104 : // Fill the unichar buffer
105 1057 : readCount = Fill(&mLastErrorCode);
106 1057 : if (readCount == 0) {
107 376 : *aReadCount = 0;
108 376 : return mLastErrorCode;
109 : }
110 : }
111 681 : if (readCount > aCount) {
112 0 : readCount = aCount;
113 : }
114 681 : memcpy(aBuf, mUnicharData->GetBuffer() + mUnicharDataOffset,
115 1362 : readCount * sizeof(PRUnichar));
116 681 : mUnicharDataOffset += readCount;
117 681 : *aReadCount = readCount;
118 681 : return NS_OK;
119 : }
120 :
121 : NS_IMETHODIMP
122 0 : nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
123 : void* aClosure,
124 : PRUint32 aCount, PRUint32 *aReadCount)
125 : {
126 0 : NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
127 0 : PRUint32 bytesToWrite = mUnicharDataLength - mUnicharDataOffset;
128 : nsresult rv;
129 0 : if (0 == bytesToWrite) {
130 : // Fill the unichar buffer
131 0 : bytesToWrite = Fill(&rv);
132 0 : if (bytesToWrite <= 0) {
133 0 : *aReadCount = 0;
134 0 : return rv;
135 : }
136 : }
137 :
138 0 : if (bytesToWrite > aCount)
139 0 : bytesToWrite = aCount;
140 :
141 : PRUint32 bytesWritten;
142 0 : PRUint32 totalBytesWritten = 0;
143 :
144 0 : while (bytesToWrite) {
145 : rv = aWriter(this, aClosure,
146 0 : mUnicharData->GetBuffer() + mUnicharDataOffset,
147 0 : totalBytesWritten, bytesToWrite, &bytesWritten);
148 0 : if (NS_FAILED(rv)) {
149 : // don't propagate errors to the caller
150 0 : break;
151 : }
152 :
153 0 : bytesToWrite -= bytesWritten;
154 0 : totalBytesWritten += bytesWritten;
155 0 : mUnicharDataOffset += bytesWritten;
156 :
157 : }
158 :
159 0 : *aReadCount = totalBytesWritten;
160 :
161 0 : return NS_OK;
162 : }
163 :
164 : NS_IMETHODIMP
165 203 : nsConverterInputStream::ReadString(PRUint32 aCount, nsAString& aString,
166 : PRUint32* aReadCount)
167 : {
168 203 : NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
169 203 : PRUint32 readCount = mUnicharDataLength - mUnicharDataOffset;
170 203 : if (0 == readCount) {
171 : // Fill the unichar buffer
172 171 : readCount = Fill(&mLastErrorCode);
173 171 : if (readCount == 0) {
174 79 : *aReadCount = 0;
175 79 : return mLastErrorCode;
176 : }
177 : }
178 124 : if (readCount > aCount) {
179 32 : readCount = aCount;
180 : }
181 124 : const PRUnichar* buf = reinterpret_cast<const PRUnichar*>(mUnicharData->GetBuffer() +
182 124 : mUnicharDataOffset);
183 124 : aString.Assign(buf, readCount);
184 124 : mUnicharDataOffset += readCount;
185 124 : *aReadCount = readCount;
186 124 : return NS_OK;
187 : }
188 :
189 : PRUint32
190 1228 : nsConverterInputStream::Fill(nsresult * aErrorCode)
191 : {
192 1228 : if (nsnull == mInput) {
193 : // We already closed the stream!
194 0 : *aErrorCode = NS_BASE_STREAM_CLOSED;
195 0 : return 0;
196 : }
197 :
198 1228 : if (NS_FAILED(mLastErrorCode)) {
199 : // We failed to completely convert last time, and error-recovery
200 : // is disabled. We will fare no better this time, so...
201 0 : *aErrorCode = mLastErrorCode;
202 0 : return 0;
203 : }
204 :
205 : // We assume a many to one conversion and are using equal sizes for
206 : // the two buffers. However if an error happens at the very start
207 : // of a byte buffer we may end up in a situation where n bytes lead
208 : // to n+1 unicode chars. Thus we need to keep track of the leftover
209 : // bytes as we convert.
210 :
211 1228 : PRInt32 nb = mByteData->Fill(aErrorCode, mInput, mLeftOverBytes);
212 : #if defined(DEBUG_bzbarsky) && 0
213 : for (unsigned int foo = 0; foo < mByteData->GetLength(); ++foo) {
214 : fprintf(stderr, "%c", mByteData->GetBuffer()[foo]);
215 : }
216 : fprintf(stderr, "\n");
217 : #endif
218 1228 : if (nb <= 0 && mLeftOverBytes == 0) {
219 : // No more data
220 447 : *aErrorCode = NS_OK;
221 447 : return 0;
222 : }
223 :
224 781 : NS_ASSERTION(PRUint32(nb) + mLeftOverBytes == mByteData->GetLength(),
225 : "mByteData is lying to us somewhere");
226 :
227 : // Now convert as much of the byte buffer to unicode as possible
228 781 : mUnicharDataOffset = 0;
229 781 : mUnicharDataLength = 0;
230 781 : PRUint32 srcConsumed = 0;
231 1339 : do {
232 813 : PRInt32 srcLen = mByteData->GetLength() - srcConsumed;
233 813 : PRInt32 dstLen = mUnicharData->GetBufferSize() - mUnicharDataLength;
234 1626 : *aErrorCode = mConverter->Convert(mByteData->GetBuffer()+srcConsumed,
235 : &srcLen,
236 813 : mUnicharData->GetBuffer()+mUnicharDataLength,
237 2439 : &dstLen);
238 813 : mUnicharDataLength += dstLen;
239 : // XXX if srcLen is negative, we want to drop the _first_ byte in
240 : // the erroneous byte sequence and try again. This is not quite
241 : // possible right now -- see bug 160784
242 813 : srcConsumed += srcLen;
243 813 : if (NS_FAILED(*aErrorCode) && mReplacementChar) {
244 32 : NS_ASSERTION(0 < mUnicharData->GetBufferSize() - mUnicharDataLength,
245 : "Decoder returned an error but filled the output buffer! "
246 : "Should not happen.");
247 32 : mUnicharData->GetBuffer()[mUnicharDataLength++] = mReplacementChar;
248 32 : ++srcConsumed;
249 : // XXX this is needed to make sure we don't underrun our buffer;
250 : // bug 160784 again
251 32 : srcConsumed = NS_MAX<PRUint32>(srcConsumed, 0);
252 32 : mConverter->Reset();
253 : }
254 813 : NS_ASSERTION(srcConsumed <= mByteData->GetLength(),
255 : "Whoa. The converter should have returned NS_OK_UDEC_MOREINPUT before this point!");
256 : } while (mReplacementChar &&
257 494 : NS_FAILED(*aErrorCode) &&
258 32 : mUnicharData->GetBufferSize() > mUnicharDataLength);
259 :
260 781 : mLeftOverBytes = mByteData->GetLength() - srcConsumed;
261 :
262 781 : return mUnicharDataLength;
263 : }
264 :
265 : NS_IMETHODIMP
266 6641 : nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult)
267 : {
268 6641 : if (!mLineBuffer) {
269 363 : nsresult rv = NS_InitLineBuffer(&mLineBuffer);
270 363 : if (NS_FAILED(rv)) return rv;
271 : }
272 6641 : return NS_ReadLine(this, mLineBuffer, aLine, aResult);
273 : }
|