1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is mozilla.org code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 1998
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : *
24 : * Alternatively, the contents of this file may be used under the terms of
25 : * either of the GNU General Public License Version 2 or later (the "GPL"),
26 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 : * in which case the provisions of the GPL or the LGPL are applicable instead
28 : * of those above. If you wish to allow use of your version of this file only
29 : * under the terms of either the GPL or the LGPL, and not to allow others to
30 : * use your version of this file under the terms of the MPL, indicate your
31 : * decision by deleting the provisions above and replace them with the notice
32 : * and other provisions required by the GPL or the LGPL. If you do not delete
33 : * the provisions above, a recipient may use your version of this file under
34 : * the terms of any one of the MPL, the GPL or the LGPL.
35 : *
36 : * ***** END LICENSE BLOCK ***** */
37 :
38 : #include "nsUnicharInputStream.h"
39 : #include "nsIInputStream.h"
40 : #include "nsIByteBuffer.h"
41 : #include "nsIUnicharBuffer.h"
42 : #include "nsIServiceManager.h"
43 : #include "nsString.h"
44 : #include "nsAutoPtr.h"
45 : #include "nsCRT.h"
46 : #include "nsUTF8Utils.h"
47 : #include <fcntl.h>
48 : #if defined(XP_WIN)
49 : #include <io.h>
50 : #else
51 : #include <unistd.h>
52 : #endif
53 :
54 : #define STRING_BUFFER_SIZE 8192
55 :
56 : class StringUnicharInputStream : public nsIUnicharInputStream {
57 : public:
58 0 : StringUnicharInputStream(const nsAString& aString) :
59 0 : mString(aString), mPos(0), mLen(aString.Length()) { }
60 :
61 : NS_DECL_ISUPPORTS
62 : NS_DECL_NSIUNICHARINPUTSTREAM
63 :
64 : nsString mString;
65 : PRUint32 mPos;
66 : PRUint32 mLen;
67 :
68 : private:
69 0 : ~StringUnicharInputStream() { }
70 : };
71 :
72 : NS_IMETHODIMP
73 0 : StringUnicharInputStream::Read(PRUnichar* aBuf,
74 : PRUint32 aCount,
75 : PRUint32 *aReadCount)
76 : {
77 0 : if (mPos >= mLen) {
78 0 : *aReadCount = 0;
79 0 : return NS_OK;
80 : }
81 0 : nsAString::const_iterator iter;
82 0 : mString.BeginReading(iter);
83 0 : const PRUnichar* us = iter.get();
84 0 : PRUint32 amount = mLen - mPos;
85 0 : if (amount > aCount) {
86 0 : amount = aCount;
87 : }
88 0 : memcpy(aBuf, us + mPos, sizeof(PRUnichar) * amount);
89 0 : mPos += amount;
90 0 : *aReadCount = amount;
91 0 : return NS_OK;
92 : }
93 :
94 : NS_IMETHODIMP
95 0 : StringUnicharInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
96 : void* aClosure,
97 : PRUint32 aCount, PRUint32 *aReadCount)
98 : {
99 : PRUint32 bytesWritten;
100 0 : PRUint32 totalBytesWritten = 0;
101 :
102 : nsresult rv;
103 0 : aCount = NS_MIN(mString.Length() - mPos, aCount);
104 :
105 0 : nsAString::const_iterator iter;
106 0 : mString.BeginReading(iter);
107 :
108 0 : while (aCount) {
109 0 : rv = aWriter(this, aClosure, iter.get() + mPos,
110 0 : totalBytesWritten, aCount, &bytesWritten);
111 :
112 0 : if (NS_FAILED(rv)) {
113 : // don't propagate errors to the caller
114 0 : break;
115 : }
116 :
117 0 : aCount -= bytesWritten;
118 0 : totalBytesWritten += bytesWritten;
119 0 : mPos += bytesWritten;
120 : }
121 :
122 0 : *aReadCount = totalBytesWritten;
123 :
124 0 : return NS_OK;
125 : }
126 :
127 : NS_IMETHODIMP
128 0 : StringUnicharInputStream::ReadString(PRUint32 aCount, nsAString& aString,
129 : PRUint32* aReadCount)
130 : {
131 0 : if (mPos >= mLen) {
132 0 : *aReadCount = 0;
133 0 : return NS_OK;
134 : }
135 0 : PRUint32 amount = mLen - mPos;
136 0 : if (amount > aCount) {
137 0 : amount = aCount;
138 : }
139 0 : aString = Substring(mString, mPos, amount);
140 0 : mPos += amount;
141 0 : *aReadCount = amount;
142 0 : return NS_OK;
143 : }
144 :
145 0 : nsresult StringUnicharInputStream::Close()
146 : {
147 0 : mPos = mLen;
148 0 : return NS_OK;
149 : }
150 :
151 0 : NS_IMPL_ISUPPORTS1(StringUnicharInputStream, nsIUnicharInputStream)
152 :
153 : //----------------------------------------------------------------------
154 :
155 : class UTF8InputStream : public nsIUnicharInputStream {
156 : public:
157 : UTF8InputStream();
158 : nsresult Init(nsIInputStream* aStream);
159 :
160 : NS_DECL_ISUPPORTS
161 : NS_DECL_NSIUNICHARINPUTSTREAM
162 :
163 : private:
164 : ~UTF8InputStream();
165 :
166 : protected:
167 : PRInt32 Fill(nsresult * aErrorCode);
168 :
169 : static void CountValidUTF8Bytes(const char *aBuf, PRUint32 aMaxBytes, PRUint32& aValidUTF8bytes, PRUint32& aValidUTF16CodeUnits);
170 :
171 : nsCOMPtr<nsIInputStream> mInput;
172 : nsCOMPtr<nsIByteBuffer> mByteData;
173 : nsCOMPtr<nsIUnicharBuffer> mUnicharData;
174 :
175 : PRUint32 mByteDataOffset;
176 : PRUint32 mUnicharDataOffset;
177 : PRUint32 mUnicharDataLength;
178 : };
179 :
180 2297 : UTF8InputStream::UTF8InputStream() :
181 : mByteDataOffset(0),
182 : mUnicharDataOffset(0),
183 2297 : mUnicharDataLength(0)
184 : {
185 2297 : }
186 :
187 : nsresult
188 2297 : UTF8InputStream::Init(nsIInputStream* aStream)
189 : {
190 2297 : nsresult rv = NS_NewByteBuffer(getter_AddRefs(mByteData), nsnull,
191 2297 : STRING_BUFFER_SIZE);
192 2297 : if (NS_FAILED(rv)) return rv;
193 2297 : rv = NS_NewUnicharBuffer(getter_AddRefs(mUnicharData), nsnull,
194 2297 : STRING_BUFFER_SIZE);
195 2297 : if (NS_FAILED(rv)) return rv;
196 :
197 2297 : mInput = aStream;
198 :
199 2297 : return NS_OK;
200 : }
201 :
202 18376 : NS_IMPL_ISUPPORTS1(UTF8InputStream,nsIUnicharInputStream)
203 :
204 4594 : UTF8InputStream::~UTF8InputStream()
205 : {
206 2297 : Close();
207 2297 : }
208 :
209 2297 : nsresult UTF8InputStream::Close()
210 : {
211 2297 : mInput = nsnull;
212 2297 : mByteData = nsnull;
213 2297 : mUnicharData = nsnull;
214 :
215 2297 : return NS_OK;
216 : }
217 :
218 0 : nsresult UTF8InputStream::Read(PRUnichar* aBuf,
219 : PRUint32 aCount,
220 : PRUint32 *aReadCount)
221 : {
222 0 : NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
223 0 : PRUint32 readCount = mUnicharDataLength - mUnicharDataOffset;
224 : nsresult errorCode;
225 0 : if (0 == readCount) {
226 : // Fill the unichar buffer
227 0 : PRInt32 bytesRead = Fill(&errorCode);
228 0 : if (bytesRead <= 0) {
229 0 : *aReadCount = 0;
230 0 : return errorCode;
231 : }
232 0 : readCount = bytesRead;
233 : }
234 0 : if (readCount > aCount) {
235 0 : readCount = aCount;
236 : }
237 0 : memcpy(aBuf, mUnicharData->GetBuffer() + mUnicharDataOffset,
238 0 : readCount * sizeof(PRUnichar));
239 0 : mUnicharDataOffset += readCount;
240 0 : *aReadCount = readCount;
241 0 : return NS_OK;
242 : }
243 :
244 : NS_IMETHODIMP
245 9069 : UTF8InputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
246 : void* aClosure,
247 : PRUint32 aCount, PRUint32 *aReadCount)
248 : {
249 9069 : NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
250 9069 : PRUint32 bytesToWrite = mUnicharDataLength - mUnicharDataOffset;
251 9069 : nsresult rv = NS_OK;
252 9069 : if (0 == bytesToWrite) {
253 : // Fill the unichar buffer
254 6491 : PRInt32 bytesRead = Fill(&rv);
255 6491 : if (bytesRead <= 0) {
256 2297 : *aReadCount = 0;
257 2297 : return rv;
258 : }
259 4194 : bytesToWrite = bytesRead;
260 : }
261 :
262 6772 : if (bytesToWrite > aCount)
263 2578 : bytesToWrite = aCount;
264 :
265 : PRUint32 bytesWritten;
266 6772 : PRUint32 totalBytesWritten = 0;
267 :
268 20316 : while (bytesToWrite) {
269 : rv = aWriter(this, aClosure,
270 6772 : mUnicharData->GetBuffer() + mUnicharDataOffset,
271 6772 : totalBytesWritten, bytesToWrite, &bytesWritten);
272 :
273 6772 : if (NS_FAILED(rv)) {
274 : // don't propagate errors to the caller
275 0 : break;
276 : }
277 :
278 6772 : bytesToWrite -= bytesWritten;
279 6772 : totalBytesWritten += bytesWritten;
280 6772 : mUnicharDataOffset += bytesWritten;
281 : }
282 :
283 6772 : *aReadCount = totalBytesWritten;
284 :
285 6772 : return NS_OK;
286 : }
287 :
288 : NS_IMETHODIMP
289 0 : UTF8InputStream::ReadString(PRUint32 aCount, nsAString& aString,
290 : PRUint32* aReadCount)
291 : {
292 0 : NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
293 0 : PRUint32 readCount = mUnicharDataLength - mUnicharDataOffset;
294 : nsresult errorCode;
295 0 : if (0 == readCount) {
296 : // Fill the unichar buffer
297 0 : PRInt32 bytesRead = Fill(&errorCode);
298 0 : if (bytesRead <= 0) {
299 0 : *aReadCount = 0;
300 0 : return errorCode;
301 : }
302 0 : readCount = bytesRead;
303 : }
304 0 : if (readCount > aCount) {
305 0 : readCount = aCount;
306 : }
307 0 : const PRUnichar* buf = reinterpret_cast<const PRUnichar*>(mUnicharData->GetBuffer() +
308 0 : mUnicharDataOffset);
309 0 : aString.Assign(buf, readCount);
310 :
311 0 : mUnicharDataOffset += readCount;
312 0 : *aReadCount = readCount;
313 0 : return NS_OK;
314 : }
315 :
316 :
317 6491 : PRInt32 UTF8InputStream::Fill(nsresult * aErrorCode)
318 : {
319 6491 : if (nsnull == mInput) {
320 : // We already closed the stream!
321 0 : *aErrorCode = NS_BASE_STREAM_CLOSED;
322 0 : return -1;
323 : }
324 :
325 6491 : NS_ASSERTION(mByteData->GetLength() >= mByteDataOffset, "unsigned madness");
326 6491 : PRUint32 remainder = mByteData->GetLength() - mByteDataOffset;
327 6491 : mByteDataOffset = remainder;
328 6491 : PRInt32 nb = mByteData->Fill(aErrorCode, mInput, remainder);
329 6491 : if (nb <= 0) {
330 : // Because we assume a many to one conversion, the lingering data
331 : // in the byte buffer must be a partial conversion
332 : // fragment. Because we know that we have received no more new
333 : // data to add to it, we can't convert it. Therefore, we discard
334 : // it.
335 2295 : return nb;
336 : }
337 4196 : NS_ASSERTION(remainder + nb == mByteData->GetLength(), "bad nb");
338 :
339 : // Now convert as much of the byte buffer to unicode as possible
340 : PRUint32 srcLen, dstLen;
341 4196 : CountValidUTF8Bytes(mByteData->GetBuffer(),remainder + nb, srcLen, dstLen);
342 :
343 : // the number of UCS2 characters should always be <= the number of
344 : // UTF8 chars
345 4196 : NS_ASSERTION( (remainder+nb >= srcLen), "cannot be longer than out buffer");
346 4196 : NS_ASSERTION(PRInt32(dstLen) <= mUnicharData->GetBufferSize(),
347 : "Ouch. I would overflow my buffer if I wasn't so careful.");
348 4196 : if (PRInt32(dstLen) > mUnicharData->GetBufferSize()) return 0;
349 :
350 4196 : ConvertUTF8toUTF16 converter(mUnicharData->GetBuffer());
351 :
352 4196 : nsASingleFragmentCString::const_char_iterator start = mByteData->GetBuffer();
353 4196 : nsASingleFragmentCString::const_char_iterator end = mByteData->GetBuffer() + srcLen;
354 :
355 4196 : copy_string(start, end, converter);
356 4196 : if (converter.Length() != dstLen) {
357 2 : *aErrorCode = NS_BASE_STREAM_BAD_CONVERSION;
358 2 : return -1;
359 : }
360 :
361 4194 : mUnicharDataOffset = 0;
362 4194 : mUnicharDataLength = dstLen;
363 4194 : mByteDataOffset = srcLen;
364 :
365 4194 : return dstLen;
366 : }
367 :
368 : void
369 4196 : UTF8InputStream::CountValidUTF8Bytes(const char* aBuffer, PRUint32 aMaxBytes, PRUint32& aValidUTF8bytes, PRUint32& aValidUTF16CodeUnits)
370 : {
371 4196 : const char *c = aBuffer;
372 4196 : const char *end = aBuffer + aMaxBytes;
373 4196 : const char *lastchar = c; // pre-initialize in case of 0-length buffer
374 4196 : PRUint32 utf16length = 0;
375 22738526 : while (c < end && *c) {
376 22730135 : lastchar = c;
377 22730135 : utf16length++;
378 :
379 22730135 : if (UTF8traits::isASCII(*c))
380 22729910 : c++;
381 225 : else if (UTF8traits::is2byte(*c))
382 2 : c += 2;
383 223 : else if (UTF8traits::is3byte(*c))
384 221 : c += 3;
385 2 : else if (UTF8traits::is4byte(*c)) {
386 1 : c += 4;
387 1 : utf16length++; // add 1 more because this will be converted to a
388 : // surrogate pair.
389 : }
390 1 : else if (UTF8traits::is5byte(*c))
391 0 : c += 5;
392 1 : else if (UTF8traits::is6byte(*c))
393 0 : c += 6;
394 : else {
395 1 : NS_WARNING("Unrecognized UTF8 string in UTF8InputStream::CountValidUTF8Bytes()");
396 1 : break; // Otherwise we go into an infinite loop. But what happens now?
397 : }
398 : }
399 4196 : if (c > end) {
400 1 : c = lastchar;
401 1 : utf16length--;
402 : }
403 :
404 4196 : aValidUTF8bytes = c - aBuffer;
405 4196 : aValidUTF16CodeUnits = utf16length;
406 4196 : }
407 :
408 0 : NS_IMPL_QUERY_INTERFACE2(nsSimpleUnicharStreamFactory,
409 : nsIFactory,
410 : nsISimpleUnicharStreamFactory)
411 :
412 0 : NS_IMETHODIMP_(nsrefcnt) nsSimpleUnicharStreamFactory::AddRef() { return 2; }
413 0 : NS_IMETHODIMP_(nsrefcnt) nsSimpleUnicharStreamFactory::Release() { return 1; }
414 :
415 : NS_IMETHODIMP
416 0 : nsSimpleUnicharStreamFactory::CreateInstance(nsISupports* aOuter, REFNSIID aIID,
417 : void **aResult)
418 : {
419 0 : return NS_ERROR_NOT_IMPLEMENTED;
420 : }
421 :
422 : NS_IMETHODIMP
423 0 : nsSimpleUnicharStreamFactory::LockFactory(bool aLock)
424 : {
425 0 : return NS_OK;
426 : }
427 :
428 : NS_IMETHODIMP
429 0 : nsSimpleUnicharStreamFactory::CreateInstanceFromString(const nsAString& aString,
430 : nsIUnicharInputStream* *aResult)
431 : {
432 0 : StringUnicharInputStream* it = new StringUnicharInputStream(aString);
433 0 : if (!it) {
434 0 : return NS_ERROR_OUT_OF_MEMORY;
435 : }
436 :
437 0 : NS_ADDREF(*aResult = it);
438 0 : return NS_OK;
439 : }
440 :
441 : NS_IMETHODIMP
442 2297 : nsSimpleUnicharStreamFactory::CreateInstanceFromUTF8Stream(nsIInputStream* aStreamToWrap,
443 : nsIUnicharInputStream* *aResult)
444 : {
445 2297 : *aResult = nsnull;
446 :
447 : // Create converter input stream
448 4594 : nsRefPtr<UTF8InputStream> it = new UTF8InputStream();
449 2297 : if (!it)
450 0 : return NS_ERROR_OUT_OF_MEMORY;
451 :
452 2297 : nsresult rv = it->Init(aStreamToWrap);
453 2297 : if (NS_FAILED(rv))
454 0 : return rv;
455 :
456 2297 : NS_ADDREF(*aResult = it);
457 2297 : return NS_OK;
458 : }
459 :
460 : nsSimpleUnicharStreamFactory*
461 2297 : nsSimpleUnicharStreamFactory::GetInstance()
462 : {
463 2297 : return const_cast<nsSimpleUnicharStreamFactory*>(&kInstance);
464 : }
465 :
466 : const nsSimpleUnicharStreamFactory
467 2928 : nsSimpleUnicharStreamFactory::kInstance;
|