1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is mozilla.org code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 2000
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : * Scott Collins <scc@mozilla.org> (original author)
24 : *
25 : * Alternatively, the contents of this file may be used under the terms of
26 : * either of the GNU General Public License Version 2 or later (the "GPL"),
27 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 : * in which case the provisions of the GPL or the LGPL are applicable instead
29 : * of those above. If you wish to allow use of your version of this file only
30 : * under the terms of either the GPL or the LGPL, and not to allow others to
31 : * use your version of this file under the terms of the MPL, indicate your
32 : * decision by deleting the provisions above and replace them with the notice
33 : * and other provisions required by the GPL or the LGPL. If you do not delete
34 : * the provisions above, a recipient may use your version of this file under
35 : * the terms of any one of the MPL, the GPL or the LGPL.
36 : *
37 : * ***** END LICENSE BLOCK ***** */
38 :
39 : #include "nsReadableUtils.h"
40 : #include "nsMemory.h"
41 : #include "nsString.h"
42 : #include "nsUTF8Utils.h"
43 :
44 : void
45 8975 : LossyCopyUTF16toASCII( const nsAString& aSource, nsACString& aDest )
46 : {
47 8975 : aDest.Truncate();
48 8975 : LossyAppendUTF16toASCII(aSource, aDest);
49 8975 : }
50 :
51 : void
52 55473 : CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest )
53 : {
54 55473 : aDest.Truncate();
55 55473 : AppendASCIItoUTF16(aSource, aDest);
56 55473 : }
57 :
58 : void
59 22 : LossyCopyUTF16toASCII( const PRUnichar* aSource, nsACString& aDest )
60 : {
61 22 : aDest.Truncate();
62 22 : if (aSource) {
63 22 : LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
64 : }
65 22 : }
66 :
67 : void
68 1241 : CopyASCIItoUTF16( const char* aSource, nsAString& aDest )
69 : {
70 1241 : aDest.Truncate();
71 1241 : if (aSource) {
72 1241 : AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
73 : }
74 1241 : }
75 :
76 : void
77 619232 : CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest )
78 : {
79 619232 : aDest.Truncate();
80 619232 : AppendUTF16toUTF8(aSource, aDest);
81 619232 : }
82 :
83 : void
84 72423 : CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest )
85 : {
86 72423 : aDest.Truncate();
87 72423 : AppendUTF8toUTF16(aSource, aDest);
88 72423 : }
89 :
90 : void
91 559 : CopyUTF16toUTF8( const PRUnichar* aSource, nsACString& aDest )
92 : {
93 559 : aDest.Truncate();
94 559 : AppendUTF16toUTF8(aSource, aDest);
95 559 : }
96 :
97 : void
98 0 : CopyUTF8toUTF16( const char* aSource, nsAString& aDest )
99 : {
100 0 : aDest.Truncate();
101 0 : AppendUTF8toUTF16(aSource, aDest);
102 0 : }
103 :
104 : // Like GetMutableData, but returns false if it can't
105 : // allocate enough memory (e.g. due to OOM) rather than
106 : // returning zero (which could have other meanings) and
107 : // throws away the out-param pointer.
108 : bool
109 695785 : SetLengthForWriting(nsAString& aDest, PRUint32 aDesiredLength)
110 : {
111 : PRUnichar* dummy;
112 695785 : PRUint32 len = aDest.GetMutableData(&dummy, aDesiredLength);
113 695785 : return (len >= aDesiredLength);
114 : }
115 :
116 : bool
117 1235574 : SetLengthForWritingC(nsACString& aDest, PRUint32 aDesiredLength)
118 : {
119 : char* dummy;
120 1235574 : PRUint32 len = aDest.GetMutableData(&dummy, aDesiredLength);
121 1235574 : return (len >= aDesiredLength);
122 : }
123 :
124 :
125 : void
126 9797 : LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest )
127 : {
128 9797 : PRUint32 old_dest_length = aDest.Length();
129 9797 : if (!SetLengthForWritingC(aDest, old_dest_length + aSource.Length()))
130 0 : return;
131 :
132 9797 : nsAString::const_iterator fromBegin, fromEnd;
133 :
134 9797 : nsACString::iterator dest;
135 9797 : aDest.BeginWriting(dest);
136 :
137 9797 : dest.advance(old_dest_length);
138 :
139 : // right now, this won't work on multi-fragment destinations
140 9797 : LossyConvertEncoding16to8 converter(dest.get());
141 :
142 9797 : copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
143 : }
144 :
145 : void
146 584072 : AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest )
147 : {
148 584072 : PRUint32 old_dest_length = aDest.Length();
149 584072 : if (!SetLengthForWriting(aDest, old_dest_length + aSource.Length()))
150 0 : return;
151 :
152 584072 : nsACString::const_iterator fromBegin, fromEnd;
153 :
154 584072 : nsAString::iterator dest;
155 584072 : aDest.BeginWriting(dest);
156 :
157 584072 : dest.advance(old_dest_length);
158 :
159 : // right now, this won't work on multi-fragment destinations
160 584072 : LossyConvertEncoding8to16 converter(dest.get());
161 :
162 584072 : copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
163 : }
164 :
165 : void
166 55 : LossyAppendUTF16toASCII( const PRUnichar* aSource, nsACString& aDest )
167 : {
168 55 : if (aSource) {
169 55 : LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
170 : }
171 55 : }
172 :
173 : void
174 512908 : AppendASCIItoUTF16( const char* aSource, nsAString& aDest )
175 : {
176 512908 : if (aSource) {
177 512908 : AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
178 : }
179 512908 : }
180 :
181 : void
182 1229432 : AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest )
183 : {
184 1229432 : nsAString::const_iterator source_start, source_end;
185 1229432 : CalculateUTF8Size calculator;
186 1229432 : copy_string(aSource.BeginReading(source_start),
187 2458864 : aSource.EndReading(source_end), calculator);
188 :
189 1229432 : PRUint32 count = calculator.Size();
190 :
191 1229432 : if (count)
192 : {
193 1225777 : PRUint32 old_dest_length = aDest.Length();
194 :
195 : // Grow the buffer if we need to.
196 1225777 : if(!SetLengthForWritingC(aDest, old_dest_length + count))
197 0 : return;
198 :
199 : // All ready? Time to convert
200 :
201 1225777 : ConvertUTF16toUTF8 converter(aDest.BeginWriting() + old_dest_length);
202 1225777 : copy_string(aSource.BeginReading(source_start),
203 2451554 : aSource.EndReading(source_end), converter);
204 :
205 1225777 : NS_ASSERTION(converter.Size() == count,
206 : "Unexpected disparity between CalculateUTF8Size and "
207 : "ConvertUTF16toUTF8");
208 : }
209 : }
210 :
211 : void
212 112670 : AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest )
213 : {
214 112670 : nsACString::const_iterator source_start, source_end;
215 112670 : CalculateUTF8Length calculator;
216 112670 : copy_string(aSource.BeginReading(source_start),
217 225340 : aSource.EndReading(source_end), calculator);
218 :
219 112670 : PRUint32 count = calculator.Length();
220 :
221 : // Avoid making the string mutable if we're appending an empty string
222 112670 : if (count)
223 : {
224 111713 : PRUint32 old_dest_length = aDest.Length();
225 :
226 : // Grow the buffer if we need to.
227 111713 : if(!SetLengthForWriting(aDest, old_dest_length + count))
228 0 : return;
229 :
230 : // All ready? Time to convert
231 :
232 111713 : ConvertUTF8toUTF16 converter(aDest.BeginWriting() + old_dest_length);
233 111713 : copy_string(aSource.BeginReading(source_start),
234 223426 : aSource.EndReading(source_end), converter);
235 :
236 111713 : NS_ASSERTION(converter.ErrorEncountered() ||
237 : converter.Length() == count,
238 : "CalculateUTF8Length produced the wrong length");
239 :
240 111713 : if (converter.ErrorEncountered())
241 : {
242 0 : NS_ERROR("Input wasn't UTF8 or incorrect length was calculated");
243 0 : aDest.SetLength(old_dest_length);
244 : }
245 : }
246 : }
247 :
248 : void
249 50630 : AppendUTF16toUTF8( const PRUnichar* aSource, nsACString& aDest )
250 : {
251 50630 : if (aSource) {
252 50183 : AppendUTF16toUTF8(nsDependentString(aSource), aDest);
253 : }
254 50630 : }
255 :
256 : void
257 3469 : AppendUTF8toUTF16( const char* aSource, nsAString& aDest )
258 : {
259 3469 : if (aSource) {
260 3469 : AppendUTF8toUTF16(nsDependentCString(aSource), aDest);
261 : }
262 3469 : }
263 :
264 :
265 : /**
266 : * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator).
267 : *
268 : * @param aSource an string you will eventually be making a copy of
269 : * @return a new buffer (of the type specified by the second parameter) which you must free with |nsMemory::Free|.
270 : *
271 : */
272 : template <class FromStringT, class ToCharT>
273 : inline
274 : ToCharT*
275 794061 : AllocateStringCopy( const FromStringT& aSource, ToCharT* )
276 : {
277 794061 : return static_cast<ToCharT*>(nsMemory::Alloc((aSource.Length()+1) * sizeof(ToCharT)));
278 : }
279 :
280 :
281 : char*
282 36 : ToNewCString( const nsAString& aSource )
283 : {
284 36 : char* result = AllocateStringCopy(aSource, (char*)0);
285 36 : if (!result)
286 0 : return nsnull;
287 :
288 36 : nsAString::const_iterator fromBegin, fromEnd;
289 36 : LossyConvertEncoding16to8 converter(result);
290 36 : copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator();
291 36 : return result;
292 : }
293 :
294 : char*
295 2088 : ToNewUTF8String( const nsAString& aSource, PRUint32 *aUTF8Count )
296 : {
297 2088 : nsAString::const_iterator start, end;
298 2088 : CalculateUTF8Size calculator;
299 2088 : copy_string(aSource.BeginReading(start), aSource.EndReading(end),
300 2088 : calculator);
301 :
302 2088 : if (aUTF8Count)
303 0 : *aUTF8Count = calculator.Size();
304 :
305 : char *result = static_cast<char*>
306 2088 : (nsMemory::Alloc(calculator.Size() + 1));
307 2088 : if (!result)
308 0 : return nsnull;
309 :
310 2088 : ConvertUTF16toUTF8 converter(result);
311 2088 : copy_string(aSource.BeginReading(start), aSource.EndReading(end),
312 2088 : converter).write_terminator();
313 2088 : NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");
314 :
315 2088 : return result;
316 : }
317 :
318 : char*
319 13868 : ToNewCString( const nsACString& aSource )
320 : {
321 : // no conversion needed, just allocate a buffer of the correct length and copy into it
322 :
323 13868 : char* result = AllocateStringCopy(aSource, (char*)0);
324 13868 : if (!result)
325 0 : return nsnull;
326 :
327 13868 : nsACString::const_iterator fromBegin, fromEnd;
328 13868 : char* toBegin = result;
329 13868 : *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = char(0);
330 13868 : return result;
331 : }
332 :
333 : PRUnichar*
334 11471 : ToNewUnicode( const nsAString& aSource )
335 : {
336 : // no conversion needed, just allocate a buffer of the correct length and copy into it
337 :
338 11471 : PRUnichar* result = AllocateStringCopy(aSource, (PRUnichar*)0);
339 11471 : if (!result)
340 0 : return nsnull;
341 :
342 11471 : nsAString::const_iterator fromBegin, fromEnd;
343 11471 : PRUnichar* toBegin = result;
344 11471 : *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = PRUnichar(0);
345 11471 : return result;
346 : }
347 :
348 : PRUnichar*
349 768686 : ToNewUnicode( const nsACString& aSource )
350 : {
351 768686 : PRUnichar* result = AllocateStringCopy(aSource, (PRUnichar*)0);
352 768686 : if (!result)
353 0 : return nsnull;
354 :
355 768686 : nsACString::const_iterator fromBegin, fromEnd;
356 768686 : LossyConvertEncoding8to16 converter(result);
357 768686 : copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator();
358 768686 : return result;
359 : }
360 :
361 : PRUnichar*
362 102931 : UTF8ToNewUnicode( const nsACString& aSource, PRUint32 *aUTF16Count )
363 : {
364 102931 : nsACString::const_iterator start, end;
365 102931 : CalculateUTF8Length calculator;
366 102931 : copy_string(aSource.BeginReading(start), aSource.EndReading(end),
367 102931 : calculator);
368 :
369 102931 : if (aUTF16Count)
370 102675 : *aUTF16Count = calculator.Length();
371 :
372 : PRUnichar *result = static_cast<PRUnichar*>
373 102931 : (nsMemory::Alloc(sizeof(PRUnichar) * (calculator.Length() + 1)));
374 102931 : if (!result)
375 0 : return nsnull;
376 :
377 102931 : ConvertUTF8toUTF16 converter(result);
378 102931 : copy_string(aSource.BeginReading(start), aSource.EndReading(end),
379 102931 : converter).write_terminator();
380 102931 : NS_ASSERTION(calculator.Length() == converter.Length(), "length mismatch");
381 :
382 102931 : return result;
383 : }
384 :
385 : PRUnichar*
386 1278833 : CopyUnicodeTo( const nsAString& aSource, PRUint32 aSrcOffset, PRUnichar* aDest, PRUint32 aLength )
387 : {
388 1278833 : nsAString::const_iterator fromBegin, fromEnd;
389 1278833 : PRUnichar* toBegin = aDest;
390 1278833 : copy_string(aSource.BeginReading(fromBegin).advance( PRInt32(aSrcOffset) ), aSource.BeginReading(fromEnd).advance( PRInt32(aSrcOffset+aLength) ), toBegin);
391 1278833 : return aDest;
392 : }
393 :
394 : void
395 0 : CopyUnicodeTo( const nsAString::const_iterator& aSrcStart,
396 : const nsAString::const_iterator& aSrcEnd,
397 : nsAString& aDest )
398 : {
399 0 : nsAString::iterator writer;
400 0 : if (!SetLengthForWriting(aDest, Distance(aSrcStart, aSrcEnd)))
401 0 : return;
402 :
403 0 : aDest.BeginWriting(writer);
404 0 : nsAString::const_iterator fromBegin(aSrcStart);
405 :
406 0 : copy_string(fromBegin, aSrcEnd, writer);
407 : }
408 :
409 : void
410 0 : AppendUnicodeTo( const nsAString::const_iterator& aSrcStart,
411 : const nsAString::const_iterator& aSrcEnd,
412 : nsAString& aDest )
413 : {
414 0 : nsAString::iterator writer;
415 0 : PRUint32 oldLength = aDest.Length();
416 0 : if(!SetLengthForWriting(aDest, oldLength + Distance(aSrcStart, aSrcEnd)))
417 0 : return;
418 :
419 0 : aDest.BeginWriting(writer).advance(oldLength);
420 0 : nsAString::const_iterator fromBegin(aSrcStart);
421 :
422 0 : copy_string(fromBegin, aSrcEnd, writer);
423 : }
424 :
425 : bool
426 10000 : IsASCII( const nsAString& aString )
427 : {
428 : static const PRUnichar NOT_ASCII = PRUnichar(~0x007F);
429 :
430 :
431 : // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
432 :
433 10000 : nsAString::const_iterator iter, done_reading;
434 10000 : aString.BeginReading(iter);
435 10000 : aString.EndReading(done_reading);
436 :
437 10000 : const PRUnichar* c = iter.get();
438 10000 : const PRUnichar* end = done_reading.get();
439 :
440 10000 : while ( c < end )
441 : {
442 46205 : if ( *c++ & NOT_ASCII )
443 1037 : return false;
444 : }
445 :
446 8963 : return true;
447 : }
448 :
449 : bool
450 1385147 : IsASCII( const nsACString& aString )
451 : {
452 : static const char NOT_ASCII = char(~0x7F);
453 :
454 :
455 : // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
456 :
457 1385147 : nsACString::const_iterator iter, done_reading;
458 1385147 : aString.BeginReading(iter);
459 1385147 : aString.EndReading(done_reading);
460 :
461 1385147 : const char* c = iter.get();
462 1385147 : const char* end = done_reading.get();
463 :
464 1385147 : while ( c < end )
465 : {
466 13652149 : if ( *c++ & NOT_ASCII )
467 310 : return false;
468 : }
469 :
470 1384837 : return true;
471 : }
472 :
/**
 * Checks whether |aString| is a well-formed UTF-8 byte sequence.
 *
 * The bytes are scanned once. |state| holds the number of continuation
 * bytes that are still expected for the multi-byte sequence currently being
 * read (0 means "expecting a lead byte"). The flags |overlong|, |surrogate|
 * and |nonchar| are armed by particular lead bytes and constrain the
 * continuation byte(s) that follow.
 *
 * @param aString        the 8-bit string to validate
 * @param aRejectNonChar when true, sequences that the |nonchar| tracking
 *                       below identifies as non-characters make the function
 *                       return false; when false, that tracking is disabled
 * @return true when the whole string passed all checks and did not end in
 *         the middle of a multi-byte sequence
 */
bool
IsUTF8( const nsACString& aString, bool aRejectNonChar )
{
  nsReadingIterator<char> done_reading;
  aString.EndReading(done_reading);

  PRInt32 state = 0;      // continuation bytes still expected
  bool overlong = false;  // next continuation byte must be > olupper
  bool surrogate = false; // next continuation byte must be < slower
  bool nonchar = false;   // sequence may still turn out to be a non-character
  PRUint16 olupper = 0; // overlong byte upper bound.
  PRUint16 slower = 0; // surrogate byte lower bound.

  nsReadingIterator<char> iter;
  aString.BeginReading(iter);

  const char* ptr = iter.get();
  const char* end = done_reading.get();
  while ( ptr < end )
    {
      PRUint8 c;

      // Not inside a multi-byte sequence: classify the next lead byte.
      if (0 == state)
        {
          c = *ptr++;

          if ( UTF8traits::isASCII(c) )
            continue;

          if ( c <= 0xC1 ) // [80-BF] where not expected, [C0-C1] for overlong.
            return false;
          else if ( UTF8traits::is2byte(c) )
            state = 1;
          else if ( UTF8traits::is3byte(c) )
            {
              state = 2;
              if ( c == 0xE0 ) // to exclude E0[80-9F][80-BF]
                {
                  overlong = true;
                  olupper = 0x9F;
                }
              else if ( c == 0xED ) // ED[A0-BF][80-BF] : surrogate codepoint
                {
                  surrogate = true;
                  slower = 0xA0;
                }
              else if ( c == 0xEF ) // EF BF [BE-BF] : non-character
                nonchar = true;
            }
          else if ( c <= 0xF4 ) // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090)
            {
              state = 3;
              nonchar = true;
              if ( c == 0xF0 ) // to exclude F0[80-8F][80-BF]{2}
                {
                  overlong = true;
                  olupper = 0x8F;
                }
              else if ( c == 0xF4 ) // to exclude F4[90-BF][80-BF]
                {
                  // actually not surrogates but codepoints beyond 0x10FFFF
                  surrogate = true;
                  slower = 0x90;
                }
            }
          else
            return false; // Not UTF-8 string
        }

      // The caller did not ask for non-character rejection: stop tracking it.
      if (nonchar && !aRejectNonChar)
        nonchar = false;

      // Consume the continuation bytes of the current sequence.
      while ( ptr < end && state )
        {
          c = *ptr++;
          --state;

          // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
          // Clear the flag as soon as one byte rules the pattern out.
          if ( nonchar &&
               ( ( !state && c < 0xBE ) ||
                 ( state == 1 && c != 0xBF ) ||
                 ( state == 2 && 0x0F != (0x0F & c) )))
            nonchar = false;

          // Reject a byte that is not a continuation byte, one that falls in
          // the range excluded by the lead byte, or the final byte of a
          // sequence that is still flagged as a non-character.
          if ( !UTF8traits::isInSeq(c) || ( overlong && c <= olupper ) ||
               ( surrogate && slower <= c ) || ( nonchar && !state ))
            return false; // Not UTF-8 string

          // The range restrictions only apply to the first continuation byte.
          overlong = surrogate = false;
        }
    }
  return !state; // state != 0 at the end indicates an invalid UTF-8 seq.
}
566 :
567 : /**
568 : * A character sink for in-place case conversion.
569 : */
570 : class ConvertToUpperCase
571 : {
572 : public:
573 : typedef char value_type;
574 :
575 : PRUint32
576 0 : write( const char* aSource, PRUint32 aSourceLength )
577 : {
578 0 : char* cp = const_cast<char*>(aSource);
579 0 : const char* end = aSource + aSourceLength;
580 0 : while (cp != end) {
581 0 : char ch = *cp;
582 0 : if ((ch >= 'a') && (ch <= 'z'))
583 0 : *cp = ch - ('a' - 'A');
584 0 : ++cp;
585 : }
586 0 : return aSourceLength;
587 : }
588 : };
589 :
590 : void
591 0 : ToUpperCase( nsCSubstring& aCString )
592 : {
593 : ConvertToUpperCase converter;
594 : char* start;
595 0 : converter.write(aCString.BeginWriting(start), aCString.Length());
596 0 : }
597 :
598 : /**
599 : * A character sink for copying with case conversion.
600 : */
601 : class CopyToUpperCase
602 : {
603 : public:
604 : typedef char value_type;
605 :
606 0 : CopyToUpperCase( nsACString::iterator& aDestIter )
607 0 : : mIter(aDestIter)
608 : {
609 0 : }
610 :
611 : PRUint32
612 0 : write( const char* aSource, PRUint32 aSourceLength )
613 : {
614 0 : PRUint32 len = NS_MIN(PRUint32(mIter.size_forward()), aSourceLength);
615 0 : char* cp = mIter.get();
616 0 : const char* end = aSource + len;
617 0 : while (aSource != end) {
618 0 : char ch = *aSource;
619 0 : if ((ch >= 'a') && (ch <= 'z'))
620 0 : *cp = ch - ('a' - 'A');
621 : else
622 0 : *cp = ch;
623 0 : ++aSource;
624 0 : ++cp;
625 : }
626 0 : mIter.advance(len);
627 0 : return len;
628 : }
629 :
630 : protected:
631 : nsACString::iterator& mIter;
632 : };
633 :
634 : void
635 0 : ToUpperCase( const nsACString& aSource, nsACString& aDest )
636 : {
637 0 : nsACString::const_iterator fromBegin, fromEnd;
638 0 : nsACString::iterator toBegin;
639 0 : if (!SetLengthForWritingC(aDest, aSource.Length()))
640 0 : return;
641 :
642 0 : CopyToUpperCase converter(aDest.BeginWriting(toBegin));
643 0 : copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
644 : }
645 :
646 : /**
647 : * A character sink for case conversion.
648 : */
649 : class ConvertToLowerCase
650 : {
651 : public:
652 : typedef char value_type;
653 :
654 : PRUint32
655 4240110 : write( const char* aSource, PRUint32 aSourceLength )
656 : {
657 4240110 : char* cp = const_cast<char*>(aSource);
658 4240110 : const char* end = aSource + aSourceLength;
659 57720174 : while (cp != end) {
660 49239954 : char ch = *cp;
661 49239954 : if ((ch >= 'A') && (ch <= 'Z'))
662 193534 : *cp = ch + ('a' - 'A');
663 49239954 : ++cp;
664 : }
665 4240110 : return aSourceLength;
666 : }
667 : };
668 :
669 : void
670 4240110 : ToLowerCase( nsCSubstring& aCString )
671 : {
672 : ConvertToLowerCase converter;
673 : char* start;
674 4240110 : converter.write(aCString.BeginWriting(start), aCString.Length());
675 4240110 : }
676 :
677 : /**
678 : * A character sink for copying with case conversion.
679 : */
680 : class CopyToLowerCase
681 : {
682 : public:
683 : typedef char value_type;
684 :
685 0 : CopyToLowerCase( nsACString::iterator& aDestIter )
686 0 : : mIter(aDestIter)
687 : {
688 0 : }
689 :
690 : PRUint32
691 0 : write( const char* aSource, PRUint32 aSourceLength )
692 : {
693 0 : PRUint32 len = NS_MIN(PRUint32(mIter.size_forward()), aSourceLength);
694 0 : char* cp = mIter.get();
695 0 : const char* end = aSource + len;
696 0 : while (aSource != end) {
697 0 : char ch = *aSource;
698 0 : if ((ch >= 'A') && (ch <= 'Z'))
699 0 : *cp = ch + ('a' - 'A');
700 : else
701 0 : *cp = ch;
702 0 : ++aSource;
703 0 : ++cp;
704 : }
705 0 : mIter.advance(len);
706 0 : return len;
707 : }
708 :
709 : protected:
710 : nsACString::iterator& mIter;
711 : };
712 :
713 : void
714 0 : ToLowerCase( const nsACString& aSource, nsACString& aDest )
715 : {
716 0 : nsACString::const_iterator fromBegin, fromEnd;
717 0 : nsACString::iterator toBegin;
718 0 : if (!SetLengthForWritingC(aDest, aSource.Length()))
719 0 : return;
720 :
721 0 : CopyToLowerCase converter(aDest.BeginWriting(toBegin));
722 0 : copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
723 : }
724 :
725 : bool
726 638 : ParseString(const nsACString& aSource, char aDelimiter,
727 : nsTArray<nsCString>& aArray)
728 : {
729 638 : nsACString::const_iterator start, end;
730 638 : aSource.BeginReading(start);
731 638 : aSource.EndReading(end);
732 :
733 638 : PRUint32 oldLength = aArray.Length();
734 :
735 687 : for (;;)
736 : {
737 1325 : nsACString::const_iterator delimiter = start;
738 1325 : FindCharInReadable(aDelimiter, delimiter, end);
739 :
740 1325 : if (delimiter != start)
741 : {
742 1323 : if (!aArray.AppendElement(Substring(start, delimiter)))
743 : {
744 0 : aArray.RemoveElementsAt(oldLength, aArray.Length() - oldLength);
745 0 : return false;
746 : }
747 : }
748 :
749 1325 : if (delimiter == end)
750 638 : break;
751 687 : start = ++delimiter;
752 687 : if (start == end)
753 0 : break;
754 : }
755 :
756 638 : return true;
757 : }
758 :
/**
 * Searches the range [aSearchStart, aSearchEnd) from left to right for the
 * first occurrence of |aPattern|.
 *
 * |compare| is called on one character of the pattern and one character of
 * the searched text at a time; a zero result is treated as "equal".
 *
 * @param aPattern     the string to look for
 * @param aSearchStart in: start of the range to search; out: start of the
 *                     match when found, otherwise equal to |aSearchEnd|
 * @param aSearchEnd   in: end of the range to search; out: end of the match
 *                     when found, otherwise unchanged
 * @param compare      the character comparator
 * @return true when the pattern was found
 *
 * NOTE(review): an empty |aPattern| is not special-cased; the first
 * comparison reads the character at the pattern's start position regardless.
 * Presumably callers never pass an empty pattern -- confirm.
 */
template <class StringT, class IteratorT, class Comparator>
bool
FindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, const Comparator& compare )
{
  bool found_it = false;

  // only bother searching at all if we're given a non-empty range to search
  if ( aSearchStart != aSearchEnd )
    {
      IteratorT aPatternStart, aPatternEnd;
      aPattern.BeginReading(aPatternStart);
      aPattern.EndReading(aPatternEnd);

      // outer loop keeps searching till we find it or run out of string to search
      while ( !found_it )
        {
          // fast inner loop (that's what it's called, not what it is) looks for a potential match
          // by skipping ahead while the current character differs from the
          // first character of the pattern
          while ( aSearchStart != aSearchEnd &&
                  compare(aPatternStart.get(), aSearchStart.get(), 1, 1) )
            ++aSearchStart;

          // if we broke out of the `fast' loop because we're out of string ... we're done: no match
          if ( aSearchStart == aSearchEnd )
            break;

          // otherwise, we're at a potential match, let's see if we really hit one
          IteratorT testPattern(aPatternStart);
          IteratorT testSearch(aSearchStart);

          // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
          for(;;)
            {
              // we already compared the first character in the outer loop,
              // so we'll advance before the next comparison
              ++testPattern;
              ++testSearch;

              // if we verified all the way to the end of the pattern, then we found it!
              if ( testPattern == aPatternEnd )
                {
                  found_it = true;
                  aSearchEnd = testSearch; // return the exact found range through the parameters
                  break;
                }

              // if we got to end of the string we're searching before we hit the end of the
              // pattern, we'll never find what we're looking for
              if ( testSearch == aSearchEnd )
                {
                  // makes the outer loop terminate with "not found"
                  aSearchStart = aSearchEnd;
                  break;
                }

              // else if we mismatched ... it's time to advance to the next search position
              // and get back into the `fast' loop
              if ( compare(testPattern.get(), testSearch.get(), 1, 1) )
                {
                  ++aSearchStart;
                  break;
                }
            }
        }
    }

  return found_it;
}
825 :
/**
 * This searches the entire string from right to left, and returns the first match found, if any.
 *
 * |compare| is called on one character of the pattern and one character of
 * the searched text at a time; a zero result is treated as "equal".
 *
 * @param aPattern     the string to look for
 * @param aSearchStart in: start of the range to search; out: start of the
 *                     match when found, otherwise equal to |aSearchEnd|
 * @param aSearchEnd   in: end of the range to search; out: end of the match
 *                     when found, otherwise unchanged
 * @param compare      the character comparator
 * @return true when the pattern was found
 *
 * NOTE(review): |patternEnd| is decremented unconditionally, so an empty
 * |aPattern| would move it in front of the pattern's start. Presumably
 * callers never pass an empty pattern -- confirm.
 */
template <class StringT, class IteratorT, class Comparator>
bool
RFindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, const Comparator& compare )
{
  IteratorT patternStart, patternEnd, searchEnd = aSearchEnd;
  aPattern.BeginReading(patternStart);
  aPattern.EndReading(patternEnd);

  // Point to the last character in the pattern
  --patternEnd;
  // outer loop keeps searching till we run out of string to search
  while ( aSearchStart != searchEnd )
    {
      // Point to the end position of the next possible match
      --searchEnd;

      // Check last character, if a match, explore further from here
      if ( compare(patternEnd.get(), searchEnd.get(), 1, 1) == 0 )
        {
          // We're at a potential match, let's see if we really hit one
          IteratorT testPattern(patternEnd);
          IteratorT testSearch(searchEnd);

          // inner loop verifies the potential match at the current position,
          // walking both iterators backwards
          do
            {
              // if we verified all the way to the end of the pattern, then we found it!
              if ( testPattern == patternStart )
                {
                  aSearchStart = testSearch; // point to start of match
                  aSearchEnd = ++searchEnd; // point to end of match
                  return true;
                }

              // if we got to end of the string we're searching before we hit the end of the
              // pattern, we'll never find what we're looking for
              if ( testSearch == aSearchStart )
                {
                  aSearchStart = aSearchEnd;
                  return false;
                }

              // test previous character for a match
              --testPattern;
              --testSearch;
            }
          while ( compare(testPattern.get(), testSearch.get(), 1, 1) == 0 );
        }
    }

  // Ran out of candidate positions: report an empty range at the end.
  aSearchStart = aSearchEnd;
  return false;
}
882 :
883 : bool
884 221 : FindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator )
885 : {
886 221 : return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
887 : }
888 :
889 : bool
890 51001 : FindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator)
891 : {
892 51001 : return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
893 : }
894 :
895 : bool
896 25 : CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd )
897 : {
898 25 : return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, nsCaseInsensitiveCStringComparator());
899 : }
900 :
901 : bool
902 0 : RFindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator)
903 : {
904 0 : return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
905 : }
906 :
907 : bool
908 2437 : RFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator)
909 : {
910 2437 : return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
911 : }
912 :
913 : bool
914 78048 : FindCharInReadable( PRUnichar aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd )
915 : {
916 78048 : PRInt32 fragmentLength = aSearchEnd.get() - aSearchStart.get();
917 :
918 78048 : const PRUnichar* charFoundAt = nsCharTraits<PRUnichar>::find(aSearchStart.get(), fragmentLength, aChar);
919 78048 : if ( charFoundAt ) {
920 37069 : aSearchStart.advance( charFoundAt - aSearchStart.get() );
921 37069 : return true;
922 : }
923 :
924 40979 : aSearchStart.advance(fragmentLength);
925 40979 : return false;
926 : }
927 :
928 : bool
929 93071 : FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd )
930 : {
931 93071 : PRInt32 fragmentLength = aSearchEnd.get() - aSearchStart.get();
932 :
933 93071 : const char* charFoundAt = nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar);
934 93071 : if ( charFoundAt ) {
935 2417 : aSearchStart.advance( charFoundAt - aSearchStart.get() );
936 2417 : return true;
937 : }
938 :
939 90654 : aSearchStart.advance(fragmentLength);
940 90654 : return false;
941 : }
942 :
943 : PRUint32
944 0 : CountCharInReadable( const nsAString& aStr,
945 : PRUnichar aChar )
946 : {
947 0 : PRUint32 count = 0;
948 0 : nsAString::const_iterator begin, end;
949 :
950 0 : aStr.BeginReading(begin);
951 0 : aStr.EndReading(end);
952 :
953 0 : while (begin != end) {
954 0 : if (*begin == aChar) {
955 0 : ++count;
956 : }
957 0 : ++begin;
958 : }
959 :
960 0 : return count;
961 : }
962 :
963 : PRUint32
964 0 : CountCharInReadable( const nsACString& aStr,
965 : char aChar )
966 : {
967 0 : PRUint32 count = 0;
968 0 : nsACString::const_iterator begin, end;
969 :
970 0 : aStr.BeginReading(begin);
971 0 : aStr.EndReading(end);
972 :
973 0 : while (begin != end) {
974 0 : if (*begin == aChar) {
975 0 : ++count;
976 : }
977 0 : ++begin;
978 : }
979 :
980 0 : return count;
981 : }
982 :
983 : bool
984 117219 : StringBeginsWith( const nsAString& aSource, const nsAString& aSubstring,
985 : const nsStringComparator& aComparator )
986 : {
987 117219 : nsAString::size_type src_len = aSource.Length(),
988 117219 : sub_len = aSubstring.Length();
989 117219 : if (sub_len > src_len)
990 1958 : return false;
991 115261 : return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
992 : }
993 :
994 : bool
995 19724 : StringBeginsWith( const nsACString& aSource, const nsACString& aSubstring,
996 : const nsCStringComparator& aComparator )
997 : {
998 19724 : nsACString::size_type src_len = aSource.Length(),
999 19724 : sub_len = aSubstring.Length();
1000 19724 : if (sub_len > src_len)
1001 3136 : return false;
1002 16588 : return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
1003 : }
1004 :
1005 : bool
1006 1094 : StringEndsWith( const nsAString& aSource, const nsAString& aSubstring,
1007 : const nsStringComparator& aComparator )
1008 : {
1009 1094 : nsAString::size_type src_len = aSource.Length(),
1010 1094 : sub_len = aSubstring.Length();
1011 1094 : if (sub_len > src_len)
1012 0 : return false;
1013 1094 : return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,
1014 1094 : aComparator);
1015 : }
1016 :
1017 : bool
1018 10412 : StringEndsWith( const nsACString& aSource, const nsACString& aSubstring,
1019 : const nsCStringComparator& aComparator )
1020 : {
1021 10412 : nsACString::size_type src_len = aSource.Length(),
1022 10412 : sub_len = aSubstring.Length();
1023 10412 : if (sub_len > src_len)
1024 11 : return false;
1025 10401 : return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,
1026 10401 : aComparator);
1027 : }
1028 :
1029 :
1030 :
1031 : static const PRUnichar empty_buffer[1] = { '\0' };
1032 :
1033 : const nsAFlatString&
1034 5984 : EmptyString()
1035 : {
1036 5984 : static const nsDependentString sEmpty(empty_buffer);
1037 :
1038 5984 : return sEmpty;
1039 : }
1040 :
1041 : const nsAFlatCString&
1042 77178 : EmptyCString()
1043 : {
1044 77178 : static const nsDependentCString sEmpty((const char *)empty_buffer);
1045 :
1046 77178 : return sEmpty;
1047 : }
1048 :
1049 : const nsAFlatString&
1050 1 : NullString()
1051 : {
1052 1 : static const nsXPIDLString sNull;
1053 :
1054 1 : return sNull;
1055 : }
1056 :
1057 : const nsAFlatCString&
1058 0 : NullCString()
1059 : {
1060 0 : static const nsXPIDLCString sNull;
1061 :
1062 0 : return sNull;
1063 : }
1064 :
1065 : PRInt32
1066 61504 : CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,
1067 : const nsASingleFragmentString& aUTF16String)
1068 : {
1069 : static const PRUint32 NOT_ASCII = PRUint32(~0x7F);
1070 :
1071 : const char *u8, *u8end;
1072 61504 : aUTF8String.BeginReading(u8);
1073 61504 : aUTF8String.EndReading(u8end);
1074 :
1075 : const PRUnichar *u16, *u16end;
1076 61504 : aUTF16String.BeginReading(u16);
1077 61504 : aUTF16String.EndReading(u16end);
1078 :
1079 641374 : while (u8 != u8end && u16 != u16end)
1080 : {
1081 : // Cast away the signedness of *u8 to prevent signextension when
1082 : // converting to PRUint32
1083 518366 : PRUint32 c8_32 = (PRUint8)*u8;
1084 :
1085 518366 : if (c8_32 & NOT_ASCII)
1086 : {
1087 : bool err;
1088 0 : c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err);
1089 0 : if (err)
1090 0 : return PR_INT32_MIN;
1091 :
1092 0 : PRUint32 c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end);
1093 : // The above UTF16CharEnumerator::NextChar() calls can
1094 : // fail, but if it does for anything other than no data to
1095 : // look at (which can't happen here), it returns the
1096 : // Unicode replacement character 0xFFFD for the invalid
1097 : // data they were fed. Ignore that error and treat invalid
1098 : // UTF16 as 0xFFFD.
1099 : //
1100 : // This matches what our UTF16 to UTF8 conversion code
1101 : // does, and thus a UTF8 string that came from an invalid
1102 : // UTF16 string will compare equal to the invalid UTF16
1103 : // string it came from. Same is true for any other UTF16
1104 : // string differs only in the invalid part of the string.
1105 :
1106 0 : if (c8_32 != c16_32)
1107 0 : return c8_32 < c16_32 ? -1 : 1;
1108 : }
1109 : else
1110 : {
1111 518366 : if (c8_32 != *u16)
1112 0 : return c8_32 > *u16 ? 1 : -1;
1113 :
1114 518366 : ++u8;
1115 518366 : ++u16;
1116 : }
1117 : }
1118 :
1119 61504 : if (u8 != u8end)
1120 : {
1121 : // We get to the end of the UTF16 string, but no to the end of
1122 : // the UTF8 string. The UTF8 string is longer than the UTF16
1123 : // string
1124 :
1125 0 : return 1;
1126 : }
1127 :
1128 61504 : if (u16 != u16end)
1129 : {
1130 : // We get to the end of the UTF8 string, but no to the end of
1131 : // the UTF16 string. The UTF16 string is longer than the UTF8
1132 : // string
1133 :
1134 0 : return -1;
1135 : }
1136 :
1137 : // The two strings match.
1138 :
1139 61504 : return 0;
1140 : }
1141 :
1142 : void
1143 0 : AppendUCS4ToUTF16(const PRUint32 aSource, nsAString& aDest)
1144 : {
1145 0 : NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");
1146 0 : if (IS_IN_BMP(aSource))
1147 : {
1148 0 : aDest.Append(PRUnichar(aSource));
1149 : }
1150 : else
1151 : {
1152 0 : aDest.Append(H_SURROGATE(aSource));
1153 0 : aDest.Append(L_SURROGATE(aSource));
1154 : }
1155 0 : }
|