1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is mozilla.org code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 2000
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : * Scott Collins <scc@mozilla.org> (original author)
24 : * Johnny Stenbeck <jst@netscape.com>
25 : *
26 : * Alternatively, the contents of this file may be used under the terms of
27 : * either of the GNU General Public License Version 2 or later (the "GPL"),
28 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 : * in which case the provisions of the GPL or the LGPL are applicable instead
30 : * of those above. If you wish to allow use of your version of this file only
31 : * under the terms of either the GPL or the LGPL, and not to allow others to
32 : * use your version of this file under the terms of the MPL, indicate your
33 : * decision by deleting the provisions above and replace them with the notice
34 : * and other provisions required by the GPL or the LGPL. If you do not delete
35 : * the provisions above, a recipient may use your version of this file under
36 : * the terms of any one of the MPL, the GPL or the LGPL.
37 : *
38 : * ***** END LICENSE BLOCK ***** */
39 :
40 : #ifndef nsReadableUtils_h___
41 : #define nsReadableUtils_h___
42 :
43 : /**
44 : * Arguably, all the routines in this file are mis-named.
45 : * According to our conventions, they should be |NS_xxx|.
46 : */
47 :
48 : #ifndef nsAString_h___
49 : #include "nsAString.h"
50 : #endif
51 : #include "nsTArray.h"
52 :
53 234735 : inline size_t Distance( const nsReadingIterator<PRUnichar>& start, const nsReadingIterator<PRUnichar>& end )
54 : {
55 234735 : return end.get() - start.get();
56 : }
57 3007 : inline size_t Distance( const nsReadingIterator<char>& start, const nsReadingIterator<char>& end )
58 : {
59 3007 : return end.get() - start.get();
60 : }
61 :
62 : void LossyCopyUTF16toASCII( const nsAString& aSource, nsACString& aDest NS_OUTPARAM );
63 : void CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest NS_OUTPARAM );
64 :
65 : void LossyCopyUTF16toASCII( const PRUnichar* aSource, nsACString& aDest NS_OUTPARAM );
66 : void CopyASCIItoUTF16( const char* aSource, nsAString& aDest NS_OUTPARAM );
67 :
68 : void CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest NS_OUTPARAM );
69 : void CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest NS_OUTPARAM );
70 :
71 : void CopyUTF16toUTF8( const PRUnichar* aSource, nsACString& aDest NS_OUTPARAM );
72 : void CopyUTF8toUTF16( const char* aSource, nsAString& aDest NS_OUTPARAM );
73 :
74 : void LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest );
75 : void AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest );
76 :
77 : void LossyAppendUTF16toASCII( const PRUnichar* aSource, nsACString& aDest );
78 : void AppendASCIItoUTF16( const char* aSource, nsAString& aDest );
79 :
80 : void AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest );
81 : void AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest );
82 :
83 : void AppendUTF16toUTF8( const PRUnichar* aSource, nsACString& aDest );
84 : void AppendUTF8toUTF16( const char* aSource, nsAString& aDest );
85 :
86 : /**
87 : * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
88 : *
89 : * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|.
90 : * Performs a lossy encoding conversion by chopping 16-bit wide characters down to 8-bits wide while copying |aSource| to your new buffer.
91 : * This conversion is not well defined; but it reproduces legacy string behavior.
92 : * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
93 : *
94 : * @param aSource a 16-bit wide string
95 : * @return a new |char| buffer you must free with |nsMemory::Free|.
96 : */
97 : char* ToNewCString( const nsAString& aSource );
98 :
99 :
100 : /**
101 : * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
102 : *
103 : * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|.
104 : * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
105 : *
106 : * @param aSource an 8-bit wide string
107 : * @return a new |char| buffer you must free with |nsMemory::Free|.
108 : */
109 : char* ToNewCString( const nsACString& aSource );
110 :
111 : /**
112 : * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
113 : *
114 : * Allocates and returns a new |char| buffer which you must free with
115 : * |nsMemory::Free|.
116 : * Performs an encoding conversion from a UTF-16 string to a UTF-8 string
117 : * copying |aSource| to your new buffer.
118 : * The new buffer is zero-terminated, but that may not help you if |aSource|
119 : * contains embedded nulls.
120 : *
121 : * @param aSource a UTF-16 string (made of PRUnichar's)
122 : * @param aUTF8Count the number of 8-bit units that was returned
123 : * @return a new |char| buffer you must free with |nsMemory::Free|.
124 : */
125 :
126 : char* ToNewUTF8String( const nsAString& aSource, PRUint32 *aUTF8Count = nsnull );
127 :
128 :
129 : /**
130 : * Returns a new |PRUnichar| buffer containing a zero-terminated copy of
131 : * |aSource|.
132 : *
133 : * Allocates and returns a new |PRUnichar| buffer which you must free with
134 : * |nsMemory::Free|.
135 : * The new buffer is zero-terminated, but that may not help you if |aSource|
136 : * contains embedded nulls.
137 : *
138 : * @param aSource a UTF-16 string
139 : * @return a new |PRUnichar| buffer you must free with |nsMemory::Free|.
140 : */
141 : PRUnichar* ToNewUnicode( const nsAString& aSource );
142 :
143 :
144 : /**
145 : * Returns a new |PRUnichar| buffer containing a zero-terminated copy of |aSource|.
146 : *
147 : * Allocates and returns a new |PRUnichar| buffer which you must free with |nsMemory::Free|.
148 : * Performs an encoding conversion by 0-padding 8-bit wide characters up to 16-bits wide while copying |aSource| to your new buffer.
149 : * This conversion is not well defined; but it reproduces legacy string behavior.
150 : * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
151 : *
152 : * @param aSource an 8-bit wide string (a C-string, NOT UTF-8)
153 : * @return a new |PRUnichar| buffer you must free with |nsMemory::Free|.
154 : */
155 : PRUnichar* ToNewUnicode( const nsACString& aSource );
156 :
157 : /**
158 : * Returns a new |PRUnichar| buffer containing a zero-terminated copy
159 : * of |aSource|.
160 : *
161 : * Allocates and returns a new |PRUnichar| buffer which you must free with
162 : * |nsMemory::Free|. Performs an encoding conversion from UTF-8 to UTF-16
163 : * while copying |aSource| to your new buffer. This conversion is well defined
164 : * for a valid UTF-8 string. The new buffer is zero-terminated, but that
165 : * may not help you if |aSource| contains embedded nulls.
166 : *
167 : * @param aSource an 8-bit wide string, UTF-8 encoded
168 : * @param aUTF16Count the number of 16-bit units that was returned
169 : * @return a new |PRUnichar| buffer you must free with |nsMemory::Free|.
170 : * (UTF-16 encoded)
171 : */
172 : PRUnichar* UTF8ToNewUnicode( const nsACString& aSource, PRUint32 *aUTF16Count = nsnull );
173 :
174 : /**
175 : * Copies |aLength| 16-bit code units from the start of |aSource| to the
176 : * |PRUnichar| buffer |aDest|.
177 : *
178 : * After this operation |aDest| is not null terminated.
179 : *
180 : * @param aSource a UTF-16 string
181 : * @param aSrcOffset start offset in the source string
182 : * @param aDest a |PRUnichar| buffer
183 : * @param aLength the number of 16-bit code units to copy
184 : * @return pointer to destination buffer - identical to |aDest|
185 : */
186 : PRUnichar* CopyUnicodeTo( const nsAString& aSource,
187 : PRUint32 aSrcOffset,
188 : PRUnichar* aDest,
189 : PRUint32 aLength );
190 :
191 :
192 : /**
193 : * Copies 16-bit characters between iterators |aSrcStart| and
194 : * |aSrcEnd| to the writable string |aDest|. Similar to the
195 : * |nsString::Mid| method.
196 : *
197 : * After this operation |aDest| is not null terminated.
198 : *
199 : * @param aSrcStart start source iterator
200 : * @param aSrcEnd end source iterator
201 : * @param aDest destination for the copy
202 : */
203 : void CopyUnicodeTo( const nsAString::const_iterator& aSrcStart,
204 : const nsAString::const_iterator& aSrcEnd,
205 : nsAString& aDest );
206 :
207 : /**
208 : * Appends 16-bit characters between iterators |aSrcStart| and
209 : * |aSrcEnd| to the writable string |aDest|.
210 : *
211 : * After this operation |aDest| is not null terminated.
212 : *
213 : * @param aSrcStart start source iterator
214 : * @param aSrcEnd end source iterator
215 : * @param aDest destination for the copy
216 : */
217 : void AppendUnicodeTo( const nsAString::const_iterator& aSrcStart,
218 : const nsAString::const_iterator& aSrcEnd,
219 : nsAString& aDest );
220 :
221 : /**
222 : * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F).
223 : *
224 : * @param aString a 16-bit wide string to scan
225 : */
226 : bool IsASCII( const nsAString& aString );
227 :
228 : /**
229 : * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F).
230 : *
231 : * @param aString an 8-bit wide string to scan
232 : */
233 : bool IsASCII( const nsACString& aString );
234 :
235 : /**
236 : * Returns |true| if |aString| is a valid UTF-8 string.
237 : * XXX This is neither bullet-proof nor an all-purpose UTF-8 validator.
238 : * It is mainly written to replace, and is roughly equivalent to,
239 : *
240 : * str.Equals(NS_ConvertUTF16toUTF8(NS_ConvertUTF8toUTF16(str)))
241 : *
242 : * (see bug 191541)
243 : * As such, it does not check for non-UTF-8 7bit encodings such as
244 : * ISO-2022-JP and HZ.
245 : *
246 : * It rejects sequences with the following errors:
247 : *
248 : * byte sequences that cannot be decoded into characters according to
249 : * UTF-8's rules (including cases where the input is part of a valid
250 : * UTF-8 sequence but starts or ends mid-character)
251 : * overlong sequences (i.e., cases where a character was encoded
252 : * non-canonically by using more bytes than necessary)
253 : * surrogate codepoints (i.e., the codepoints reserved for
254 : * representing astral characters in UTF-16)
255 : * codepoints above the unicode range (i.e., outside the first 17
256 : * planes; higher than U+10FFFF), in accordance with
257 : * http://tools.ietf.org/html/rfc3629
258 : * when aRejectNonChar is true (the default), any codepoint whose low
259 : * 16 bits are 0xFFFE or 0xFFFF
260 :
261 : *
262 : * @param aString an 8-bit wide string to scan
263 : * @param aRejectNonChar a boolean to control the rejection of utf-8
264 : * non characters
265 : */
266 : bool IsUTF8( const nsACString& aString, bool aRejectNonChar = true );
267 :
268 : bool ParseString(const nsACString& aAstring, char aDelimiter,
269 : nsTArray<nsCString>& aArray);
270 :
271 : /**
272 : * Converts case in place in the argument string.
273 : */
274 : void ToUpperCase( nsACString& );
275 :
276 : void ToLowerCase( nsACString& );
277 :
278 : void ToUpperCase( nsCSubstring& );
279 :
280 : void ToLowerCase( nsCSubstring& );
281 :
282 : /**
283 : * Converts case from string aSource to aDest.
284 : */
285 : void ToUpperCase( const nsACString& aSource, nsACString& aDest );
286 :
287 : void ToLowerCase( const nsACString& aSource, nsACString& aDest );
288 :
289 : /**
290 : * Finds the leftmost occurrence of |aPattern|, if any in the range |aSearchStart|..|aSearchEnd|.
291 : *
292 : * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
293 : * point to the match. If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
294 : *
295 : * Currently, this is equivalent to the O(m*n) implementation previously on |ns[C]String|.
296 : * If we need something faster, then we can implement that later.
297 : */
298 :
299 : bool FindInReadable( const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator() );
300 : bool FindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator() );
301 :
302 : /* sometimes we don't care about where the string was, just that we
303 : * found it or not */
304 2 : inline bool FindInReadable( const nsAString& aPattern, const nsAString& aSource, const nsStringComparator& compare = nsDefaultStringComparator() )
305 : {
306 2 : nsAString::const_iterator start, end;
307 2 : aSource.BeginReading(start);
308 2 : aSource.EndReading(end);
309 2 : return FindInReadable(aPattern, start, end, compare);
310 : }
311 :
312 0 : inline bool FindInReadable( const nsACString& aPattern, const nsACString& aSource, const nsCStringComparator& compare = nsDefaultCStringComparator() )
313 : {
314 0 : nsACString::const_iterator start, end;
315 0 : aSource.BeginReading(start);
316 0 : aSource.EndReading(end);
317 0 : return FindInReadable(aPattern, start, end, compare);
318 : }
319 :
320 :
321 : bool CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator& );
322 :
323 : /**
324 : * Finds the rightmost occurrence of |aPattern|, if any, in the range |aSearchStart|..|aSearchEnd|.
325 : * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
326 : * point to the match. If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
327 : *
328 : */
329 : bool RFindInReadable( const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator() );
330 : bool RFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator() );
331 :
332 : /**
333 : * Finds the leftmost occurrence of |aChar|, if any in the range
334 : * |aSearchStart|..|aSearchEnd|.
335 : *
336 : * Returns |true| if a match was found, and adjusts |aSearchStart| to
337 : * point to the match. If no match was found, returns |false| and
338 : * makes |aSearchStart == aSearchEnd|.
339 : */
340 : bool FindCharInReadable( PRUnichar aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd );
341 : bool FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd );
342 :
343 : /**
344 : * Finds the number of occurrences of |aChar| in the string |aStr|
345 : */
346 : PRUint32 CountCharInReadable( const nsAString& aStr,
347 : PRUnichar aChar );
348 : PRUint32 CountCharInReadable( const nsACString& aStr,
349 : char aChar );
350 :
351 : bool
352 : StringBeginsWith( const nsAString& aSource, const nsAString& aSubstring,
353 : const nsStringComparator& aComparator =
354 : nsDefaultStringComparator() );
355 : bool
356 : StringBeginsWith( const nsACString& aSource, const nsACString& aSubstring,
357 : const nsCStringComparator& aComparator =
358 : nsDefaultCStringComparator() );
359 : bool
360 : StringEndsWith( const nsAString& aSource, const nsAString& aSubstring,
361 : const nsStringComparator& aComparator =
362 : nsDefaultStringComparator() );
363 : bool
364 : StringEndsWith( const nsACString& aSource, const nsACString& aSubstring,
365 : const nsCStringComparator& aComparator =
366 : nsDefaultCStringComparator() );
367 :
368 : const nsAFlatString& EmptyString();
369 : const nsAFlatCString& EmptyCString();
370 :
371 : const nsAFlatString& NullString();
372 : const nsAFlatCString& NullCString();
373 :
374 : /**
375 : * Compare a UTF-8 string to a UTF-16 string.
376 : *
377 : * Returns 0 if the strings are equal, -1 if aUTF8String is less
378 : * than aUTF16String, and 1 in the reverse case. In case of fatal
379 : * error (e.g. the strings are not valid UTF-8 and UTF-16 respectively),
380 : * this method will return PR_INT32_MIN.
381 : */
382 : PRInt32
383 : CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,
384 : const nsASingleFragmentString& aUTF16String);
385 :
386 : void
387 : AppendUCS4ToUTF16(const PRUint32 aSource, nsAString& aDest);
388 :
389 : template<class T>
390 520674 : inline bool EnsureStringLength(T& aStr, PRUint32 aLen)
391 : {
392 520674 : aStr.SetLength(aLen);
393 520674 : return (aStr.Length() == aLen);
394 : }
395 :
396 : #endif // !defined(nsReadableUtils_h___)
|