1 : // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #include "base/string_util.h"
6 :
7 : #include "build/build_config.h"
8 :
9 : #include <ctype.h>
10 : #include <errno.h>
11 : #include <math.h>
12 : #include <stdarg.h>
13 : #include <stdio.h>
14 : #include <stdlib.h>
15 : #include <string.h>
16 : #include <time.h>
17 : #include <wchar.h>
18 : #include <wctype.h>
19 :
20 : #include <algorithm>
21 : #include <vector>
22 :
23 : #include "base/basictypes.h"
24 : #include "base/logging.h"
25 : #include "base/singleton.h"
26 :
27 : namespace {
28 :
29 : // Force the singleton used by Empty[W]String[16] to be a unique type. This
30 : // prevents other code that might accidentally use Singleton<string> from
31 : // getting our internal one.
32 0 : struct EmptyStrings {
33 0 : EmptyStrings() {}
34 : const std::string s;
35 : const std::wstring ws;
36 : const string16 s16;
37 : };
38 :
39 : // Hack to convert any char-like type to its unsigned counterpart.
40 : // For example, it will convert char, signed char and unsigned char to unsigned
41 : // char.
// Primary template: any type without a specialization below maps to itself.
template <typename T>
struct ToUnsigned { typedef T Unsigned; };

// Narrow character types all map to unsigned char.
template <> struct ToUnsigned<char> { typedef unsigned char Unsigned; };
template <> struct ToUnsigned<signed char> { typedef unsigned char Unsigned; };

// 16-bit code units stored in a short map to unsigned short.
template <> struct ToUnsigned<short> { typedef unsigned short Unsigned; };

// The unsigned counterpart of wchar_t follows the platform's wchar_t width,
// as advertised by the WCHAR_T_IS_* build macros.
template <>
struct ToUnsigned<wchar_t> {
#if defined(WCHAR_T_IS_UTF16)
  typedef unsigned short Unsigned;
#elif defined(WCHAR_T_IS_UTF32)
  typedef uint32 Unsigned;
#endif
};
67 :
68 : // Used by ReplaceStringPlaceholders to track the position in the string of
69 : // replaced parameters.
struct ReplacementOffset {
  ReplacementOffset(int parameter_index, size_t string_offset)
      : parameter(parameter_index),
        offset(string_offset) {}

  // Which parameter was substituted.
  int parameter;

  // Where in the string the substituted text begins.
  size_t offset;
};

// Ordering predicate: |elem1| sorts before |elem2| when its parameter index
// is strictly smaller. Offsets do not take part in the comparison.
static bool CompareParameter(const ReplacementOffset& elem1,
                             const ReplacementOffset& elem2) {
  const int lhs_index = elem1.parameter;
  const int rhs_index = elem2.parameter;
  return lhs_index < rhs_index;
}
86 :
87 : // Generalized string-to-number conversion.
88 : //
89 : // StringToNumberTraits should provide:
90 : // - a typedef for string_type, the STL string type used as input.
91 : // - a typedef for value_type, the target numeric type.
92 : // - a static function, convert_func, which dispatches to an appropriate
93 : // strtol-like function and returns type value_type.
94 : // - a static function, valid_func, which validates |input| and returns a bool
95 : // indicating whether it is in proper form. This is used to check for
96 : // conditions that convert_func tolerates but should result in
97 : // StringToNumber returning false. For strtol-like functions, valid_func
98 : // should check for leading whitespace.
template <typename StringToNumberTraits>
bool StringToNumber(const typename StringToNumberTraits::string_type& input,
                    typename StringToNumberTraits::value_type* output) {
  typedef StringToNumberTraits traits;
  typedef typename traits::string_type::value_type char_type;

  // Clear errno first so a stale value from an earlier call is not mistaken
  // for a failure of this conversion.
  errno = 0;
  char_type* parse_end = NULL;

  // |output| always receives whatever convert_func produced, even when the
  // function goes on to report failure.
  *output = traits::convert_func(input.c_str(), &parse_end);

  // The conversion reported an error (e.g. overflow or underflow).
  if (errno != 0)
    return false;

  // Nothing to parse.
  if (input.empty())
    return false;

  // Parsing must stop exactly at the end given by the string's stated length.
  // This rejects trailing garbage, input that does not start with a number,
  // and strings with embedded NUL characters.
  const char_type* const expected_end = input.c_str() + input.length();
  if (parse_end != expected_end)
    return false;

  // Finally let the traits veto forms that convert_func tolerates.
  return traits::valid_func(input);
}
124 :
// StringToNumber traits for parsing a base-10 |long| out of a narrow string
// with strtol().
class StringToLongTraits {
 public:
  typedef std::string string_type;
  typedef long value_type;
  static const int kBase = 10;

  // Thin dispatch to strtol(); |endptr| receives the first unparsed character.
  static inline value_type convert_func(const string_type::value_type* str,
                                        string_type::value_type** endptr) {
    return strtol(str, endptr, kBase);
  }

  // Returns false for empty input and for input that starts with whitespace,
  // which strtol() would otherwise silently skip.
  static inline bool valid_func(const string_type& str) {
    // isspace() is only defined for values representable as unsigned char (or
    // EOF). A plain char with the high bit set may be negative, so convert it
    // through unsigned char before classifying it.
    return !str.empty() && !isspace(static_cast<unsigned char>(str[0]));
  }
};
138 :
139 : class String16ToLongTraits {
140 : public:
141 : typedef string16 string_type;
142 : typedef long value_type;
143 : static const int kBase = 10;
144 0 : static inline value_type convert_func(const string_type::value_type* str,
145 : string_type::value_type** endptr) {
146 : #if defined(WCHAR_T_IS_UTF16)
147 : return wcstol(str, endptr, kBase);
148 : #elif defined(WCHAR_T_IS_UTF32)
149 0 : std::string ascii_string = UTF16ToASCII(string16(str));
150 0 : char* ascii_end = NULL;
151 0 : value_type ret = strtol(ascii_string.c_str(), &ascii_end, kBase);
152 0 : if (ascii_string.c_str() + ascii_string.length() == ascii_end) {
153 : *endptr =
154 0 : const_cast<string_type::value_type*>(str) + ascii_string.length();
155 : }
156 0 : return ret;
157 : #endif
158 : }
159 0 : static inline bool valid_func(const string_type& str) {
160 0 : return !str.empty() && !iswspace(str[0]);
161 : }
162 : };
163 :
164 : class StringToInt64Traits {
165 : public:
166 : typedef std::string string_type;
167 : typedef int64 value_type;
168 : static const int kBase = 10;
169 0 : static inline value_type convert_func(const string_type::value_type* str,
170 : string_type::value_type** endptr) {
171 : #ifdef OS_WIN
172 : return _strtoi64(str, endptr, kBase);
173 : #else // assume OS_POSIX
174 0 : return strtoll(str, endptr, kBase);
175 : #endif
176 : }
177 0 : static inline bool valid_func(const string_type& str) {
178 0 : return !str.empty() && !isspace(str[0]);
179 : }
180 : };
181 :
182 : class String16ToInt64Traits {
183 : public:
184 : typedef string16 string_type;
185 : typedef int64 value_type;
186 : static const int kBase = 10;
187 0 : static inline value_type convert_func(const string_type::value_type* str,
188 : string_type::value_type** endptr) {
189 : #ifdef OS_WIN
190 : return _wcstoi64(str, endptr, kBase);
191 : #else // assume OS_POSIX
192 0 : std::string ascii_string = UTF16ToASCII(string16(str));
193 0 : char* ascii_end = NULL;
194 0 : value_type ret = strtoll(ascii_string.c_str(), &ascii_end, kBase);
195 0 : if (ascii_string.c_str() + ascii_string.length() == ascii_end) {
196 : *endptr =
197 0 : const_cast<string_type::value_type*>(str) + ascii_string.length();
198 : }
199 0 : return ret;
200 : #endif
201 : }
202 0 : static inline bool valid_func(const string_type& str) {
203 0 : return !str.empty() && !iswspace(str[0]);
204 : }
205 : };
206 :
207 : // For the HexString variants, use the unsigned variants like strtoul for
208 : // convert_func so that input like "0x80000000" doesn't result in an overflow.
209 :
// StringToNumber traits for parsing a base-16 value out of a narrow string.
// The unsigned strtoul() does the parsing so that input such as "0x80000000"
// is not reported as an overflow; the result is then returned as |long|.
class HexStringToLongTraits {
 public:
  typedef std::string string_type;
  typedef long value_type;
  static const int kBase = 16;

  // Thin dispatch to strtoul(); |endptr| receives the first unparsed
  // character.
  static inline value_type convert_func(const string_type::value_type* str,
                                        string_type::value_type** endptr) {
    return strtoul(str, endptr, kBase);
  }

  // Returns false for empty input and for input that starts with whitespace,
  // which strtoul() would otherwise silently skip.
  static inline bool valid_func(const string_type& str) {
    // isspace() is only defined for values representable as unsigned char (or
    // EOF). A plain char with the high bit set may be negative, so convert it
    // through unsigned char before classifying it.
    return !str.empty() && !isspace(static_cast<unsigned char>(str[0]));
  }
};
223 :
224 : class HexString16ToLongTraits {
225 : public:
226 : typedef string16 string_type;
227 : typedef long value_type;
228 : static const int kBase = 16;
229 0 : static inline value_type convert_func(const string_type::value_type* str,
230 : string_type::value_type** endptr) {
231 : #if defined(WCHAR_T_IS_UTF16)
232 : return wcstoul(str, endptr, kBase);
233 : #elif defined(WCHAR_T_IS_UTF32)
234 0 : std::string ascii_string = UTF16ToASCII(string16(str));
235 0 : char* ascii_end = NULL;
236 0 : value_type ret = strtoul(ascii_string.c_str(), &ascii_end, kBase);
237 0 : if (ascii_string.c_str() + ascii_string.length() == ascii_end) {
238 : *endptr =
239 0 : const_cast<string_type::value_type*>(str) + ascii_string.length();
240 : }
241 0 : return ret;
242 : #endif
243 : }
244 0 : static inline bool valid_func(const string_type& str) {
245 0 : return !str.empty() && !iswspace(str[0]);
246 : }
247 : };
248 :
249 : } // namespace
250 :
251 :
252 : namespace base {
253 :
// Scans a wprintf-style |format| and returns false as soon as it meets a
// conversion this function treats as unportable: 's' or 'c' without an 'l'
// modifier, or any of 'S', 'C', 'F', 'D', 'O', 'U'. Returns true otherwise,
// including when the string ends in the middle of a specification.
bool IsWprintfFormatPortable(const wchar_t* format) {
  const wchar_t* cursor = format;
  while (*cursor != L'\0') {
    if (*cursor != L'%') {
      // Ordinary text, nothing to check.
      ++cursor;
      continue;
    }

    // |cursor| is on the '%' that opens a specification. Walk forward until a
    // known conversion character closes it.
    bool saw_l_modifier = false;
    for (;;) {
      ++cursor;
      const wchar_t ch = *cursor;

      if (ch == L'\0') {
        // Truncated specification: equally broken everywhere, and nothing
        // unportable was seen, so report it as portable.
        return true;
      }

      if (ch == L'l') {
        // Only an 'l' modifier makes 's' and 'c' acceptable.
        saw_l_modifier = true;
      } else {
        const bool unmodified_text_conversion =
            (ch == L's' || ch == L'c') && !saw_l_modifier;
        const bool always_unportable =
            ch == L'S' || ch == L'C' || ch == L'F' ||
            ch == L'D' || ch == L'O' || ch == L'U';
        if (unmodified_text_conversion || always_unportable)
          return false;
      }

      // A recognized conversion character ends the specification.
      if (wcschr(L"diouxXeEfgGaAcspn%", ch) != NULL)
        break;
    }

    // Step past the conversion character and resume scanning plain text.
    ++cursor;
  }

  return true;
}
290 :
291 :
292 : } // namespace base
293 :
294 : namespace base {
295 :
296 0 : const std::string& EmptyString() {
297 0 : return Singleton<EmptyStrings>::get()->s;
298 : }
299 :
300 0 : const std::wstring& EmptyWString() {
301 0 : return Singleton<EmptyStrings>::get()->ws;
302 : }
303 :
304 0 : const string16& EmptyString16() {
305 0 : return Singleton<EmptyStrings>::get()->s16;
306 : }
307 :
308 : }
309 :
// Zero-terminated table of the wide code points passed to TrimStringT() by
// TrimWhitespace().
const wchar_t kWhitespaceWide[] = {
  // <control-0009> to <control-000D>
  0x0009, 0x000A, 0x000B, 0x000C, 0x000D,
  0x0020,  // Space
  0x0085,  // <control-0085>
  0x00A0,  // No-Break Space
  0x1680,  // Ogham Space Mark
  0x180E,  // Mongolian Vowel Separator
  // En Quad to Hair Space
  0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
  0x2006, 0x2007, 0x2008, 0x2009, 0x200A,
  0x200C,  // Zero Width Non-Joiner
  0x2028,  // Line Separator
  0x2029,  // Paragraph Separator
  0x202F,  // Narrow No-Break Space
  0x205F,  // Medium Mathematical Space
  0x3000,  // Ideographic Space
  0
};

// Zero-terminated table of the ASCII characters passed to TrimStringT() by
// TrimWhitespaceASCII().
const char kWhitespaceASCII[] = {
  // <control-0009> to <control-000D>
  0x09, 0x0A, 0x0B, 0x0C, 0x0D,
  0x20,  // Space
  0
};

// Name of the UTF-8 code page.
const char* const kCodepageUTF8 = "UTF-8";
350 :
351 : template<typename STR>
352 0 : TrimPositions TrimStringT(const STR& input,
353 : const typename STR::value_type trim_chars[],
354 : TrimPositions positions,
355 : STR* output) {
356 : // Find the edges of leading/trailing whitespace as desired.
357 0 : const typename STR::size_type last_char = input.length() - 1;
358 : const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?
359 0 : input.find_first_not_of(trim_chars) : 0;
360 : const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?
361 0 : input.find_last_not_of(trim_chars) : last_char;
362 :
363 : // When the string was all whitespace, report that we stripped off whitespace
364 : // from whichever position the caller was interested in. For empty input, we
365 : // stripped no whitespace, but we still need to clear |output|.
366 0 : if (input.empty() ||
367 : (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
368 0 : bool input_was_empty = input.empty(); // in case output == &input
369 0 : output->clear();
370 0 : return input_was_empty ? TRIM_NONE : positions;
371 : }
372 :
373 : // Trim the whitespace.
374 0 : *output =
375 : input.substr(first_good_char, last_good_char - first_good_char + 1);
376 :
377 : // Return where we trimmed from.
378 : return static_cast<TrimPositions>(
379 : ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
380 0 : ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
381 : }
382 :
383 0 : bool TrimString(const std::wstring& input,
384 : const wchar_t trim_chars[],
385 : std::wstring* output) {
386 0 : return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
387 : }
388 :
389 0 : bool TrimString(const std::string& input,
390 : const char trim_chars[],
391 : std::string* output) {
392 0 : return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
393 : }
394 :
395 0 : TrimPositions TrimWhitespace(const std::wstring& input,
396 : TrimPositions positions,
397 : std::wstring* output) {
398 0 : return TrimStringT(input, kWhitespaceWide, positions, output);
399 : }
400 :
401 0 : TrimPositions TrimWhitespaceASCII(const std::string& input,
402 : TrimPositions positions,
403 : std::string* output) {
404 0 : return TrimStringT(input, kWhitespaceASCII, positions, output);
405 : }
406 :
407 : // This function is only for backward-compatibility.
408 : // To be removed when all callers are updated.
409 0 : TrimPositions TrimWhitespace(const std::string& input,
410 : TrimPositions positions,
411 : std::string* output) {
412 0 : return TrimWhitespaceASCII(input, positions, output);
413 : }
414 :
415 0 : std::wstring CollapseWhitespace(const std::wstring& text,
416 : bool trim_sequences_with_line_breaks) {
417 0 : std::wstring result;
418 0 : result.resize(text.size());
419 :
420 : // Set flags to pretend we're already in a trimmed whitespace sequence, so we
421 : // will trim any leading whitespace.
422 0 : bool in_whitespace = true;
423 0 : bool already_trimmed = true;
424 :
425 0 : int chars_written = 0;
426 0 : for (std::wstring::const_iterator i(text.begin()); i != text.end(); ++i) {
427 0 : if (IsWhitespace(*i)) {
428 0 : if (!in_whitespace) {
429 : // Reduce all whitespace sequences to a single space.
430 0 : in_whitespace = true;
431 0 : result[chars_written++] = L' ';
432 : }
433 0 : if (trim_sequences_with_line_breaks && !already_trimmed &&
434 0 : ((*i == '\n') || (*i == '\r'))) {
435 : // Whitespace sequences containing CR or LF are eliminated entirely.
436 0 : already_trimmed = true;
437 0 : --chars_written;
438 : }
439 : } else {
440 : // Non-whitespace chracters are copied straight across.
441 0 : in_whitespace = false;
442 0 : already_trimmed = false;
443 0 : result[chars_written++] = *i;
444 : }
445 : }
446 :
447 0 : if (in_whitespace && !already_trimmed) {
448 : // Any trailing whitespace is eliminated.
449 0 : --chars_written;
450 : }
451 :
452 0 : result.resize(chars_written);
453 : return result;
454 : }
455 :
456 3 : std::string WideToASCII(const std::wstring& wide) {
457 3 : DCHECK(IsStringASCII(wide));
458 3 : return std::string(wide.begin(), wide.end());
459 : }
460 :
461 0 : std::wstring ASCIIToWide(const std::string& ascii) {
462 0 : DCHECK(IsStringASCII(ascii));
463 0 : return std::wstring(ascii.begin(), ascii.end());
464 : }
465 :
466 0 : std::string UTF16ToASCII(const string16& utf16) {
467 0 : DCHECK(IsStringASCII(utf16));
468 0 : return std::string(utf16.begin(), utf16.end());
469 : }
470 :
471 0 : string16 ASCIIToUTF16(const std::string& ascii) {
472 0 : DCHECK(IsStringASCII(ascii));
473 0 : return string16(ascii.begin(), ascii.end());
474 : }
475 :
476 : // Latin1 is just the low range of Unicode, so we can copy directly to convert.
// Copies |wide| into |latin1| one element at a time. If any element is
// greater than 255 the function returns false and leaves |latin1| empty;
// otherwise it returns true.
bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
  // The destination is emptied up front so that a failure leaves it cleared.
  latin1->clear();

  // Convert into a scratch string and publish it only on success.
  std::string narrowed;
  narrowed.reserve(wide.size());
  for (std::wstring::const_iterator it = wide.begin(); it != wide.end();
       ++it) {
    if (*it > 255)
      return false;
    narrowed.push_back(static_cast<char>(*it));
  }

  latin1->swap(narrowed);
  return true;
}
489 :
// Returns true when no element of |str| is greater than 255. An empty string
// yields true.
bool IsString8Bit(const std::wstring& str) {
  std::wstring::const_iterator it = str.begin();
  // Advance until the end or the first element above 255.
  while (it != str.end() && !(*it > 255))
    ++it;
  return it == str.end();
}
497 :
498 : template<class STR>
499 3 : static bool DoIsStringASCII(const STR& str) {
500 22 : for (size_t i = 0; i < str.length(); i++) {
501 19 : typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
502 19 : if (c > 0x7F)
503 0 : return false;
504 : }
505 3 : return true;
506 : }
507 :
508 3 : bool IsStringASCII(const std::wstring& str) {
509 3 : return DoIsStringASCII(str);
510 : }
511 :
512 : #if !defined(WCHAR_T_IS_UTF16)
513 0 : bool IsStringASCII(const string16& str) {
514 0 : return DoIsStringASCII(str);
515 : }
516 : #endif
517 :
518 0 : bool IsStringASCII(const std::string& str) {
519 0 : return DoIsStringASCII(str);
520 : }
521 :
522 : // Helper functions that determine whether the given character begins a
523 : // UTF-8 sequence of bytes with the given length. A character satisfies
524 : // "IsInUTF8Sequence" if it is anything but the first byte in a multi-byte
525 : // character.
// True when |c| has the bit pattern 110xxxxx.
static inline bool IsBegin2ByteUTF8(int c) {
  const int kPrefixMask = 0xE0;
  const int kPrefixBits = 0xC0;
  return (c & kPrefixMask) == kPrefixBits;
}

// True when |c| has the bit pattern 1110xxxx.
static inline bool IsBegin3ByteUTF8(int c) {
  const int kPrefixMask = 0xF0;
  const int kPrefixBits = 0xE0;
  return (c & kPrefixMask) == kPrefixBits;
}

// True when |c| has the bit pattern 11110xxx.
static inline bool IsBegin4ByteUTF8(int c) {
  const int kPrefixMask = 0xF8;
  const int kPrefixBits = 0xF0;
  return (c & kPrefixMask) == kPrefixBits;
}

// True when |c| has the bit pattern 10xxxxxx.
static inline bool IsInUTF8Sequence(int c) {
  const int kPrefixMask = 0xC0;
  const int kPrefixBits = 0x80;
  return (c & kPrefixMask) == kPrefixBits;
}
538 :
539 : // This function was copied from Mozilla, with modifications. The original code
540 : // was 'IsUTF8' in xpcom/string/src/nsReadableUtils.cpp. The license block for
541 : // this function is:
542 : // This function subject to the Mozilla Public License Version
543 : // 1.1 (the "License"); you may not use this code except in compliance with
544 : // the License. You may obtain a copy of the License at
545 : // http://www.mozilla.org/MPL/
546 : //
547 : // Software distributed under the License is distributed on an "AS IS" basis,
548 : // WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
549 : // for the specific language governing rights and limitations under the
550 : // License.
551 : //
552 : // The Original Code is mozilla.org code.
553 : //
554 : // The Initial Developer of the Original Code is
555 : // Netscape Communications Corporation.
556 : // Portions created by the Initial Developer are Copyright (C) 2000
557 : // the Initial Developer. All Rights Reserved.
558 : //
559 : // Contributor(s):
560 : // Scott Collins <scc@mozilla.org> (original author)
561 : //
562 : // This is a template so that it can be run on wide and 8-bit strings. We want
563 : // to run it on wide strings when we have input that we think may have
564 : // originally been UTF-8, but has been converted to wide characters because
565 : // that's what we (and Windows) use internally.
566 : template<typename CHAR>
567 0 : static bool IsStringUTF8T(const CHAR* str, int length) {
568 0 : bool overlong = false;
569 0 : bool surrogate = false;
570 0 : bool nonchar = false;
571 :
572 : // overlong byte upper bound
573 0 : typename ToUnsigned<CHAR>::Unsigned olupper = 0;
574 :
575 : // surrogate byte lower bound
576 0 : typename ToUnsigned<CHAR>::Unsigned slower = 0;
577 :
578 : // incremented when inside a multi-byte char to indicate how many bytes
579 : // are left in the sequence
580 0 : int positions_left = 0;
581 :
582 0 : for (int i = 0; i < length; i++) {
583 : // This whole function assume an unsigned value so force its conversion to
584 : // an unsigned value.
585 0 : typename ToUnsigned<CHAR>::Unsigned c = str[i];
586 0 : if (c < 0x80)
587 0 : continue; // ASCII
588 :
589 0 : if (c <= 0xC1) {
590 : // [80-BF] where not expected, [C0-C1] for overlong
591 0 : return false;
592 0 : } else if (IsBegin2ByteUTF8(c)) {
593 0 : positions_left = 1;
594 0 : } else if (IsBegin3ByteUTF8(c)) {
595 0 : positions_left = 2;
596 0 : if (c == 0xE0) {
597 : // to exclude E0[80-9F][80-BF]
598 0 : overlong = true;
599 0 : olupper = 0x9F;
600 0 : } else if (c == 0xED) {
601 : // ED[A0-BF][80-BF]: surrogate codepoint
602 0 : surrogate = true;
603 0 : slower = 0xA0;
604 0 : } else if (c == 0xEF) {
605 : // EF BF [BE-BF] : non-character
606 : // TODO(jungshik): EF B7 [90-AF] should be checked as well.
607 0 : nonchar = true;
608 : }
609 0 : } else if (c <= 0xF4) {
610 0 : positions_left = 3;
611 0 : nonchar = true;
612 0 : if (c == 0xF0) {
613 : // to exclude F0[80-8F][80-BF]{2}
614 0 : overlong = true;
615 0 : olupper = 0x8F;
616 0 : } else if (c == 0xF4) {
617 : // to exclude F4[90-BF][80-BF]
618 : // actually not surrogates but codepoints beyond 0x10FFFF
619 0 : surrogate = true;
620 0 : slower = 0x90;
621 : }
622 : } else {
623 0 : return false;
624 : }
625 :
626 : // eat the rest of this multi-byte character
627 0 : while (positions_left) {
628 0 : positions_left--;
629 0 : i++;
630 0 : c = str[i];
631 0 : if (!c)
632 0 : return false; // end of string but not end of character sequence
633 :
634 : // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
635 0 : if (nonchar && ((!positions_left && c < 0xBE) ||
636 : (positions_left == 1 && c != 0xBF) ||
637 : (positions_left == 2 && 0x0F != (0x0F & c) ))) {
638 0 : nonchar = false;
639 : }
640 0 : if (!IsInUTF8Sequence(c) || (overlong && c <= olupper) ||
641 : (surrogate && slower <= c) || (nonchar && !positions_left) ) {
642 0 : return false;
643 : }
644 0 : overlong = surrogate = false;
645 : }
646 : }
647 0 : return true;
648 : }
649 :
650 0 : bool IsStringUTF8(const std::string& str) {
651 0 : return IsStringUTF8T(str.data(), str.length());
652 : }
653 :
654 0 : bool IsStringWideUTF8(const std::wstring& str) {
655 0 : return IsStringUTF8T(str.data(), str.length());
656 : }
657 :
// Compares the range [a_begin, a_end), passed element-wise through
// ToLowerASCII, against the NUL-terminated string |b|. Returns true only if
// every element matches and both run out at the same time.
template <typename Iter>
static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
                                          Iter a_end,
                                          const char* b) {
  const char* expected = b;
  Iter cursor = a_begin;
  while (cursor != a_end) {
    // |b| is shorter than the range.
    if (*expected == 0)
      return false;
    if (ToLowerASCII(*cursor) != *expected)
      return false;
    ++cursor;
    ++expected;
  }
  // The range is exhausted; |b| must be exhausted too.
  return *expected == 0;
}
668 :
669 : // Front-ends for LowerCaseEqualsASCII.
670 0 : bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
671 0 : return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
672 : }
673 :
674 0 : bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) {
675 0 : return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
676 : }
677 :
678 0 : bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
679 : std::string::const_iterator a_end,
680 : const char* b) {
681 0 : return DoLowerCaseEqualsASCII(a_begin, a_end, b);
682 : }
683 :
684 0 : bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
685 : std::wstring::const_iterator a_end,
686 : const char* b) {
687 0 : return DoLowerCaseEqualsASCII(a_begin, a_end, b);
688 : }
689 :
690 : #ifndef ANDROID
691 0 : bool LowerCaseEqualsASCII(const char* a_begin,
692 : const char* a_end,
693 : const char* b) {
694 0 : return DoLowerCaseEqualsASCII(a_begin, a_end, b);
695 : }
696 0 : bool LowerCaseEqualsASCII(const wchar_t* a_begin,
697 : const wchar_t* a_end,
698 : const char* b) {
699 0 : return DoLowerCaseEqualsASCII(a_begin, a_end, b);
700 : }
701 : #endif
702 0 : bool StartsWithASCII(const std::string& str,
703 : const std::string& search,
704 : bool case_sensitive) {
705 0 : if (case_sensitive)
706 0 : return str.compare(0, search.length(), search) == 0;
707 : else
708 0 : return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
709 : }
710 :
711 0 : bool StartsWith(const std::wstring& str,
712 : const std::wstring& search,
713 : bool case_sensitive) {
714 0 : if (case_sensitive)
715 0 : return str.compare(0, search.length(), search) == 0;
716 : else {
717 0 : if (search.size() > str.size())
718 0 : return false;
719 : return std::equal(search.begin(), search.end(), str.begin(),
720 0 : chromium_CaseInsensitiveCompare<wchar_t>());
721 : }
722 : }
723 :
724 0 : DataUnits GetByteDisplayUnits(int64 bytes) {
725 : // The byte thresholds at which we display amounts. A byte count is displayed
726 : // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1].
727 : // This must match the DataUnits enum.
728 : static const int64 kUnitThresholds[] = {
729 : 0, // DATA_UNITS_BYTE,
730 : 3*1024, // DATA_UNITS_KILOBYTE,
731 : 2*1024*1024, // DATA_UNITS_MEGABYTE,
732 : 1024*1024*1024 // DATA_UNITS_GIGABYTE,
733 : };
734 :
735 0 : if (bytes < 0) {
736 0 : NOTREACHED() << "Negative bytes value";
737 0 : return DATA_UNITS_BYTE;
738 : }
739 :
740 0 : int unit_index = arraysize(kUnitThresholds);
741 0 : while (--unit_index > 0) {
742 0 : if (bytes >= kUnitThresholds[unit_index])
743 0 : break;
744 : }
745 :
746 0 : DCHECK(unit_index >= DATA_UNITS_BYTE && unit_index <= DATA_UNITS_GIGABYTE);
747 0 : return DataUnits(unit_index);
748 : }
749 :
750 : // TODO(mpcomplete): deal with locale
751 : // Byte suffixes. This must match the DataUnits enum.
// Suffixes for plain byte amounts; passed to FormatBytesInternal by
// FormatBytes and indexed there by the DataUnits value.
static const wchar_t* const kByteStrings[] = { L"B", L"kB", L"MB", L"GB" };

// Suffixes for transfer rates; passed to FormatBytesInternal by FormatSpeed
// and indexed there by the DataUnits value.
static const wchar_t* const kSpeedStrings[] = {
  L"B/s", L"kB/s", L"MB/s", L"GB/s"
};
765 :
766 0 : std::wstring FormatBytesInternal(int64 bytes,
767 : DataUnits units,
768 : bool show_units,
769 : const wchar_t* const* suffix) {
770 0 : if (bytes < 0) {
771 0 : NOTREACHED() << "Negative bytes value";
772 0 : return std::wstring();
773 : }
774 :
775 0 : DCHECK(units >= DATA_UNITS_BYTE && units <= DATA_UNITS_GIGABYTE);
776 :
777 : // Put the quantity in the right units.
778 0 : double unit_amount = static_cast<double>(bytes);
779 0 : for (int i = 0; i < units; ++i)
780 0 : unit_amount /= 1024.0;
781 :
782 : wchar_t tmp[64];
783 : // If the first decimal digit is 0, don't show it.
784 : double int_part;
785 0 : double fractional_part = modf(unit_amount, &int_part);
786 0 : modf(fractional_part * 10, &int_part);
787 0 : if (int_part == 0) {
788 : base::swprintf(tmp, arraysize(tmp),
789 0 : L"%lld", static_cast<int64>(unit_amount));
790 : } else {
791 0 : base::swprintf(tmp, arraysize(tmp), L"%.1lf", unit_amount);
792 : }
793 :
794 0 : std::wstring ret(tmp);
795 0 : if (show_units) {
796 0 : ret += L" ";
797 0 : ret += suffix[units];
798 : }
799 :
800 0 : return ret;
801 : }
802 :
803 0 : std::wstring FormatBytes(int64 bytes, DataUnits units, bool show_units) {
804 0 : return FormatBytesInternal(bytes, units, show_units, kByteStrings);
805 : }
806 :
807 0 : std::wstring FormatSpeed(int64 bytes, DataUnits units, bool show_units) {
808 0 : return FormatBytesInternal(bytes, units, show_units, kSpeedStrings);
809 : }
810 :
// Replaces occurrences of |find_this| in |*str| with |replace_with|, starting
// the search at |start_offset|. Only the first occurrence is replaced unless
// |replace_all| is true. Does nothing when |start_offset| is npos or not
// inside the string, or when |find_this| is empty.
template<class StringType>
void DoReplaceSubstringsAfterOffset(StringType* str,
                                    typename StringType::size_type start_offset,
                                    const StringType& find_this,
                                    const StringType& replace_with,
                                    bool replace_all) {
  if ((start_offset == StringType::npos) || (start_offset >= str->length()))
    return;

  // An empty search string is a caller error. It also matches at every
  // offset, so with |replace_all| the loop below would keep inserting
  // |replace_with| forever; the DCHECK alone does not protect release builds.
  DCHECK(!find_this.empty());
  if (find_this.empty())
    return;

  typename StringType::size_type offs = str->find(find_this, start_offset);
  while (offs != StringType::npos) {
    str->replace(offs, find_this.length(), replace_with);

    if (!replace_all)
      break;

    // Resume after the text just inserted, so that occurrences of
    // |find_this| inside |replace_with| are not replaced again.
    offs = str->find(find_this, offs + replace_with.length());
  }
}
830 :
831 0 : void ReplaceFirstSubstringAfterOffset(string16* str,
832 : string16::size_type start_offset,
833 : const string16& find_this,
834 : const string16& replace_with) {
835 : DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
836 0 : false); // replace first instance
837 0 : }
838 :
839 0 : void ReplaceFirstSubstringAfterOffset(std::string* str,
840 : std::string::size_type start_offset,
841 : const std::string& find_this,
842 : const std::string& replace_with) {
843 : DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
844 0 : false); // replace first instance
845 0 : }
846 :
847 0 : void ReplaceSubstringsAfterOffset(string16* str,
848 : string16::size_type start_offset,
849 : const string16& find_this,
850 : const string16& replace_with) {
851 : DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
852 0 : true); // replace all instances
853 0 : }
854 :
855 0 : void ReplaceSubstringsAfterOffset(std::string* str,
856 : std::string::size_type start_offset,
857 : const std::string& find_this,
858 : const std::string& replace_with) {
859 : DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
860 0 : true); // replace all instances
861 0 : }
862 :
863 : // Overloaded wrappers around vsnprintf and vswprintf. The buf_size parameter
864 : // is the size of the buffer. These return the number of characters in the
865 : // formatted string excluding the NUL terminator. If the buffer is not
866 : // large enough to accommodate the formatted string without truncation, they
867 : // return the number of characters that would be in the fully-formatted string
868 : // (vsnprintf, and vswprintf on Windows), or -1 (vswprintf on POSIX platforms).
869 0 : inline int vsnprintfT(char* buffer,
870 : size_t buf_size,
871 : const char* format,
872 : va_list argptr) {
873 0 : return base::vsnprintf(buffer, buf_size, format, argptr);
874 : }
875 :
876 0 : inline int vsnprintfT(wchar_t* buffer,
877 : size_t buf_size,
878 : const wchar_t* format,
879 : va_list argptr) {
880 0 : return base::vswprintf(buffer, buf_size, format, argptr);
881 : }
882 :
883 : // Templatized backend for StringPrintF/StringAppendF. This does not finalize
884 : // the va_list, the caller is expected to do that.
885 : template <class StringType>
886 0 : static void StringAppendVT(StringType* dst,
887 : const typename StringType::value_type* format,
888 : va_list ap) {
889 : // First try with a small fixed size buffer.
890 : // This buffer size should be kept in sync with StringUtilTest.GrowBoundary
891 : // and StringUtilTest.StringPrintfBounds.
892 : typename StringType::value_type stack_buf[1024];
893 :
894 : va_list backup_ap;
895 0 : base_va_copy(backup_ap, ap);
896 :
897 : #if !defined(OS_WIN)
898 0 : errno = 0;
899 : #endif
900 0 : int result = vsnprintfT(stack_buf, arraysize(stack_buf), format, backup_ap);
901 0 : va_end(backup_ap);
902 :
903 0 : if (result >= 0 && result < static_cast<int>(arraysize(stack_buf))) {
904 : // It fit.
905 0 : dst->append(stack_buf, result);
906 0 : return;
907 : }
908 :
909 : // Repeatedly increase buffer size until it fits.
910 0 : int mem_length = arraysize(stack_buf);
911 0 : while (true) {
912 0 : if (result < 0) {
913 : #if !defined(OS_WIN)
914 : // On Windows, vsnprintfT always returns the number of characters in a
915 : // fully-formatted string, so if we reach this point, something else is
916 : // wrong and no amount of buffer-doubling is going to fix it.
917 0 : if (errno != 0 && errno != EOVERFLOW)
918 : #endif
919 : {
920 : // If an error other than overflow occurred, it's never going to work.
921 0 : DLOG(WARNING) << "Unable to printf the requested string due to error.";
922 0 : return;
923 : }
924 : // Try doubling the buffer size.
925 0 : mem_length *= 2;
926 : } else {
927 : // We need exactly "result + 1" characters.
928 0 : mem_length = result + 1;
929 : }
930 :
931 0 : if (mem_length > 32 * 1024 * 1024) {
932 : // That should be plenty, don't try anything larger. This protects
933 : // against huge allocations when using vsnprintfT implementations that
934 : // return -1 for reasons other than overflow without setting errno.
935 0 : DLOG(WARNING) << "Unable to printf the requested string due to size.";
936 0 : return;
937 : }
938 :
939 0 : std::vector<typename StringType::value_type> mem_buf(mem_length);
940 :
941 : // Restore the va_list before we use it again.
942 0 : base_va_copy(backup_ap, ap);
943 :
944 0 : result = vsnprintfT(&mem_buf[0], mem_length, format, ap);
945 0 : va_end(backup_ap);
946 :
947 0 : if ((result >= 0) && (result < mem_length)) {
948 : // It fit.
949 0 : dst->append(&mem_buf[0], result);
950 : return;
951 : }
952 : }
953 : }
954 :
955 : namespace {
956 :
957 : template <typename STR, typename INT, typename UINT, bool NEG>
958 : struct IntToStringT {
959 :
960 : // This is to avoid a compiler warning about unary minus on unsigned type.
961 : // For example, say you had the following code:
962 : // template <typename INT>
963 : // INT abs(INT value) { return value < 0 ? -value : value; }
964 : // Even though if INT is unsigned, it's impossible for value < 0, so the
965 : // unary minus will never be taken, the compiler will still generate a
966 : // warning. We do a little specialization dance...
967 : template <typename INT2, typename UINT2, bool NEG2>
968 : struct ToUnsignedT { };
969 :
970 : template <typename INT2, typename UINT2>
971 : struct ToUnsignedT<INT2, UINT2, false> {
972 0 : static UINT2 ToUnsigned(INT2 value) {
973 0 : return static_cast<UINT2>(value);
974 : }
975 : };
976 :
977 : template <typename INT2, typename UINT2>
978 : struct ToUnsignedT<INT2, UINT2, true> {
979 0 : static UINT2 ToUnsigned(INT2 value) {
980 0 : return static_cast<UINT2>(value < 0 ? -value : value);
981 : }
982 : };
983 :
984 0 : static STR IntToString(INT value) {
985 : // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4.
986 : // So round up to allocate 3 output characters per byte, plus 1 for '-'.
987 0 : const int kOutputBufSize = 3 * sizeof(INT) + 1;
988 :
989 : // Allocate the whole string right away, we will right back to front, and
990 : // then return the substr of what we ended up using.
991 0 : STR outbuf(kOutputBufSize, 0);
992 :
993 0 : bool is_neg = value < 0;
994 : // Even though is_neg will never be true when INT is parameterized as
995 : // unsigned, even the presence of the unary operation causes a warning.
996 0 : UINT res = ToUnsignedT<INT, UINT, NEG>::ToUnsigned(value);
997 :
998 0 : for (typename STR::iterator it = outbuf.end();;) {
999 0 : --it;
1000 0 : DCHECK(it != outbuf.begin());
1001 0 : *it = static_cast<typename STR::value_type>((res % 10) + '0');
1002 0 : res /= 10;
1003 :
1004 : // We're done..
1005 0 : if (res == 0) {
1006 0 : if (is_neg) {
1007 0 : --it;
1008 0 : DCHECK(it != outbuf.begin());
1009 0 : *it = static_cast<typename STR::value_type>('-');
1010 : }
1011 0 : return STR(it, outbuf.end());
1012 : }
1013 : }
1014 : NOTREACHED();
1015 : return STR();
1016 : }
1017 : };
1018 :
1019 : }
1020 :
1021 0 : std::string IntToString(int value) {
1022 : return IntToStringT<std::string, int, unsigned int, true>::
1023 0 : IntToString(value);
1024 : }
1025 0 : std::wstring IntToWString(int value) {
1026 : return IntToStringT<std::wstring, int, unsigned int, true>::
1027 0 : IntToString(value);
1028 : }
1029 0 : std::string UintToString(unsigned int value) {
1030 : return IntToStringT<std::string, unsigned int, unsigned int, false>::
1031 0 : IntToString(value);
1032 : }
1033 0 : std::wstring UintToWString(unsigned int value) {
1034 : return IntToStringT<std::wstring, unsigned int, unsigned int, false>::
1035 0 : IntToString(value);
1036 : }
1037 0 : std::string Int64ToString(int64 value) {
1038 : return IntToStringT<std::string, int64, uint64, true>::
1039 0 : IntToString(value);
1040 : }
1041 0 : std::wstring Int64ToWString(int64 value) {
1042 : return IntToStringT<std::wstring, int64, uint64, true>::
1043 0 : IntToString(value);
1044 : }
1045 0 : std::string Uint64ToString(uint64 value) {
1046 : return IntToStringT<std::string, uint64, uint64, false>::
1047 0 : IntToString(value);
1048 : }
1049 0 : std::wstring Uint64ToWString(uint64 value) {
1050 : return IntToStringT<std::wstring, uint64, uint64, false>::
1051 0 : IntToString(value);
1052 : }
1053 :
1054 0 : void StringAppendV(std::string* dst, const char* format, va_list ap) {
1055 0 : StringAppendVT(dst, format, ap);
1056 0 : }
1057 :
1058 0 : void StringAppendV(std::wstring* dst, const wchar_t* format, va_list ap) {
1059 0 : StringAppendVT(dst, format, ap);
1060 0 : }
1061 :
1062 0 : std::string StringPrintf(const char* format, ...) {
1063 : va_list ap;
1064 0 : va_start(ap, format);
1065 0 : std::string result;
1066 0 : StringAppendV(&result, format, ap);
1067 0 : va_end(ap);
1068 : return result;
1069 : }
1070 :
1071 0 : std::wstring StringPrintf(const wchar_t* format, ...) {
1072 : va_list ap;
1073 0 : va_start(ap, format);
1074 0 : std::wstring result;
1075 0 : StringAppendV(&result, format, ap);
1076 0 : va_end(ap);
1077 : return result;
1078 : }
1079 :
1080 0 : const std::string& SStringPrintf(std::string* dst, const char* format, ...) {
1081 : va_list ap;
1082 0 : va_start(ap, format);
1083 0 : dst->clear();
1084 0 : StringAppendV(dst, format, ap);
1085 0 : va_end(ap);
1086 0 : return *dst;
1087 : }
1088 :
1089 0 : const std::wstring& SStringPrintf(std::wstring* dst,
1090 : const wchar_t* format, ...) {
1091 : va_list ap;
1092 0 : va_start(ap, format);
1093 0 : dst->clear();
1094 0 : StringAppendV(dst, format, ap);
1095 0 : va_end(ap);
1096 0 : return *dst;
1097 : }
1098 :
1099 0 : void StringAppendF(std::string* dst, const char* format, ...) {
1100 : va_list ap;
1101 0 : va_start(ap, format);
1102 0 : StringAppendV(dst, format, ap);
1103 0 : va_end(ap);
1104 0 : }
1105 :
1106 0 : void StringAppendF(std::wstring* dst, const wchar_t* format, ...) {
1107 : va_list ap;
1108 0 : va_start(ap, format);
1109 0 : StringAppendV(dst, format, ap);
1110 0 : va_end(ap);
1111 0 : }
1112 :
1113 : template<typename STR>
1114 0 : static void SplitStringT(const STR& str,
1115 : const typename STR::value_type s,
1116 : bool trim_whitespace,
1117 : std::vector<STR>* r) {
1118 0 : size_t last = 0;
1119 : size_t i;
1120 0 : size_t c = str.size();
1121 0 : for (i = 0; i <= c; ++i) {
1122 0 : if (i == c || str[i] == s) {
1123 0 : size_t len = i - last;
1124 0 : STR tmp = str.substr(last, len);
1125 0 : if (trim_whitespace) {
1126 0 : STR t_tmp;
1127 0 : TrimWhitespace(tmp, TRIM_ALL, &t_tmp);
1128 0 : r->push_back(t_tmp);
1129 : } else {
1130 0 : r->push_back(tmp);
1131 : }
1132 0 : last = i + 1;
1133 : }
1134 : }
1135 0 : }
1136 :
1137 0 : void SplitString(const std::wstring& str,
1138 : wchar_t s,
1139 : std::vector<std::wstring>* r) {
1140 0 : SplitStringT(str, s, true, r);
1141 0 : }
1142 :
1143 0 : void SplitString(const std::string& str,
1144 : char s,
1145 : std::vector<std::string>* r) {
1146 0 : SplitStringT(str, s, true, r);
1147 0 : }
1148 :
1149 0 : void SplitStringDontTrim(const std::wstring& str,
1150 : wchar_t s,
1151 : std::vector<std::wstring>* r) {
1152 0 : SplitStringT(str, s, false, r);
1153 0 : }
1154 :
1155 0 : void SplitStringDontTrim(const std::string& str,
1156 : char s,
1157 : std::vector<std::string>* r) {
1158 0 : SplitStringT(str, s, false, r);
1159 0 : }
1160 :
// Concatenates the elements of |parts|, inserting |sep| between consecutive
// elements. Returns an empty string when |parts| is empty.
template<typename STR>
static STR JoinStringT(const std::vector<STR>& parts,
                       typename STR::value_type sep) {
  STR joined;
  for (size_t n = 0; n < parts.size(); ++n) {
    // The separator goes before every element except the first.
    if (n != 0)
      joined += sep;
    joined += parts[n];
  }
  return joined;
}
1177 :
1178 0 : std::string JoinString(const std::vector<std::string>& parts, char sep) {
1179 0 : return JoinStringT(parts, sep);
1180 : }
1181 :
1182 0 : std::wstring JoinString(const std::vector<std::wstring>& parts, wchar_t sep) {
1183 0 : return JoinStringT(parts, sep);
1184 : }
1185 :
// Appends to |result| every maximal run of non-whitespace characters found in
// |str|, in order. Whitespace follows the HTML 5 definition: space, tab, LF,
// line tab, FF, or CR. Nothing is appended for an empty or all-whitespace
// input.
void SplitStringAlongWhitespace(const std::wstring& str,
                                std::vector<std::wstring>* result) {
  const size_t length = str.length();
  if (length == 0)
    return;

  bool in_whitespace = false;
  size_t token_start = 0;
  for (size_t pos = 0; pos != length; ++pos) {
    const wchar_t ch = str[pos];
    const bool is_ws = ch == L' ' || ch == L'\t' || ch == L'\xA' ||
                       ch == L'\xB' || ch == L'\xC' || ch == L'\xD';

    // Only transitions between whitespace and non-whitespace matter.
    if (is_ws == in_whitespace)
      continue;

    if (is_ws) {
      // A token just ended. At position 0 there is no token before the
      // whitespace, so nothing is emitted.
      if (pos > 0)
        result->push_back(str.substr(token_start, pos - token_start));
      in_whitespace = true;
    } else {
      // A new token begins here.
      in_whitespace = false;
      token_start = pos;
    }
  }

  // Flush the token that runs up to the end of the string, if any.
  if (!in_whitespace)
    result->push_back(str.substr(token_start, length - token_start));
}
1225 :
1226 0 : string16 ReplaceStringPlaceholders(const string16& format_string,
1227 : const string16& a,
1228 : size_t* offset) {
1229 0 : std::vector<size_t> offsets;
1230 : string16 result = ReplaceStringPlaceholders(format_string, a,
1231 0 : string16(),
1232 0 : string16(),
1233 0 : string16(), &offsets);
1234 0 : DCHECK(offsets.size() == 1);
1235 0 : if (offset) {
1236 0 : *offset = offsets[0];
1237 : }
1238 : return result;
1239 : }
1240 :
1241 0 : string16 ReplaceStringPlaceholders(const string16& format_string,
1242 : const string16& a,
1243 : const string16& b,
1244 : std::vector<size_t>* offsets) {
1245 0 : return ReplaceStringPlaceholders(format_string, a, b, string16(),
1246 0 : string16(), offsets);
1247 : }
1248 :
1249 0 : string16 ReplaceStringPlaceholders(const string16& format_string,
1250 : const string16& a,
1251 : const string16& b,
1252 : const string16& c,
1253 : std::vector<size_t>* offsets) {
1254 0 : return ReplaceStringPlaceholders(format_string, a, b, c, string16(),
1255 0 : offsets);
1256 : }
1257 :
1258 0 : string16 ReplaceStringPlaceholders(const string16& format_string,
1259 : const string16& a,
1260 : const string16& b,
1261 : const string16& c,
1262 : const string16& d,
1263 : std::vector<size_t>* offsets) {
1264 : // We currently only support up to 4 place holders ($1 through $4), although
1265 : // it's easy enough to add more.
1266 0 : const string16* subst_texts[] = { &a, &b, &c, &d };
1267 :
1268 0 : string16 formatted;
1269 0 : formatted.reserve(format_string.length() + a.length() +
1270 0 : b.length() + c.length() + d.length());
1271 :
1272 0 : std::vector<ReplacementOffset> r_offsets;
1273 :
1274 : // Replace $$ with $ and $1-$4 with placeholder text if it exists.
1275 0 : for (string16::const_iterator i = format_string.begin();
1276 0 : i != format_string.end(); ++i) {
1277 0 : if ('$' == *i) {
1278 0 : if (i + 1 != format_string.end()) {
1279 0 : ++i;
1280 0 : DCHECK('$' == *i || ('1' <= *i && *i <= '4')) <<
1281 0 : "Invalid placeholder: " << *i;
1282 0 : if ('$' == *i) {
1283 0 : formatted.push_back('$');
1284 : } else {
1285 0 : int index = *i - '1';
1286 0 : if (offsets) {
1287 : ReplacementOffset r_offset(index,
1288 0 : static_cast<int>(formatted.size()));
1289 : r_offsets.insert(std::lower_bound(r_offsets.begin(),
1290 : r_offsets.end(), r_offset,
1291 0 : &CompareParameter),
1292 0 : r_offset);
1293 : }
1294 0 : formatted.append(*subst_texts[index]);
1295 : }
1296 : }
1297 : } else {
1298 0 : formatted.push_back(*i);
1299 : }
1300 : }
1301 0 : if (offsets) {
1302 0 : for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
1303 0 : i != r_offsets.end(); ++i) {
1304 0 : offsets->push_back(i->offset);
1305 : }
1306 : }
1307 : return formatted;
1308 : }
1309 :
// Returns true if |character| is one of the two pattern wildcards, '*' or
// '?'.
template <class CHAR>
static bool IsWildcard(CHAR character) {
  if (character == '*')
    return true;
  return character == '?';
}
1314 :
// Advances |*pattern| and |*string| past their common leading characters,
// stopping at the first position where they differ, at an unescaped wildcard
// in the pattern, or at the end of either string. A backslash in the pattern
// escapes the character that follows it.
template <class CHAR>
static void EatSameChars(const CHAR** pattern, const CHAR** string) {
  bool after_escape = false;
  for (;;) {
    const CHAR pattern_char = **pattern;
    if (!pattern_char || !**string)
      return;

    if (!after_escape) {
      // An unescaped wildcard is left for the caller to handle.
      if (IsWildcard(pattern_char))
        return;

      // An unescaped backslash is skipped; the character after it is then
      // compared literally on the next pass.
      if (pattern_char == L'\\') {
        after_escape = true;
        ++(*pattern);
        continue;
      }
    }

    if (pattern_char != **string) {
      // Mismatch. If a backslash was just skipped, step back onto it so the
      // pattern pointer is left where it was before the escape was consumed.
      if (after_escape)
        --(*pattern);
      return;
    }

    // The characters agree: consume one from each side.
    ++(*pattern);
    ++(*string);
    after_escape = false;
  }
}
1352 :
// Advances |*pattern| past any run of consecutive wildcard characters,
// stopping at the first non-wildcard or at the terminating NUL.
template <class CHAR>
static void EatWildcard(const CHAR** pattern) {
  while (**pattern && IsWildcard(**pattern))
    ++(*pattern);
}
1361 :
// Returns true if the NUL-terminated string |eval| matches |pattern|. In this
// implementation '?' matches zero or one character and '*' matches any number
// of characters; escaping is handled by EatSameChars.
template <class CHAR>
static bool MatchPatternT(const CHAR* eval, const CHAR* pattern) {
  // Skip over the literal prefix shared by both strings.
  EatSameChars(&pattern, &eval);

  // Nothing left to match: the pattern matches only if what remains of it is
  // made up solely of wildcards.
  if (*eval == 0) {
    EatWildcard(&pattern);
    return *pattern == 0;
  }

  // Input remains but the pattern is exhausted.
  const CHAR head = *pattern;
  if (head == 0)
    return false;

  // '?': try the rest of the pattern against the current position (matching
  // nothing) and against the position after one consumed character.
  if (head == '?') {
    return MatchPatternT(eval, pattern + 1) ||
           MatchPatternT(eval + 1, pattern + 1);
  }

  // Any other non-'*' character here is a literal that failed to match.
  if (head != '*')
    return false;

  // '*': try the rest of the pattern against every remaining suffix.
  for (; *eval; ++eval) {
    if (MatchPatternT(eval, pattern + 1))
      return true;
  }

  // The input is exhausted; succeed only if the pattern holds nothing but
  // wildcards from here on.
  EatWildcard(&pattern);
  return *pattern == 0;
}
1409 :
1410 0 : bool MatchPattern(const std::wstring& eval, const std::wstring& pattern) {
1411 0 : return MatchPatternT(eval.c_str(), pattern.c_str());
1412 : }
1413 :
1414 0 : bool MatchPattern(const std::string& eval, const std::string& pattern) {
1415 0 : return MatchPatternT(eval.c_str(), pattern.c_str());
1416 : }
1417 :
1418 : // For the various *ToInt conversions, there are no *ToIntTraits classes to use
1419 : // because there's no such thing as strtoi. Use *ToLongTraits through a cast
1420 : // instead, requiring that long and int are compatible and equal-width. They
1421 : // are on our target platforms.
1422 :
1423 : // XXX Sigh.
1424 :
1425 : #if !defined(ARCH_CPU_64_BITS)
1426 0 : bool StringToInt(const std::string& input, int* output) {
1427 : COMPILE_ASSERT(sizeof(int) == sizeof(long), cannot_strtol_to_int);
1428 : return StringToNumber<StringToLongTraits>(input,
1429 0 : reinterpret_cast<long*>(output));
1430 : }
1431 :
1432 0 : bool StringToInt(const string16& input, int* output) {
1433 : COMPILE_ASSERT(sizeof(int) == sizeof(long), cannot_wcstol_to_int);
1434 : return StringToNumber<String16ToLongTraits>(input,
1435 0 : reinterpret_cast<long*>(output));
1436 : }
1437 :
1438 : #else
1439 : bool StringToInt(const std::string& input, int* output) {
1440 : long tmp;
1441 : bool ok = StringToNumber<StringToLongTraits>(input, &tmp);
1442 : if (!ok || tmp > kint32max) {
1443 : return false;
1444 : }
1445 : *output = static_cast<int>(tmp);
1446 : return true;
1447 : }
1448 :
1449 : bool StringToInt(const string16& input, int* output) {
1450 : long tmp;
1451 : bool ok = StringToNumber<String16ToLongTraits>(input, &tmp);
1452 : if (!ok || tmp > kint32max) {
1453 : return false;
1454 : }
1455 : *output = static_cast<int>(tmp);
1456 : return true;
1457 : }
1458 : #endif // !defined(ARCH_CPU_64_BITS)
1459 :
1460 0 : bool StringToInt64(const std::string& input, int64* output) {
1461 0 : return StringToNumber<StringToInt64Traits>(input, output);
1462 : }
1463 :
1464 0 : bool StringToInt64(const string16& input, int64* output) {
1465 0 : return StringToNumber<String16ToInt64Traits>(input, output);
1466 : }
1467 :
1468 : #if !defined(ARCH_CPU_64_BITS)
1469 0 : bool HexStringToInt(const std::string& input, int* output) {
1470 : COMPILE_ASSERT(sizeof(int) == sizeof(long), cannot_strtol_to_int);
1471 : return StringToNumber<HexStringToLongTraits>(input,
1472 0 : reinterpret_cast<long*>(output));
1473 : }
1474 :
1475 0 : bool HexStringToInt(const string16& input, int* output) {
1476 : COMPILE_ASSERT(sizeof(int) == sizeof(long), cannot_wcstol_to_int);
1477 : return StringToNumber<HexString16ToLongTraits>(
1478 0 : input, reinterpret_cast<long*>(output));
1479 : }
1480 :
1481 : #else
1482 : bool HexStringToInt(const std::string& input, int* output) {
1483 : long tmp;
1484 : bool ok = StringToNumber<HexStringToLongTraits>(input, &tmp);
1485 : if (!ok || tmp > kint32max) {
1486 : return false;
1487 : }
1488 : *output = static_cast<int>(tmp);
1489 : return true;
1490 : }
1491 :
1492 : bool HexStringToInt(const string16& input, int* output) {
1493 : long tmp;
1494 : bool ok = StringToNumber<HexString16ToLongTraits>(input, &tmp);
1495 : if (!ok || tmp > kint32max) {
1496 : return false;
1497 : }
1498 : *output = static_cast<int>(tmp);
1499 : return true;
1500 : }
1501 :
1502 : #endif // !defined(ARCH_CPU_64_BITS)
1503 :
1504 : namespace {
1505 :
1506 : template<class CHAR>
1507 0 : bool HexDigitToIntT(const CHAR digit, uint8* val) {
1508 0 : if (digit >= '0' && digit <= '9')
1509 0 : *val = digit - '0';
1510 0 : else if (digit >= 'a' && digit <= 'f')
1511 0 : *val = 10 + digit - 'a';
1512 0 : else if (digit >= 'A' && digit <= 'F')
1513 0 : *val = 10 + digit - 'A';
1514 : else
1515 0 : return false;
1516 0 : return true;
1517 : }
1518 :
1519 : template<typename STR>
1520 0 : bool HexStringToBytesT(const STR& input, std::vector<uint8>* output) {
1521 0 : DCHECK(output->size() == 0);
1522 0 : int count = input.size();
1523 0 : if (count == 0 || (count % 2) != 0)
1524 0 : return false;
1525 0 : for (int i = 0; i < count / 2; ++i) {
1526 0 : uint8 msb = 0; // most significant 4 bits
1527 0 : uint8 lsb = 0; // least significant 4 bits
1528 0 : if (!HexDigitToIntT(input[i * 2], &msb) ||
1529 : !HexDigitToIntT(input[i * 2 + 1], &lsb))
1530 0 : return false;
1531 0 : output->push_back((msb << 4) | lsb);
1532 : }
1533 0 : return true;
1534 : }
1535 :
1536 : } // namespace
1537 :
1538 0 : bool HexStringToBytes(const std::string& input, std::vector<uint8>* output) {
1539 0 : return HexStringToBytesT(input, output);
1540 : }
1541 :
1542 0 : bool HexStringToBytes(const string16& input, std::vector<uint8>* output) {
1543 0 : return HexStringToBytesT(input, output);
1544 : }
1545 :
1546 0 : int StringToInt(const std::string& value) {
1547 : int result;
1548 0 : StringToInt(value, &result);
1549 0 : return result;
1550 : }
1551 :
1552 0 : int StringToInt(const string16& value) {
1553 : int result;
1554 0 : StringToInt(value, &result);
1555 0 : return result;
1556 : }
1557 :
1558 0 : int64 StringToInt64(const std::string& value) {
1559 : int64 result;
1560 0 : StringToInt64(value, &result);
1561 0 : return result;
1562 : }
1563 :
1564 0 : int64 StringToInt64(const string16& value) {
1565 : int64 result;
1566 0 : StringToInt64(value, &result);
1567 0 : return result;
1568 : }
1569 :
1570 0 : int HexStringToInt(const std::string& value) {
1571 : int result;
1572 0 : HexStringToInt(value, &result);
1573 0 : return result;
1574 : }
1575 :
1576 0 : int HexStringToInt(const string16& value) {
1577 : int result;
1578 0 : HexStringToInt(value, &result);
1579 0 : return result;
1580 : }
1581 :
1582 : // The following code is compatible with the OpenBSD lcpy interface. See:
1583 : // http://www.gratisoft.us/todd/papers/strlcpy.html
1584 : // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
1585 :
namespace {

// Copies |src| into |dst|, writing at most |dst_size| characters including
// the terminating NUL, and returns the length of |src| in characters. When
// |src| does not fit, |dst| receives a truncated, NUL-terminated copy (unless
// |dst_size| is 0, in which case nothing is written).
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  size_t pos = 0;
  while (pos < dst_size) {
    const CHAR ch = src[pos];
    dst[pos] = ch;
    if (ch == 0)  // The terminator was copied: everything fit.
      return pos;
    ++pos;
  }

  // The buffer filled up before the terminator was reached. Overwrite the
  // last copied character with a NUL so |dst| is still terminated.
  if (dst_size > 0)
    dst[dst_size - 1] = 0;

  // Walk the rest of |src| to find its full length in characters.
  size_t src_len = dst_size;
  while (src[src_len] != 0)
    ++src_len;
  return src_len;
}

}  // namespace
1605 :
1606 0 : size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
1607 0 : return lcpyT<char>(dst, src, dst_size);
1608 : }
1609 0 : size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
1610 0 : return lcpyT<wchar_t>(dst, src, dst_size);
1611 : }
1612 :
1613 0 : bool ElideString(const std::wstring& input, int max_len, std::wstring* output) {
1614 0 : DCHECK(max_len >= 0);
1615 0 : if (static_cast<int>(input.length()) <= max_len) {
1616 0 : output->assign(input);
1617 0 : return false;
1618 : }
1619 :
1620 0 : switch (max_len) {
1621 : case 0:
1622 0 : output->clear();
1623 0 : break;
1624 : case 1:
1625 0 : output->assign(input.substr(0, 1));
1626 0 : break;
1627 : case 2:
1628 0 : output->assign(input.substr(0, 2));
1629 0 : break;
1630 : case 3:
1631 0 : output->assign(input.substr(0, 1) + L"." +
1632 0 : input.substr(input.length() - 1));
1633 0 : break;
1634 : case 4:
1635 0 : output->assign(input.substr(0, 1) + L".." +
1636 0 : input.substr(input.length() - 1));
1637 0 : break;
1638 : default: {
1639 0 : int rstr_len = (max_len - 3) / 2;
1640 0 : int lstr_len = rstr_len + ((max_len - 3) % 2);
1641 0 : output->assign(input.substr(0, lstr_len) + L"..." +
1642 0 : input.substr(input.length() - rstr_len));
1643 0 : break;
1644 : }
1645 : }
1646 :
1647 0 : return true;
1648 : }
1649 :
// Returns the uppercase hexadecimal encoding of the |size| bytes starting at
// |bytes|. Each input byte produces two output characters, high nibble first.
std::string HexEncode(const void* bytes, size_t size) {
  static const char kHexChars[] = "0123456789ABCDEF";

  const unsigned char* const data = static_cast<const unsigned char*>(bytes);

  std::string ret;
  ret.reserve(size * 2);
  for (size_t i = 0; i < size; ++i) {
    const unsigned char byte = data[i];
    ret.push_back(kHexChars[byte >> 4]);
    ret.push_back(kHexChars[byte & 0xf]);
  }
  return ret;
}
|