1 : // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 : //
5 : // This file defines utility functions for working with strings.
6 :
7 : #ifndef BASE_STRING_UTIL_H_
8 : #define BASE_STRING_UTIL_H_
9 :
10 : #include <stdarg.h> // va_list
11 : #include <ctype.h>
12 :
13 : #include <string>
14 : #include <vector>
15 :
16 : #include "base/basictypes.h"
17 : #include "base/string16.h"
18 : #include "base/string_piece.h" // For implicit conversions.
19 :
20 : // Safe standard library wrappers for all platforms.
21 :
22 : namespace base {
23 :
24 : // C standard-library functions like "strncasecmp" and "snprintf" that aren't
25 : // cross-platform are provided as "base::strncasecmp", and their prototypes
26 : // are listed below. These functions are then implemented as inline calls
27 : // to the platform-specific equivalents in the platform-specific headers.
28 :
29 : // Compare the two strings s1 and s2 without regard to case using
30 : // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if
31 : // s2 > s1 according to a lexicographic comparison.
32 : int strcasecmp(const char* s1, const char* s2);
33 :
34 : // Compare up to count characters of s1 and s2 without regard to case using
35 : // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if
36 : // s2 > s1 according to a lexicographic comparison.
37 : int strncasecmp(const char* s1, const char* s2, size_t count);
38 :
39 : // Wrapper for vsnprintf that always null-terminates and always returns the
40 : // number of characters that would be in an untruncated formatted
41 : // string, even when truncation occurs.
42 : int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments);
43 :
44 : // vswprintf always null-terminates, but when truncation occurs, it will either
45 : // return -1 or the number of characters that would be in an untruncated
46 : // formatted string. The actual return value depends on the underlying
47 : // C library's vswprintf implementation.
48 : int vswprintf(wchar_t* buffer, size_t size,
49 : const wchar_t* format, va_list arguments);
50 :
51 : // Some of these implementations need to be inlined.
52 :
53 : inline int snprintf(char* buffer, size_t size, const char* format, ...) {
54 : va_list arguments;
55 : va_start(arguments, format);
56 : int result = vsnprintf(buffer, size, format, arguments);
57 : va_end(arguments);
58 : return result;
59 : }
60 :
61 0 : inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) {
62 : va_list arguments;
63 0 : va_start(arguments, format);
64 0 : int result = vswprintf(buffer, size, format, arguments);
65 0 : va_end(arguments);
66 0 : return result;
67 : }
68 :
69 : // BSD-style safe and consistent string copy functions.
70 : // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|.
71 : // Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as
72 : // long as |dst_size| is not 0. Returns the length of |src| in characters.
73 : // If the return value is >= dst_size, then the output was truncated.
74 : // NOTE: All sizes are in number of characters, NOT in bytes.
75 : size_t strlcpy(char* dst, const char* src, size_t dst_size);
76 : size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size);
77 :
78 : // Scan a wprintf format string to determine whether it's portable across a
79 : // variety of systems. This function only checks that the conversion
80 : // specifiers used by the format string are supported and have the same meaning
81 : // on a variety of systems. It doesn't check for other errors that might occur
82 : // within a format string.
83 : //
84 : // Nonportable conversion specifiers for wprintf are:
85 : // - 's' and 'c' without an 'l' length modifier. %s and %c operate on char
86 : // data on all systems except Windows, which treat them as wchar_t data.
87 : // Use %ls and %lc for wchar_t data instead.
88 : // - 'S' and 'C', which operate on wchar_t data on all systems except Windows,
89 : // which treat them as char data. Use %ls and %lc for wchar_t data
90 : // instead.
91 : // - 'F', which is not identified by Windows wprintf documentation.
92 : // - 'D', 'O', and 'U', which are deprecated and not available on all systems.
93 : // Use %ld, %lo, and %lu instead.
94 : //
95 : // Note that there is no portable conversion specifier for char data when
96 : // working with wprintf.
97 : //
98 : // This function is intended to be called from base::vswprintf.
99 : bool IsWprintfFormatPortable(const wchar_t* format);
100 :
101 : } // namespace base
102 :
103 : #if defined(OS_WIN)
104 : #include "base/string_util_win.h"
105 : #elif defined(OS_POSIX)
106 : #include "base/string_util_posix.h"
107 : #else
108 : #error Define string operations appropriately for your platform
109 : #endif
110 :
111 : namespace base {
112 : // Returns a reference to a globally unique empty string that functions can
113 : // return. Use this to avoid static construction of strings, not to replace
114 : // any and all uses of "std::string()" as nicer-looking sugar.
115 : // These functions are threadsafe.
116 : const std::string& EmptyString();
117 : const std::wstring& EmptyWString();
118 : const string16& EmptyString16();
119 : }
120 :
121 : extern const wchar_t kWhitespaceWide[];
122 : extern const char kWhitespaceASCII[];
123 :
124 : // Names of codepages (charsets) understood by icu.
125 : extern const char* const kCodepageUTF8;
126 :
127 : // Removes characters in trim_chars from the beginning and end of input.
128 : // NOTE: Safe to use the same variable for both input and output.
129 : bool TrimString(const std::wstring& input,
130 : const wchar_t trim_chars[],
131 : std::wstring* output);
132 : bool TrimString(const std::string& input,
133 : const char trim_chars[],
134 : std::string* output);
135 :
136 : // Trims any whitespace from either end of the input string. Returns where
137 : // whitespace was found.
138 : // The non-wide version has two functions:
139 : // * TrimWhitespaceASCII()
140 : // This function is for ASCII strings and only looks for ASCII whitespace;
141 : // * TrimWhitespaceUTF8()
142 : // This function is for UTF-8 strings and looks for Unicode whitespace.
143 : // Please choose the best one according to your usage.
144 : // NOTE: Safe to use the same variable for both input and output.
// Bit flags naming the ends of a string. Each end has its own bit, so
// TRIM_ALL is simply the union of the two single-end values.
enum TrimPositions {
  TRIM_NONE     = 0,    // Neither end.
  TRIM_LEADING  = 0x1,  // Start of the string.
  TRIM_TRAILING = 0x2,  // End of the string.
  TRIM_ALL      = TRIM_LEADING | TRIM_TRAILING
};
151 : TrimPositions TrimWhitespace(const std::wstring& input,
152 : TrimPositions positions,
153 : std::wstring* output);
154 : TrimPositions TrimWhitespaceASCII(const std::string& input,
155 : TrimPositions positions,
156 : std::string* output);
157 : TrimPositions TrimWhitespaceUTF8(const std::string& input,
158 : TrimPositions positions,
159 : std::string* output);
160 :
161 : // Deprecated. This function is only for backward compatibility and calls
162 : // TrimWhitespaceASCII().
163 : TrimPositions TrimWhitespace(const std::string& input,
164 : TrimPositions positions,
165 : std::string* output);
166 :
167 : // Searches for CR or LF characters. Removes all contiguous whitespace
168 : // strings that contain them. This is useful when trying to deal with text
169 : // copied from terminals.
// Returns |text|, with the following three transformations:
171 : // (1) Leading and trailing whitespace is trimmed.
172 : // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace
173 : // sequences containing a CR or LF are trimmed.
174 : // (3) All other whitespace sequences are converted to single spaces.
175 : std::wstring CollapseWhitespace(const std::wstring& text,
176 : bool trim_sequences_with_line_breaks);
177 :
178 : // These convert between ASCII (7-bit) and Wide/UTF16 strings.
179 : std::string WideToASCII(const std::wstring& wide);
180 : std::wstring ASCIIToWide(const std::string& ascii);
181 : std::string UTF16ToASCII(const string16& utf16);
182 : string16 ASCIIToUTF16(const std::string& ascii);
183 :
184 : // These convert between UTF-8, -16, and -32 strings. They are potentially slow,
185 : // so avoid unnecessary conversions. The low-level versions return a boolean
186 : // indicating whether the conversion was 100% valid. In this case, it will still
187 : // do the best it can and put the result in the output buffer. The versions that
188 : // return strings ignore this error and just return the best conversion
189 : // possible.
190 : bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output);
191 : std::string WideToUTF8(const std::wstring& wide);
192 : bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output);
193 : std::wstring UTF8ToWide(const StringPiece& utf8);
194 :
195 : bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output);
196 : string16 WideToUTF16(const std::wstring& wide);
197 : bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output);
198 : std::wstring UTF16ToWide(const string16& utf16);
199 :
200 : bool UTF8ToUTF16(const char* src, size_t src_len, string16* output);
201 : string16 UTF8ToUTF16(const std::string& utf8);
202 : bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output);
203 : std::string UTF16ToUTF8(const string16& utf16);
204 :
205 : // We are trying to get rid of wstring as much as possible, but it's too big
206 : // a mess to do it all at once. These conversions should be used when we
207 : // really should just be passing a string16 around, but we haven't finished
208 : // porting whatever module uses wstring and the conversion is being used as a
// stopgap. This makes it easy to grep for the ones that should be removed.
210 : #if defined(OS_WIN)
211 : # define WideToUTF16Hack
212 : # define UTF16ToWideHack
213 : #else
214 : # define WideToUTF16Hack WideToUTF16
215 : # define UTF16ToWideHack UTF16ToWide
216 : #endif
217 :
// Selects how WideToCodepage and CodepageToWide react to characters they
// cannot convert. The class exists only to scope the Type enum: its sole
// constructor is private, so it is not meant to be instantiated.
class OnStringUtilConversionError {
 public:
  enum Type {
    // Report failure from the conversion function; the output buffer will be
    // empty.
    FAIL = 0,

    // Drop the offending characters and carry on as if they had never been
    // part of the input.
    SKIP = 1
  };

 private:
  OnStringUtilConversionError();
};
233 :
234 : // Converts between wide strings and the encoding specified. If the
235 : // encoding doesn't exist or the encoding fails (when on_error is FAIL),
236 : // returns false.
237 : bool WideToCodepage(const std::wstring& wide,
238 : const char* codepage_name,
239 : OnStringUtilConversionError::Type on_error,
240 : std::string* encoded);
241 : bool CodepageToWide(const std::string& encoded,
242 : const char* codepage_name,
243 : OnStringUtilConversionError::Type on_error,
244 : std::wstring* wide);
245 :
246 : // Converts the given wide string to the corresponding Latin1. This will fail
247 : // (return false) if any characters are more than 255.
248 : bool WideToLatin1(const std::wstring& wide, std::string* latin1);
249 :
250 : // Returns true if the specified string matches the criteria. How can a wide
251 : // string be 8-bit or UTF8? It contains only characters that are < 256 (in the
252 : // first case) or characters that use only 8-bits and whose 8-bit
253 : // representation looks like a UTF-8 string (the second case).
254 : bool IsString8Bit(const std::wstring& str);
255 : bool IsStringUTF8(const std::string& str);
256 : bool IsStringWideUTF8(const std::wstring& str);
257 : bool IsStringASCII(const std::wstring& str);
258 : bool IsStringASCII(const std::string& str);
259 : bool IsStringASCII(const string16& str);
260 :
// Locale-independent lower-casing of a single character. Only the ASCII
// letters 'A'..'Z' are mapped to 'a'..'z'; every other value is returned
// unchanged. The standard library's tolower is locale sensitive, which is why
// it is not used here.
template <class Char> inline Char ToLowerASCII(Char c) {
  if (c >= 'A' && c <= 'Z')
    return c + ('a' - 'A');
  return c;
}

// Lower-cases every element of |*s| in place using ToLowerASCII. Taking a
// pointer makes the mutation visible at the call site and distinguishes this
// overload from the copying one below.
template <class str> inline void StringToLowerASCII(str* s) {
  typename str::iterator cursor = s->begin();
  while (cursor != s->end()) {
    *cursor = ToLowerASCII(*cursor);
    ++cursor;
  }
}

// Returns a lower-cased copy of |s|, leaving |s| itself untouched. Intended
// for std::string and std::wstring.
template <class str> inline str StringToLowerASCII(const str& s) {
  str lowered(s);
  StringToLowerASCII(&lowered);
  return lowered;
}
280 :
// Locale-independent upper-casing of a single character. Only the ASCII
// letters 'a'..'z' are mapped to 'A'..'Z'; every other value is returned
// unchanged. The standard library's toupper is locale sensitive, which is why
// it is not used here.
template <class Char> inline Char ToUpperASCII(Char c) {
  if (c >= 'a' && c <= 'z')
    return c + ('A' - 'a');
  return c;
}

// Upper-cases every element of |*s| in place using ToUpperASCII. Taking a
// pointer makes the mutation visible at the call site and distinguishes this
// overload from the copying one below.
template <class str> inline void StringToUpperASCII(str* s) {
  typename str::iterator cursor = s->begin();
  while (cursor != s->end()) {
    *cursor = ToUpperASCII(*cursor);
    ++cursor;
  }
}

// Returns an upper-cased copy of |s|, leaving |s| itself untouched. Intended
// for std::string and std::wstring.
template <class str> inline str StringToUpperASCII(const str& s) {
  str raised(s);
  StringToUpperASCII(&raised);
  return raised;
}
300 :
301 : // Compare the lower-case form of the given string against the given ASCII
302 : // string. This is useful for doing checking if an input string matches some
303 : // token, and it is optimized to avoid intermediate string copies. This API is
304 : // borrowed from the equivalent APIs in Mozilla.
305 : bool LowerCaseEqualsASCII(const std::string& a, const char* b);
306 : bool LowerCaseEqualsASCII(const std::wstring& a, const char* b);
307 :
308 : // Same thing, but with string iterators instead.
309 : bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
310 : std::string::const_iterator a_end,
311 : const char* b);
312 : bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
313 : std::wstring::const_iterator a_end,
314 : const char* b);
315 : bool LowerCaseEqualsASCII(const char* a_begin,
316 : const char* a_end,
317 : const char* b);
318 : bool LowerCaseEqualsASCII(const wchar_t* a_begin,
319 : const wchar_t* a_end,
320 : const char* b);
321 :
322 : // Returns true if str starts with search, or false otherwise.
323 : bool StartsWithASCII(const std::string& str,
324 : const std::string& search,
325 : bool case_sensitive);
326 : bool StartsWith(const std::wstring& str,
327 : const std::wstring& search,
328 : bool case_sensitive);
329 :
// Locale-independent ASCII character classification. The C library
// equivalents change their answers with the current locale; these do not.

// True for space, tab, carriage return and line feed only.
template <typename Char>
inline bool IsAsciiWhitespace(Char c) {
  if (c == ' ' || c == '\t')
    return true;
  return c == '\r' || c == '\n';
}
// True when |c| is an ASCII letter, i.e. in 'A'..'Z' or 'a'..'z'.
template <typename Char>
inline bool IsAsciiAlpha(Char c) {
  const bool is_upper = (c >= 'A') && (c <= 'Z');
  const bool is_lower = (c >= 'a') && (c <= 'z');
  return is_upper || is_lower;
}
// True when |c| is an ASCII decimal digit, i.e. in '0'..'9'.
template <typename Char>
inline bool IsAsciiDigit(Char c) {
  if (c < '0')
    return false;
  return c <= '9';
}
344 :
345 : // Returns true if it's a whitespace character.
346 0 : inline bool IsWhitespace(wchar_t c) {
347 0 : return wcschr(kWhitespaceWide, c) != NULL;
348 : }
349 :
350 : // TODO(mpcomplete): Decide if we should change these names to KIBI, etc,
351 : // or if we should actually use metric units, or leave as is.
// Units in which a byte count can be displayed, ordered from smallest to
// largest. The numeric values are consecutive, starting at 0.
enum DataUnits {
  DATA_UNITS_BYTE     = 0,
  DATA_UNITS_KILOBYTE = 1,
  DATA_UNITS_MEGABYTE = 2,
  DATA_UNITS_GIGABYTE = 3
};
358 :
359 : // Return the unit type that is appropriate for displaying the amount of bytes
360 : // passed in.
361 : DataUnits GetByteDisplayUnits(int64 bytes);
362 :
// Return a byte count as a human-readable string, expressed in the units
// specified by 'units', with an optional unit suffix.
365 : // Ex: FormatBytes(512, DATA_UNITS_KILOBYTE, true) => "0.5 KB"
366 : // Ex: FormatBytes(10*1024, DATA_UNITS_MEGABYTE, false) => "0.1"
367 : std::wstring FormatBytes(int64 bytes, DataUnits units, bool show_units);
368 :
369 : // As above, but with "/s" units.
370 : // Ex: FormatSpeed(512, DATA_UNITS_KILOBYTE, true) => "0.5 KB/s"
371 : // Ex: FormatSpeed(10*1024, DATA_UNITS_MEGABYTE, false) => "0.1"
372 : std::wstring FormatSpeed(int64 bytes, DataUnits units, bool show_units);
373 :
// Return a number formatted with separators according to the user's locale.
375 : // Ex: FormatNumber(1234567) => 1,234,567
376 : std::wstring FormatNumber(int64 number);
377 :
378 : // Starting at |start_offset| (usually 0), replace the first instance of
379 : // |find_this| with |replace_with|.
380 : void ReplaceFirstSubstringAfterOffset(string16* str,
381 : string16::size_type start_offset,
382 : const string16& find_this,
383 : const string16& replace_with);
384 : void ReplaceFirstSubstringAfterOffset(std::string* str,
385 : std::string::size_type start_offset,
386 : const std::string& find_this,
387 : const std::string& replace_with);
388 :
389 : // Starting at |start_offset| (usually 0), look through |str| and replace all
390 : // instances of |find_this| with |replace_with|.
391 : //
392 : // This does entire substrings; use std::replace in <algorithm> for single
393 : // characters, for example:
394 : // std::replace(str.begin(), str.end(), 'a', 'b');
395 : void ReplaceSubstringsAfterOffset(string16* str,
396 : string16::size_type start_offset,
397 : const string16& find_this,
398 : const string16& replace_with);
399 : void ReplaceSubstringsAfterOffset(std::string* str,
400 : std::string::size_type start_offset,
401 : const std::string& find_this,
402 : const std::string& replace_with);
403 :
404 : // Specialized string-conversion functions.
405 : std::string IntToString(int value);
406 : std::wstring IntToWString(int value);
407 : std::string UintToString(unsigned int value);
408 : std::wstring UintToWString(unsigned int value);
409 : std::string Int64ToString(int64 value);
410 : std::wstring Int64ToWString(int64 value);
411 : std::string Uint64ToString(uint64 value);
412 : std::wstring Uint64ToWString(uint64 value);
413 : // The DoubleToString methods convert the double to a string format that
414 : // ignores the locale. If you want to use locale specific formatting, use ICU.
415 : std::string DoubleToString(double value);
416 : std::wstring DoubleToWString(double value);
417 :
418 : // Perform a best-effort conversion of the input string to a numeric type,
419 : // setting |*output| to the result of the conversion. Returns true for
420 : // "perfect" conversions; returns false in the following cases:
421 : // - Overflow/underflow. |*output| will be set to the maximum value supported
422 : // by the data type.
423 : // - Trailing characters in the string after parsing the number. |*output|
424 : // will be set to the value of the number that was parsed.
425 : // - No characters parseable as a number at the beginning of the string.
426 : // |*output| will be set to 0.
427 : // - Empty string. |*output| will be set to 0.
428 : bool StringToInt(const std::string& input, int* output);
429 : bool StringToInt(const string16& input, int* output);
430 : bool StringToInt64(const std::string& input, int64* output);
431 : bool StringToInt64(const string16& input, int64* output);
432 : bool HexStringToInt(const std::string& input, int* output);
433 : bool HexStringToInt(const string16& input, int* output);
434 :
435 : // Similar to the previous functions, except that output is a vector of bytes.
436 : // |*output| will contain as many bytes as were successfully parsed prior to the
437 : // error. There is no overflow, but input.size() must be evenly divisible by 2.
438 : // Leading 0x or +/- are not allowed.
439 : bool HexStringToBytes(const std::string& input, std::vector<uint8>* output);
440 : bool HexStringToBytes(const string16& input, std::vector<uint8>* output);
441 :
442 : // For floating-point conversions, only conversions of input strings in decimal
443 : // form are defined to work. Behavior with strings representing floating-point
// numbers in hexadecimal, and strings representing non-finite values (such as
445 : // NaN and inf) is undefined. Otherwise, these behave the same as the integral
446 : // variants. This expects the input string to NOT be specific to the locale.
447 : // If your input is locale specific, use ICU to read the number.
448 : bool StringToDouble(const std::string& input, double* output);
449 : bool StringToDouble(const string16& input, double* output);
450 :
451 : // Convenience forms of the above, when the caller is uninterested in the
452 : // boolean return value. These return only the |*output| value from the
453 : // above conversions: a best-effort conversion when possible, otherwise, 0.
454 : int StringToInt(const std::string& value);
455 : int StringToInt(const string16& value);
456 : int64 StringToInt64(const std::string& value);
457 : int64 StringToInt64(const string16& value);
458 : int HexStringToInt(const std::string& value);
459 : int HexStringToInt(const string16& value);
460 : double StringToDouble(const std::string& value);
461 : double StringToDouble(const string16& value);
462 :
463 : // Return a C++ string given printf-like input.
464 : std::string StringPrintf(const char* format, ...);
465 : std::wstring StringPrintf(const wchar_t* format, ...);
466 :
467 : // Store result into a supplied string and return it
468 : const std::string& SStringPrintf(std::string* dst, const char* format, ...);
469 : const std::wstring& SStringPrintf(std::wstring* dst,
470 : const wchar_t* format, ...);
471 :
472 : // Append result to a supplied string
473 : void StringAppendF(std::string* dst, const char* format, ...);
474 : void StringAppendF(std::wstring* dst, const wchar_t* format, ...);
475 :
476 : // Lower-level routine that takes a va_list and appends to a specified
477 : // string. All other routines are just convenience wrappers around it.
478 : void StringAppendV(std::string* dst, const char* format, va_list ap);
479 : void StringAppendV(std::wstring* dst, const wchar_t* format, va_list ap);
480 :
// This is mpcomplete's pattern for saving a string copy when dealing with
// a function that writes results into a character array (e.g. wchar_t[]) and
// wanting the result to end up in a string object (e.g. std::wstring). It
// makes sure |*str|'s internal buffer has room for the characters about to be
// written and sets its length to |length_with_null| - 1.
//
// |str|              - string to prepare; its previous contents are not
//                      preserved in any meaningful way.
// |length_with_null| - number of characters the caller will write, counting
//                      the terminating null. A value of 0 cannot describe a
//                      null-terminated string, so it is handled like 1 (the
//                      string becomes empty) instead of letting the
//                      subtraction wrap around to a huge length.
// Returns a pointer to the first character of |*str|'s buffer.
//
// The reserve() call allocates the memory required to hold the string
// plus a terminating null. This is done because resize() isn't
// guaranteed to reserve space for the null. The resize() call is
// simply the only way to change the string's 'length' member.
//
// XXX-performance: the call to resize() takes linear time, since it fills
// the string's buffer with nulls. It is called to change the length of the
// string (needed because writing directly to the buffer doesn't do this).
// Perhaps there's a constant-time way to change the string's length.
template <class string_type>
inline typename string_type::value_type* WriteInto(string_type* str,
                                                   size_t length_with_null) {
  // Guard the unsigned subtraction: 0 - 1 would wrap to the maximum size_t.
  const size_t length = length_with_null > 0 ? length_with_null - 1 : 0;
  str->reserve(length_with_null);
  str->resize(length);
  return &((*str)[0]);
}
503 :
504 : //-----------------------------------------------------------------------------
505 :
506 : // Function objects to aid in comparing/searching strings.
507 :
// Equality predicate that ignores case by running both characters through the
// C library's tolower before comparing them.
// NOTE(review): tolower is locale sensitive and takes an int; values outside
// the unsigned char range (negative chars, large wchar_t values) look unsafe
// to pass here - confirm against callers.
template<typename Char> struct chromium_CaseInsensitiveCompare {
  bool operator()(Char x, Char y) const {
    const int folded_x = tolower(x);
    const int folded_y = tolower(y);
    return folded_x == folded_y;
  }
};
514 :
// Equality predicate that ignores ASCII case only: both characters are passed
// through ToLowerASCII before being compared, so the result does not depend
// on the current locale.
template<typename Char> struct CaseInsensitiveCompareASCII {
  bool operator()(Char x, Char y) const {
    const Char folded_x = ToLowerASCII(x);
    const Char folded_y = ToLowerASCII(y);
    return folded_x == folded_y;
  }
};
521 :
522 : //-----------------------------------------------------------------------------
523 :
524 : // Splits |str| into a vector of strings delimited by |s|. Append the results
525 : // into |r| as they appear. If several instances of |s| are contiguous, or if
526 : // |str| begins with or ends with |s|, then an empty string is inserted.
527 : //
528 : // Every substring is trimmed of any leading or trailing white space.
529 : void SplitString(const std::wstring& str,
530 : wchar_t s,
531 : std::vector<std::wstring>* r);
532 : void SplitString(const std::string& str,
533 : char s,
534 : std::vector<std::string>* r);
535 :
536 : // The same as SplitString, but don't trim white space.
537 : void SplitStringDontTrim(const std::wstring& str,
538 : wchar_t s,
539 : std::vector<std::wstring>* r);
540 : void SplitStringDontTrim(const std::string& str,
541 : char s,
542 : std::vector<std::string>* r);
543 :
544 : // Does the opposite of SplitString().
545 : std::wstring JoinString(const std::vector<std::wstring>& parts, wchar_t s);
546 : std::string JoinString(const std::vector<std::string>& parts, char s);
547 :
548 : // WARNING: this uses whitespace as defined by the HTML5 spec. If you need
549 : // a function similar to this but want to trim all types of whitespace, then
550 : // factor this out into a function that takes a string containing the characters
551 : // that are treated as whitespace.
552 : //
553 : // Splits the string along whitespace (where whitespace is the five space
554 : // characters defined by HTML 5). Each contiguous block of non-whitespace
555 : // characters is added to result.
556 : void SplitStringAlongWhitespace(const std::wstring& str,
557 : std::vector<std::wstring>* result);
558 :
// Replace the placeholders $1, $2, $3, ... in the format string with |a|, |b|,
// |c|, ... respectively, using as many substitutions as the overload accepts.
// Additionally, $$ is replaced by $. The offset/offsets parameter here can be
// NULL.
562 : string16 ReplaceStringPlaceholders(const string16& format_string,
563 : const string16& a,
564 : size_t* offset);
565 :
566 : string16 ReplaceStringPlaceholders(const string16& format_string,
567 : const string16& a,
568 : const string16& b,
569 : std::vector<size_t>* offsets);
570 :
571 : string16 ReplaceStringPlaceholders(const string16& format_string,
572 : const string16& a,
573 : const string16& b,
574 : const string16& c,
575 : std::vector<size_t>* offsets);
576 :
577 : string16 ReplaceStringPlaceholders(const string16& format_string,
578 : const string16& a,
579 : const string16& b,
580 : const string16& c,
581 : const string16& d,
582 : std::vector<size_t>* offsets);
583 :
584 : // If the size of |input| is more than |max_len|, this function returns true and
585 : // |input| is shortened into |output| by removing chars in the middle (they are
586 : // replaced with up to 3 dots, as size permits).
587 : // Ex: ElideString(L"Hello", 10, &str) puts Hello in str and returns false.
588 : // ElideString(L"Hello my name is Tom", 10, &str) puts "Hell...Tom" in str and
589 : // returns true.
590 : bool ElideString(const std::wstring& input, int max_len, std::wstring* output);
591 :
592 : // Returns true if the string passed in matches the pattern. The pattern
593 : // string can contain wildcards like * and ?
594 : // TODO(iyengar) This function may not work correctly for CJK strings as
595 : // it does individual character matches.
596 : // The backslash character (\) is an escape character for * and ?
597 : bool MatchPattern(const std::wstring& string, const std::wstring& pattern);
598 : bool MatchPattern(const std::string& string, const std::string& pattern);
599 :
600 : // Returns a hex string representation of a binary buffer.
601 : // The returned hex string will be in upper case.
602 : // This function does not check if |size| is within reasonable limits since
603 : // it's written with trusted data in mind.
604 : // If you suspect that the data you want to format might be large,
// the absolute max size for |size| should be
606 : // std::numeric_limits<size_t>::max() / 2
607 : std::string HexEncode(const void* bytes, size_t size);
608 :
609 :
610 : #endif // BASE_STRING_UTIL_H_
|