1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set ts=4 sw=4 et tw=79 ft=cpp:
3 : *
4 : * ***** BEGIN LICENSE BLOCK *****
5 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 : *
7 : * The contents of this file are subject to the Mozilla Public License Version
8 : * 1.1 (the "License"); you may not use this file except in compliance with
9 : * the License. You may obtain a copy of the License at
10 : * http://www.mozilla.org/MPL/
11 : *
12 : * Software distributed under the License is distributed on an "AS IS" basis,
13 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 : * for the specific language governing rights and limitations under the
15 : * License.
16 : *
17 : * The Original Code is SpiderMonkey JavaScript engine.
18 : *
19 : * The Initial Developer of the Original Code is
20 : * Mozilla Corporation.
21 : * Portions created by the Initial Developer are Copyright (C) 2009
22 : * the Initial Developer. All Rights Reserved.
23 : *
24 : * Contributor(s):
25 : * Luke Wagner <luke@mozilla.com>
26 : *
27 : * Alternatively, the contents of this file may be used under the terms of
28 : * either the GNU General Public License Version 2 or later (the "GPL"), or
29 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 : * in which case the provisions of the GPL or the LGPL are applicable instead
31 : * of those above. If you wish to allow use of your version of this file only
32 : * under the terms of either the GPL or the LGPL, and not to allow others to
33 : * use your version of this file under the terms of the MPL, indicate your
34 : * decision by deleting the provisions above and replace them with the notice
35 : * and other provisions required by the GPL or the LGPL. If you do not delete
36 : * the provisions above, a recipient may use your version of this file under
37 : * the terms of any one of the MPL, the GPL or the LGPL.
38 : *
39 : * ***** END LICENSE BLOCK ***** */
40 :
41 : #include "mozilla/RangedPtr.h"
42 :
43 : #include "jsgcmark.h"
44 :
45 : #include "String.h"
46 : #include "String-inl.h"
47 :
48 : #include "jsobjinlines.h"
49 :
50 : using namespace mozilla;
51 : using namespace js;
52 :
53 : bool
54 115444609 : JSString::isShort() const
55 : {
56 115444609 : bool is_short = (getAllocKind() == gc::FINALIZE_SHORT_STRING);
57 115444280 : JS_ASSERT_IF(is_short, isFlat());
58 115444280 : return is_short;
59 : }
60 :
61 : bool
62 1694366 : JSString::isFixed() const
63 : {
64 1694366 : return isFlat() && !isExtensible();
65 : }
66 :
67 : bool
68 60990 : JSString::isInline() const
69 : {
70 60990 : return isFixed() && (d.u1.chars == d.inlineStorage || isShort());
71 : }
72 :
73 : bool
74 1038206 : JSString::isExternal() const
75 : {
76 1038206 : bool is_external = (getAllocKind() == gc::FINALIZE_EXTERNAL_STRING);
77 1038206 : JS_ASSERT_IF(is_external, isFixed());
78 1038206 : return is_external;
79 : }
80 :
81 : size_t
82 61918 : JSString::sizeOfExcludingThis(JSMallocSizeOfFun mallocSizeOf)
83 : {
84 : /* JSRope: do nothing, we'll count all children chars when we hit the leaf strings. */
85 61918 : if (isRope())
86 178 : return 0;
87 :
88 61740 : JS_ASSERT(isLinear());
89 :
90 : /* JSDependentString: do nothing, we'll count the chars when we hit the base string. */
91 61740 : if (isDependent())
92 105 : return 0;
93 :
94 61635 : JS_ASSERT(isFlat());
95 :
96 : /* JSExtensibleString: count the full capacity, not just the used space. */
97 61635 : if (isExtensible()) {
98 437 : JSExtensibleString &extensible = asExtensible();
99 437 : return mallocSizeOf(extensible.chars());
100 : }
101 :
102 61198 : JS_ASSERT(isFixed());
103 :
104 : /* JSExternalString: don't count, the chars could be stored anywhere. */
105 61198 : if (isExternal())
106 208 : return 0;
107 :
108 : /* JSInlineString, JSShortString, JSInlineAtom, JSShortAtom: the chars are inline. */
109 60990 : if (isInline())
110 37036 : return 0;
111 :
112 : /* JSAtom, JSFixedString: count the chars. +1 for the null char. */
113 23954 : JSFixedString &fixed = asFixed();
114 23954 : return mallocSizeOf(fixed.chars());
115 : }
116 :
117 : #ifdef DEBUG
118 : void
119 0 : JSString::dump()
120 : {
121 0 : if (const jschar *chars = getChars(NULL)) {
122 : fprintf(stderr, "JSString* (%p) = jschar * (%p) = ",
123 0 : (void *) this, (void *) chars);
124 :
125 : extern void DumpChars(const jschar *s, size_t n);
126 0 : DumpChars(chars, length());
127 : } else {
128 0 : fprintf(stderr, "(oom in JSString::dump)");
129 : }
130 0 : fputc('\n', stderr);
131 0 : }
132 :
133 : bool
134 0 : JSString::equals(const char *s)
135 : {
136 0 : const jschar *c = getChars(NULL);
137 0 : if (!c) {
138 0 : fprintf(stderr, "OOM in JSString::equals!\n");
139 0 : return false;
140 : }
141 0 : while (*c && *s) {
142 0 : if (*c != *s)
143 0 : return false;
144 0 : c++;
145 0 : s++;
146 : }
147 0 : return *c == *s;
148 : }
149 : #endif /* DEBUG */
150 :
151 : static JS_ALWAYS_INLINE bool
152 1026387 : AllocChars(JSContext *maybecx, size_t length, jschar **chars, size_t *capacity)
153 : {
154 : /*
155 : * String length doesn't include the null char, so include it here before
156 : * doubling. Adding the null char after doubling would interact poorly with
157 : * round-up malloc schemes.
158 : */
159 1026387 : size_t numChars = length + 1;
160 :
161 : /*
162 : * Grow by 12.5% if the buffer is very large. Otherwise, round up to the
163 : * next power of 2. This is similar to what we do with arrays; see
164 : * JSObject::ensureDenseArrayElements.
165 : */
166 : static const size_t DOUBLING_MAX = 1024 * 1024;
167 1026387 : numChars = numChars > DOUBLING_MAX ? numChars + (numChars / 8) : RoundUpPow2(numChars);
168 :
169 : /* Like length, capacity does not include the null char, so take it out. */
170 1026387 : *capacity = numChars - 1;
171 :
172 : JS_STATIC_ASSERT(JSString::MAX_LENGTH * sizeof(jschar) < UINT32_MAX);
173 1026387 : size_t bytes = numChars * sizeof(jschar);
174 1026387 : *chars = (jschar *)(maybecx ? maybecx->malloc_(bytes) : OffTheBooks::malloc_(bytes));
175 1026387 : return *chars != NULL;
176 : }
177 :
178 : template<JSRope::UsingBarrier b>
179 : JSFlatString *
180 1116595 : JSRope::flattenInternal(JSContext *maybecx)
181 : {
182 : /*
183 : * Perform a depth-first dag traversal, splatting each node's characters
184 : * into a contiguous buffer. Visit each rope node three times:
185 : * 1. record position in the buffer and recurse into left child;
186 : * 2. recurse into the right child;
187 : * 3. transform the node into a dependent string.
188 : * To avoid maintaining a stack, tree nodes are mutated to indicate how many
189 : * times they have been visited. Since ropes can be dags, a node may be
190 : * encountered multiple times during traversal. However, step 3 above leaves
191 : * a valid dependent string, so everything works out. This algorithm is
192 : * homomorphic to marking code.
193 : *
194 : * While ropes avoid all sorts of quadratic cases with string
195 : * concatenation, they can't help when ropes are immediately flattened.
196 : * One idiomatic case that we'd like to keep linear (and has traditionally
197 : * been linear in SM and other JS engines) is:
198 : *
199 : * while (...) {
200 : * s += ...
201 : * s.flatten
202 : * }
203 : *
204 : * To do this, when the buffer for a to-be-flattened rope is allocated, the
205 : * allocation size is rounded up. Then, if the resulting flat string is the
206 : * left-hand side of a new rope that gets flattened and there is enough
207 : * capacity, the rope is flattened into the same buffer, thereby avoiding
208 : * copying the left-hand side. Clearing the 'extensible' bit turns off this
209 : * optimization. This is necessary, e.g., when the JSAPI hands out the raw
210 : * null-terminated char array of a flat string.
211 : *
212 : * N.B. This optimization can create chains of dependent strings.
213 : */
214 1116595 : const size_t wholeLength = length();
215 : size_t wholeCapacity;
216 : jschar *wholeChars;
217 1116595 : JSString *str = this;
218 : jschar *pos;
219 :
220 1116595 : if (this->leftChild()->isExtensible()) {
221 90576 : JSExtensibleString &left = this->leftChild()->asExtensible();
222 90576 : size_t capacity = left.capacity();
223 90576 : if (capacity >= wholeLength) {
224 : if (b == WithIncrementalBarrier) {
225 0 : JSString::writeBarrierPre(d.u1.left);
226 0 : JSString::writeBarrierPre(d.s.u2.right);
227 : }
228 :
229 90208 : wholeCapacity = capacity;
230 90208 : wholeChars = const_cast<jschar *>(left.chars());
231 90208 : size_t bits = left.d.lengthAndFlags;
232 90208 : pos = wholeChars + (bits >> LENGTH_SHIFT);
233 90208 : left.d.lengthAndFlags = bits ^ (EXTENSIBLE_FLAGS | DEPENDENT_BIT);
234 90208 : left.d.s.u2.base = (JSLinearString *)this; /* will be true on exit */
235 90208 : JSString::writeBarrierPost(left.d.s.u2.base, &left.d.s.u2.base);
236 90208 : goto visit_right_child;
237 : }
238 : }
239 :
240 1026387 : if (!AllocChars(maybecx, wholeLength, &wholeChars, &wholeCapacity))
241 0 : return NULL;
242 :
243 1026387 : pos = wholeChars;
244 : first_visit_node: {
245 : if (b == WithIncrementalBarrier) {
246 25 : JSString::writeBarrierPre(str->d.u1.left);
247 25 : JSString::writeBarrierPre(str->d.s.u2.right);
248 : }
249 :
250 7173195 : JSString &left = *str->d.u1.left;
251 7173195 : str->d.u1.chars = pos;
252 7173195 : if (left.isRope()) {
253 5454299 : left.d.s.u3.parent = str; /* Return to this when 'left' done, */
254 5454299 : left.d.lengthAndFlags = 0x200; /* but goto visit_right_child. */
255 5454299 : str = &left;
256 5454299 : goto first_visit_node;
257 : }
258 1718896 : size_t len = left.length();
259 1718896 : PodCopy(pos, left.d.u1.chars, len);
260 1718896 : pos += len;
261 : }
262 : visit_right_child: {
263 7263403 : JSString &right = *str->d.s.u2.right;
264 7263403 : if (right.isRope()) {
265 692509 : right.d.s.u3.parent = str; /* Return to this node when 'right' done, */
266 692509 : right.d.lengthAndFlags = 0x300; /* but goto finish_node. */
267 692509 : str = &right;
268 692509 : goto first_visit_node;
269 : }
270 6570894 : size_t len = right.length();
271 6570894 : PodCopy(pos, right.d.u1.chars, len);
272 6570894 : pos += len;
273 : }
274 : finish_node: {
275 7263403 : if (str == this) {
276 1116595 : JS_ASSERT(pos == wholeChars + wholeLength);
277 1116595 : *pos = '\0';
278 1116595 : str->d.lengthAndFlags = buildLengthAndFlags(wholeLength, EXTENSIBLE_FLAGS);
279 1116595 : str->d.u1.chars = wholeChars;
280 1116595 : str->d.s.u2.capacity = wholeCapacity;
281 1116595 : return &this->asFlat();
282 : }
283 6146808 : size_t progress = str->d.lengthAndFlags;
284 6146808 : str->d.lengthAndFlags = buildLengthAndFlags(pos - str->d.u1.chars, DEPENDENT_BIT);
285 6146808 : str->d.s.u2.base = (JSLinearString *)this; /* will be true on exit */
286 6146808 : JSString::writeBarrierPost(str->d.s.u2.base, &str->d.s.u2.base);
287 6146808 : str = str->d.s.u3.parent;
288 6146808 : if (progress == 0x200)
289 5454299 : goto visit_right_child;
290 692509 : JS_ASSERT(progress == 0x300);
291 692509 : goto finish_node;
292 : }
293 : }
294 :
295 : JSFlatString *
296 1116595 : JSRope::flatten(JSContext *maybecx)
297 : {
298 : #if JSGC_INCREMENTAL
299 1116595 : if (compartment()->needsBarrier())
300 16 : return flattenInternal<WithIncrementalBarrier>(maybecx);
301 : else
302 1116579 : return flattenInternal<NoBarrier>(maybecx);
303 : #else
304 : return flattenInternal<NoBarrier>(maybecx);
305 : #endif
306 : }
307 :
308 : JSString * JS_FASTCALL
309 24782406 : js_ConcatStrings(JSContext *cx, JSString *left, JSString *right)
310 : {
311 24782406 : JS_ASSERT_IF(!left->isAtom(), left->compartment() == cx->compartment);
312 24782406 : JS_ASSERT_IF(!right->isAtom(), right->compartment() == cx->compartment);
313 :
314 24782406 : size_t leftLen = left->length();
315 24782406 : if (leftLen == 0)
316 201618 : return right;
317 :
318 24580788 : size_t rightLen = right->length();
319 24580788 : if (rightLen == 0)
320 613715 : return left;
321 :
322 23967073 : size_t wholeLength = leftLen + rightLen;
323 23967073 : if (!JSString::validateLength(cx, wholeLength))
324 18 : return NULL;
325 :
326 23967055 : if (JSShortString::lengthFits(wholeLength)) {
327 1975791 : JSShortString *str = js_NewGCShortString(cx);
328 1975791 : if (!str)
329 0 : return NULL;
330 1975791 : const jschar *leftChars = left->getChars(cx);
331 1975791 : if (!leftChars)
332 0 : return NULL;
333 1975791 : const jschar *rightChars = right->getChars(cx);
334 1975791 : if (!rightChars)
335 0 : return NULL;
336 :
337 1975791 : jschar *buf = str->init(wholeLength);
338 1975791 : PodCopy(buf, leftChars, leftLen);
339 1975791 : PodCopy(buf + leftLen, rightChars, rightLen);
340 1975791 : buf[wholeLength] = 0;
341 1975791 : return str;
342 : }
343 :
344 21991264 : return JSRope::new_(cx, left, right, wholeLength);
345 : }
346 :
347 : JSFixedString *
348 26772 : JSDependentString::undepend(JSContext *cx)
349 : {
350 26772 : JS_ASSERT(JSString::isDependent());
351 :
352 : /*
353 : * We destroy the base() pointer in undepend, so we need a pre-barrier. We
354 : * don't need a post-barrier because there aren't any outgoing pointers
355 : * afterwards.
356 : */
357 26772 : JSString::writeBarrierPre(base());
358 :
359 26772 : size_t n = length();
360 26772 : size_t size = (n + 1) * sizeof(jschar);
361 26772 : jschar *s = (jschar *) cx->malloc_(size);
362 26772 : if (!s)
363 0 : return NULL;
364 :
365 26772 : PodCopy(s, chars(), n);
366 26772 : s[n] = 0;
367 :
368 26772 : d.lengthAndFlags = buildLengthAndFlags(n, FIXED_FLAGS);
369 26772 : d.u1.chars = s;
370 :
371 26772 : return &this->asFixed();
372 : }
373 :
374 : bool
375 102598769 : JSFlatString::isIndex(uint32_t *indexp) const
376 : {
377 102598769 : const jschar *s = charsZ();
378 102598769 : jschar ch = *s;
379 :
380 102598769 : if (!JS7_ISDEC(ch))
381 99073976 : return false;
382 :
383 3524793 : size_t n = length();
384 3524793 : if (n > UINT32_CHAR_BUFFER_LENGTH)
385 1788 : return false;
386 :
387 : /*
388 : * Make sure to account for the '\0' at the end of characters, dereferenced
389 : * in the loop below.
390 : */
391 3523005 : RangedPtr<const jschar> cp(s, n + 1);
392 3523005 : const RangedPtr<const jschar> end(s + n, s, n + 1);
393 :
394 3523005 : uint32_t index = JS7_UNDEC(*cp++);
395 3523005 : uint32_t oldIndex = 0;
396 3523005 : uint32_t c = 0;
397 :
398 3523005 : if (index != 0) {
399 14987613 : while (JS7_ISDEC(*cp)) {
400 8014279 : oldIndex = index;
401 8014279 : c = JS7_UNDEC(*cp);
402 8014279 : index = 10 * index + c;
403 8014279 : cp++;
404 : }
405 : }
406 :
407 : /* It's not an element if there are characters after the number. */
408 3523005 : if (cp != end)
409 770 : return false;
410 :
411 : /*
412 : * Look out for "4294967296" and larger-number strings that fit in
413 : * UINT32_CHAR_BUFFER_LENGTH: only unsigned 32-bit integers shall pass.
414 : */
415 3522235 : if (oldIndex < UINT32_MAX / 10 || (oldIndex == UINT32_MAX / 10 && c <= (UINT32_MAX % 10))) {
416 3522139 : *indexp = index;
417 3522139 : return true;
418 : }
419 :
420 96 : return false;
421 : }
422 :
/*
 * Tools that make it easier to generate large tables. After constant folding,
 * for each n, Rn(0) is the comma-separated list R(0), R(1), ..., R(2^n-1).
 * Similarly, Rn(k) (for any k and n) generates the list
 * R(k), R(k+1), ..., R(k+2^n-1). To use this, define R appropriately, then use
 * Rn(0) (for some value of n), then undefine R.
 *
 * Each level repeats the previous one at offsets that are multiples of the
 * previous level's size (1, 4, 16 and 64 entries respectively).
 */
#define R2(n) R(n),  R((n) + 1),   R((n) + 2),   R((n) + 3)
#define R4(n) R2(n), R2((n) + 4),  R2((n) + 8),  R2((n) + 12)
#define R6(n) R4(n), R4((n) + 16), R4((n) + 32), R4((n) + 48)
#define R7(n) R6(n), R6((n) + 64)

/*
 * Map a small-char index back to its character: indices below 10 are the
 * numerals, the next 26 are the lowercase letters, and indices from 36 up are
 * offset from 'A'. Note that |c| is evaluated more than once.
 */
#define FROM_SMALL_CHAR(c) ((c) + ((c) < 10 ? '0' :                           \
                                   (c) < 36 ? 'a' - 10 :                      \
                                              'A' - 36))

/*
 * Map a character to its small-char index. Only alphanumeric characters have
 * one: the lower 10 small chars are the numerals, the next 26 are the
 * lowercase letters, and the next 26 are the uppercase letters. Anything else
 * yields StaticStrings::INVALID_SMALL_CHAR. Note that |c| is evaluated more
 * than once.
 */
#define TO_SMALL_CHAR(c) (((c) >= '0' && (c) <= '9') ? (c) - '0' :            \
                          ((c) >= 'a' && (c) <= 'z') ? (c) - 'a' + 10 :       \
                          ((c) >= 'A' && (c) <= 'Z') ? (c) - 'A' + 36 :       \
                          StaticStrings::INVALID_SMALL_CHAR)
452 :
453 : #define R TO_SMALL_CHAR
454 : const StaticStrings::SmallChar StaticStrings::toSmallChar[] = { R7(0) };
455 : #undef R
456 :
457 : bool
458 19910 : StaticStrings::init(JSContext *cx)
459 : {
460 39820 : SwitchToCompartment sc(cx, cx->runtime->atomsCompartment);
461 :
462 5116870 : for (uint32_t i = 0; i < UNIT_STATIC_LIMIT; i++) {
463 5096960 : jschar buffer[] = { i, 0x00 };
464 5096960 : JSFixedString *s = js_NewStringCopyN(cx, buffer, 1);
465 5096960 : if (!s)
466 0 : return false;
467 5096960 : unitStaticTable[i] = s->morphAtomizedStringIntoAtom();
468 : }
469 :
470 81571270 : for (uint32_t i = 0; i < NUM_SMALL_CHARS * NUM_SMALL_CHARS; i++) {
471 81551360 : jschar buffer[] = { FROM_SMALL_CHAR(i >> 6), FROM_SMALL_CHAR(i & 0x3F), 0x00 };
472 81551360 : JSFixedString *s = js_NewStringCopyN(cx, buffer, 2);
473 81551360 : if (!s)
474 0 : return false;
475 81551360 : length2StaticTable[i] = s->morphAtomizedStringIntoAtom();
476 : }
477 :
478 5116870 : for (uint32_t i = 0; i < INT_STATIC_LIMIT; i++) {
479 5096960 : if (i < 10) {
480 199100 : intStaticTable[i] = unitStaticTable[i + '0'];
481 4897860 : } else if (i < 100) {
482 : size_t index = ((size_t)TO_SMALL_CHAR((i / 10) + '0') << 6) +
483 1791900 : TO_SMALL_CHAR((i % 10) + '0');
484 1791900 : intStaticTable[i] = length2StaticTable[index];
485 : } else {
486 3105960 : jschar buffer[] = { (i / 100) + '0', ((i / 10) % 10) + '0', (i % 10) + '0', 0x00 };
487 3105960 : JSFixedString *s = js_NewStringCopyN(cx, buffer, 3);
488 3105960 : if (!s)
489 0 : return false;
490 3105960 : intStaticTable[i] = s->morphAtomizedStringIntoAtom();
491 : }
492 : }
493 :
494 19910 : return true;
495 : }
496 :
497 : void
498 54466 : StaticStrings::trace(JSTracer *trc)
499 : {
500 : /* These strings never change, so barriers are not needed. */
501 :
502 13997762 : for (uint32_t i = 0; i < UNIT_STATIC_LIMIT; i++) {
503 13943296 : if (unitStaticTable[i])
504 13943296 : MarkStringUnbarriered(trc, &unitStaticTable[i], "unit-static-string");
505 : }
506 :
507 223147202 : for (uint32_t i = 0; i < NUM_SMALL_CHARS * NUM_SMALL_CHARS; i++) {
508 223092736 : if (length2StaticTable[i])
509 223092736 : MarkStringUnbarriered(trc, &length2StaticTable[i], "length2-static-string");
510 : }
511 :
512 : /* This may mark some strings more than once, but so be it. */
513 13997762 : for (uint32_t i = 0; i < INT_STATIC_LIMIT; i++) {
514 13943296 : if (intStaticTable[i])
515 13943296 : MarkStringUnbarriered(trc, &intStaticTable[i], "int-static-string");
516 : }
517 54466 : }
518 :
519 : bool
520 4237986 : StaticStrings::isStatic(JSAtom *atom)
521 : {
522 4237986 : const jschar *chars = atom->chars();
523 4237986 : switch (atom->length()) {
524 : case 1:
525 6 : return (chars[0] < UNIT_STATIC_LIMIT);
526 : case 2:
527 6745 : return (fitsInSmallChar(chars[0]) && fitsInSmallChar(chars[1]));
528 : case 3:
529 55193 : if ('1' <= chars[0] && chars[0] <= '9' &&
530 2 : '0' <= chars[1] && chars[1] <= '9' &&
531 2 : '0' <= chars[2] && chars[2] <= '9') {
532 1 : int i = (chars[0] - '0') * 100 +
533 1 : (chars[1] - '0') * 10 +
534 2 : (chars[2] - '0');
535 :
536 1 : return (unsigned(i) < INT_STATIC_LIMIT);
537 : }
538 55188 : return false;
539 : default:
540 4176046 : return false;
541 : }
542 : }
543 :
544 : #ifdef DEBUG
545 : void
546 0 : JSAtom::dump()
547 : {
548 0 : fprintf(stderr, "JSAtom* (%p) = ", (void *) this);
549 0 : this->JSString::dump();
550 0 : }
551 : #endif /* DEBUG */
|