1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is Mozilla Communicator client code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 1998
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : *
24 : * Alternatively, the contents of this file may be used under the terms of
25 : * either of the GNU General Public License Version 2 or later (the "GPL"),
26 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 : * in which case the provisions of the GPL or the LGPL are applicable instead
28 : * of those above. If you wish to allow use of your version of this file only
29 : * under the terms of either the GPL or the LGPL, and not to allow others to
30 : * use your version of this file under the terms of the MPL, indicate your
31 : * decision by deleting the provisions above and replace them with the notice
32 : * and other provisions required by the GPL or the LGPL. If you do not delete
33 : * the provisions above, a recipient may use your version of this file under
34 : * the terms of any one of the MPL, the GPL or the LGPL.
35 : *
36 : * ***** END LICENSE BLOCK ***** */
37 :
38 : /*
39 : * A class which represents a fragment of text (eg inside a text
40 : * node); if only codepoints below 256 are used, the text is stored as
41 : * a char*; otherwise the text is stored as a PRUnichar*
42 : */
43 :
44 : #include "nsTextFragment.h"
45 : #include "nsCRT.h"
46 : #include "nsReadableUtils.h"
47 : #include "nsMemory.h"
48 : #include "nsBidiUtils.h"
49 : #include "nsUnicharUtils.h"
50 : #include "nsUTF8Utils.h"
51 : #include "mozilla/SSE.h"
52 :
53 : #define TEXTFRAG_WHITE_AFTER_NEWLINE 50
54 : #define TEXTFRAG_MAX_NEWLINES 7
55 :
56 : // Static buffer used for common fragments
57 : static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1];
58 : static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1];
59 : static char sSingleCharSharedString[256];
60 :
61 : // static
62 : nsresult
63 1404 : nsTextFragment::Init()
64 : {
65 : // Create whitespace strings
66 : PRUint32 i;
67 12636 : for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
68 22464 : sSpaceSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
69 22464 : sTabSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
70 11232 : NS_ENSURE_TRUE(sSpaceSharedString[i] && sTabSharedString[i],
71 : NS_ERROR_OUT_OF_MEMORY);
72 11232 : sSpaceSharedString[i][0] = ' ';
73 11232 : sTabSharedString[i][0] = ' ';
74 : PRUint32 j;
75 50544 : for (j = 1; j < 1 + i; ++j) {
76 39312 : sSpaceSharedString[i][j] = '\n';
77 39312 : sTabSharedString[i][j] = '\n';
78 : }
79 572832 : for (; j < (1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE); ++j) {
80 561600 : sSpaceSharedString[i][j] = ' ';
81 561600 : sTabSharedString[i][j] = '\t';
82 : }
83 : }
84 :
85 : // Create single-char strings
86 360828 : for (i = 0; i < 256; ++i) {
87 359424 : sSingleCharSharedString[i] = i;
88 : }
89 :
90 1404 : return NS_OK;
91 : }
92 :
93 : // static
94 : void
95 1403 : nsTextFragment::Shutdown()
96 : {
97 : PRUint32 i;
98 12627 : for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
99 11224 : delete [] sSpaceSharedString[i];
100 11224 : delete [] sTabSharedString[i];
101 11224 : sSpaceSharedString[i] = nsnull;
102 11224 : sTabSharedString[i] = nsnull;
103 : }
104 1403 : }
105 :
106 74983 : nsTextFragment::~nsTextFragment()
107 : {
108 74983 : ReleaseText();
109 74983 : MOZ_COUNT_DTOR(nsTextFragment);
110 74983 : }
111 :
112 : void
113 149954 : nsTextFragment::ReleaseText()
114 : {
115 149954 : if (mState.mLength && m1b && mState.mInHeap) {
116 16870 : nsMemory::Free(m2b); // m1b == m2b as far as nsMemory is concerned
117 : }
118 :
119 149954 : m1b = nsnull;
120 149954 : mState.mIsBidi = false;
121 :
122 : // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits;
123 149954 : mAllBits = 0;
124 149954 : }
125 :
126 : nsTextFragment&
127 482 : nsTextFragment::operator=(const nsTextFragment& aOther)
128 : {
129 482 : ReleaseText();
130 :
131 482 : if (aOther.mState.mLength) {
132 475 : if (!aOther.mState.mInHeap) {
133 320 : m1b = aOther.m1b; // This will work even if aOther is using m2b
134 : }
135 : else {
136 : m2b = static_cast<PRUnichar*>
137 : (nsMemory::Clone(aOther.m2b, aOther.mState.mLength *
138 155 : (aOther.mState.mIs2b ? sizeof(PRUnichar) : sizeof(char))));
139 : }
140 :
141 475 : if (m1b) {
142 475 : mAllBits = aOther.mAllBits;
143 : }
144 : }
145 :
146 482 : return *this;
147 : }
148 :
149 : static inline PRInt32
150 0 : FirstNon8BitUnvectorized(const PRUnichar *str, const PRUnichar *end)
151 : {
152 : #if PR_BYTES_PER_WORD == 4
153 0 : const size_t mask = 0xff00ff00;
154 0 : const PRUint32 alignMask = 0x3;
155 0 : const PRUint32 numUnicharsPerWord = 2;
156 : #elif PR_BYTES_PER_WORD == 8
157 : const size_t mask = 0xff00ff00ff00ff00;
158 : const PRUint32 alignMask = 0x7;
159 : const PRUint32 numUnicharsPerWord = 4;
160 : #else
161 : #error Unknown platform!
162 : #endif
163 :
164 0 : const PRInt32 len = end - str;
165 0 : PRInt32 i = 0;
166 :
167 : // Align ourselves to a word boundary.
168 : PRInt32 alignLen =
169 0 : NS_MIN(len, PRInt32(((-NS_PTR_TO_INT32(str)) & alignMask) / sizeof(PRUnichar)));
170 0 : for (; i < alignLen; i++) {
171 0 : if (str[i] > 255)
172 0 : return i;
173 : }
174 :
175 : // Check one word at a time.
176 0 : const PRInt32 wordWalkEnd = ((len - i) / numUnicharsPerWord) * numUnicharsPerWord;
177 0 : for (; i < wordWalkEnd; i += numUnicharsPerWord) {
178 0 : const size_t word = *reinterpret_cast<const size_t*>(str + i);
179 0 : if (word & mask)
180 0 : return i;
181 : }
182 :
183 : // Take care of the remainder one character at a time.
184 0 : for (; i < len; i++) {
185 0 : if (str[i] > 255)
186 0 : return i;
187 : }
188 :
189 0 : return -1;
190 : }
191 :
#ifdef MOZILLA_MAY_SUPPORT_SSE2
// Declaration only: the SSE2 variant of the scan is defined elsewhere and is
// selected at runtime by FirstNon8Bit() when the CPU supports SSE2.
namespace mozilla {
namespace SSE2 {

PRInt32 FirstNon8Bit(const PRUnichar *str, const PRUnichar *end);

} // namespace SSE2
} // namespace mozilla
#endif // MOZILLA_MAY_SUPPORT_SSE2
199 :
200 : /*
201 : * This function returns -1 if all characters in str are 8 bit characters.
202 : * Otherwise, it returns a value less than or equal to the index of the first
203 : * non-8bit character in str. For example, if first non-8bit character is at
204 : * position 25, it may return 25, or for example 24, or 16. But it guarantees
205 : * there is no non-8bit character before returned value.
206 : */
207 : static inline PRInt32
208 16726 : FirstNon8Bit(const PRUnichar *str, const PRUnichar *end)
209 : {
210 : #ifdef MOZILLA_MAY_SUPPORT_SSE2
211 16726 : if (mozilla::supports_sse2()) {
212 16726 : return mozilla::SSE2::FirstNon8Bit(str, end);
213 : }
214 : #endif
215 :
216 0 : return FirstNon8BitUnvectorized(str, end);
217 : }
218 :
219 : void
220 74489 : nsTextFragment::SetTo(const PRUnichar* aBuffer, PRInt32 aLength, bool aUpdateBidi)
221 : {
222 74489 : ReleaseText();
223 :
224 74489 : if (aLength == 0) {
225 53 : return;
226 : }
227 :
228 74436 : PRUnichar firstChar = *aBuffer;
229 74436 : if (aLength == 1 && firstChar < 256) {
230 4595 : m1b = sSingleCharSharedString + firstChar;
231 4595 : mState.mInHeap = false;
232 4595 : mState.mIs2b = false;
233 4595 : mState.mLength = 1;
234 :
235 4595 : return;
236 : }
237 :
238 69841 : const PRUnichar *ucp = aBuffer;
239 69841 : const PRUnichar *uend = aBuffer + aLength;
240 :
241 : // Check if we can use a shared string
242 69841 : if (aLength <= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE + TEXTFRAG_MAX_NEWLINES &&
243 : (firstChar == ' ' || firstChar == '\n' || firstChar == '\t')) {
244 55353 : if (firstChar == ' ') {
245 2116 : ++ucp;
246 : }
247 :
248 55353 : const PRUnichar* start = ucp;
249 166222 : while (ucp < uend && *ucp == '\n') {
250 55516 : ++ucp;
251 : }
252 55353 : const PRUnichar* endNewLine = ucp;
253 :
254 55353 : PRUnichar space = ucp < uend && *ucp == '\t' ? '\t' : ' ';
255 586587 : while (ucp < uend && *ucp == space) {
256 475881 : ++ucp;
257 : }
258 :
259 55353 : if (ucp == uend &&
260 : endNewLine - start <= TEXTFRAG_MAX_NEWLINES &&
261 : ucp - endNewLine <= TEXTFRAG_WHITE_AFTER_NEWLINE) {
262 53125 : char** strings = space == ' ' ? sSpaceSharedString : sTabSharedString;
263 53125 : m1b = strings[endNewLine - start];
264 :
265 : // If we didn't find a space in the beginning, skip it now.
266 53125 : if (firstChar != ' ') {
267 52711 : ++m1b;
268 : }
269 :
270 53125 : mState.mInHeap = false;
271 53125 : mState.mIs2b = false;
272 53125 : mState.mLength = aLength;
273 :
274 53125 : return;
275 : }
276 : }
277 :
278 : // See if we need to store the data in ucs2 or not
279 16716 : PRInt32 first16bit = FirstNon8Bit(ucp, uend);
280 :
281 16716 : if (first16bit != -1) { // aBuffer contains no non-8bit character
282 : // Use ucs2 storage because we have to
283 : m2b = (PRUnichar *)nsMemory::Clone(aBuffer,
284 6 : aLength * sizeof(PRUnichar));
285 6 : if (!m2b) {
286 0 : return;
287 : }
288 :
289 6 : mState.mIs2b = true;
290 6 : if (aUpdateBidi) {
291 6 : UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
292 : }
293 :
294 : } else {
295 : // Use 1 byte storage because we can
296 16710 : char* buff = (char *)nsMemory::Alloc(aLength * sizeof(char));
297 16710 : if (!buff) {
298 0 : return;
299 : }
300 :
301 : // Copy data
302 16710 : LossyConvertEncoding16to8 converter(buff);
303 16710 : copy_string(aBuffer, aBuffer+aLength, converter);
304 16710 : m1b = buff;
305 16710 : mState.mIs2b = false;
306 : }
307 :
308 : // Setup our fields
309 16716 : mState.mInHeap = true;
310 16716 : mState.mLength = aLength;
311 : }
312 :
313 : void
314 20 : nsTextFragment::CopyTo(PRUnichar *aDest, PRInt32 aOffset, PRInt32 aCount)
315 : {
316 20 : NS_ASSERTION(aOffset >= 0, "Bad offset passed to nsTextFragment::CopyTo()!");
317 20 : NS_ASSERTION(aCount >= 0, "Bad count passed to nsTextFragment::CopyTo()!");
318 :
319 20 : if (aOffset < 0) {
320 0 : aOffset = 0;
321 : }
322 :
323 20 : if (PRUint32(aOffset + aCount) > GetLength()) {
324 0 : aCount = mState.mLength - aOffset;
325 : }
326 :
327 20 : if (aCount != 0) {
328 20 : if (mState.mIs2b) {
329 0 : memcpy(aDest, m2b + aOffset, sizeof(PRUnichar) * aCount);
330 : } else {
331 20 : const char *cp = m1b + aOffset;
332 20 : const char *end = cp + aCount;
333 20 : LossyConvertEncoding8to16 converter(aDest);
334 20 : copy_string(cp, end, converter);
335 : }
336 : }
337 20 : }
338 :
339 : void
340 10 : nsTextFragment::Append(const PRUnichar* aBuffer, PRUint32 aLength, bool aUpdateBidi)
341 : {
342 : // This is a common case because some callsites create a textnode
343 : // with a value by creating the node and then calling AppendData.
344 10 : if (mState.mLength == 0) {
345 0 : SetTo(aBuffer, aLength, aUpdateBidi);
346 :
347 0 : return;
348 : }
349 :
350 : // Should we optimize for aData.Length() == 0?
351 :
352 10 : if (mState.mIs2b) {
353 : // Already a 2-byte string so the result will be too
354 0 : PRUnichar* buff = (PRUnichar*)nsMemory::Realloc(m2b, (mState.mLength + aLength) * sizeof(PRUnichar));
355 0 : if (!buff) {
356 0 : return;
357 : }
358 :
359 0 : memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(PRUnichar));
360 0 : mState.mLength += aLength;
361 0 : m2b = buff;
362 :
363 0 : if (aUpdateBidi) {
364 0 : UpdateBidiFlag(aBuffer, aLength);
365 : }
366 :
367 0 : return;
368 : }
369 :
370 : // Current string is a 1-byte string, check if the new data fits in one byte too.
371 10 : PRInt32 first16bit = FirstNon8Bit(aBuffer, aBuffer + aLength);
372 :
373 10 : if (first16bit != -1) { // aBuffer contains no non-8bit character
374 : // The old data was 1-byte, but the new is not so we have to expand it
375 : // all to 2-byte
376 : PRUnichar* buff = (PRUnichar*)nsMemory::Alloc((mState.mLength + aLength) *
377 0 : sizeof(PRUnichar));
378 0 : if (!buff) {
379 0 : return;
380 : }
381 :
382 : // Copy data into buff
383 0 : LossyConvertEncoding8to16 converter(buff);
384 0 : copy_string(m1b, m1b+mState.mLength, converter);
385 :
386 0 : memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(PRUnichar));
387 0 : mState.mLength += aLength;
388 0 : mState.mIs2b = true;
389 :
390 0 : if (mState.mInHeap) {
391 0 : nsMemory::Free(m2b);
392 : }
393 0 : m2b = buff;
394 :
395 0 : mState.mInHeap = true;
396 :
397 0 : if (aUpdateBidi) {
398 0 : UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
399 : }
400 :
401 0 : return;
402 : }
403 :
404 : // The new and the old data is all 1-byte
405 : char* buff;
406 10 : if (mState.mInHeap) {
407 : buff = (char*)nsMemory::Realloc(const_cast<char*>(m1b),
408 9 : (mState.mLength + aLength) * sizeof(char));
409 9 : if (!buff) {
410 0 : return;
411 : }
412 : }
413 : else {
414 1 : buff = (char*)nsMemory::Alloc((mState.mLength + aLength) * sizeof(char));
415 1 : if (!buff) {
416 0 : return;
417 : }
418 :
419 1 : memcpy(buff, m1b, mState.mLength);
420 1 : mState.mInHeap = true;
421 : }
422 :
423 : // Copy aBuffer into buff.
424 10 : LossyConvertEncoding16to8 converter(buff + mState.mLength);
425 10 : copy_string(aBuffer, aBuffer + aLength, converter);
426 :
427 10 : m1b = buff;
428 10 : mState.mLength += aLength;
429 :
430 : }
431 :
432 : /* virtual */ size_t
433 0 : nsTextFragment::SizeOfExcludingThis(nsMallocSizeOfFun aMallocSizeOf) const
434 : {
435 0 : if (Is2b()) {
436 0 : return aMallocSizeOf(m2b);
437 : }
438 :
439 0 : if (mState.mInHeap) {
440 0 : return aMallocSizeOf(m1b);
441 : }
442 :
443 0 : return 0;
444 : }
445 :
446 : // To save time we only do this when we really want to know, not during
447 : // every allocation
448 : void
449 6 : nsTextFragment::UpdateBidiFlag(const PRUnichar* aBuffer, PRUint32 aLength)
450 : {
451 6 : if (mState.mIs2b && !mState.mIsBidi) {
452 6 : const PRUnichar* cp = aBuffer;
453 6 : const PRUnichar* end = cp + aLength;
454 69 : while (cp < end) {
455 61 : PRUnichar ch1 = *cp++;
456 61 : PRUint32 utf32Char = ch1;
457 61 : if (NS_IS_HIGH_SURROGATE(ch1) &&
458 : cp < end &&
459 : NS_IS_LOW_SURROGATE(*cp)) {
460 3 : PRUnichar ch2 = *cp++;
461 3 : utf32Char = SURROGATE_TO_UCS4(ch1, ch2);
462 : }
463 61 : if (UTF32_CHAR_IS_BIDI(utf32Char) || IS_BIDI_CONTROL_CHAR(utf32Char)) {
464 4 : mState.mIsBidi = true;
465 4 : break;
466 : }
467 : }
468 : }
469 6 : }
|