1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is mozilla.org code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 1998
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : * Robert O'Callahan <robert@ocallahan.org>
24 : *
25 : * Alternatively, the contents of this file may be used under the terms of
26 : * either the GNU General Public License Version 2 or later (the "GPL"), or
27 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 : * in which case the provisions of the GPL or the LGPL are applicable instead
29 : * of those above. If you wish to allow use of your version of this file only
30 : * under the terms of either the GPL or the LGPL, and not to allow others to
31 : * use your version of this file under the terms of the MPL, indicate your
32 : * decision by deleting the provisions above and replace them with the notice
33 : * and other provisions required by the GPL or the LGPL. If you do not delete
34 : * the provisions above, a recipient may use your version of this file under
35 : * the terms of any one of the MPL, the GPL or the LGPL.
36 : *
37 : * ***** END LICENSE BLOCK ***** */
38 :
39 : #include "nsLineBreaker.h"
40 : #include "nsContentUtils.h"
41 : #include "nsILineBreaker.h"
42 : #include "gfxFont.h" // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values
43 : #include "nsHyphenationManager.h"
44 : #include "nsHyphenator.h"
45 :
46 0 : nsLineBreaker::nsLineBreaker()
47 : : mCurrentWordLangGroup(nsnull),
48 : mCurrentWordContainsMixedLang(false),
49 : mCurrentWordContainsComplexChar(false),
50 0 : mAfterBreakableSpace(false), mBreakHere(false)
51 : {
52 0 : }
53 :
54 0 : nsLineBreaker::~nsLineBreaker()
55 : {
56 0 : NS_ASSERTION(mCurrentWord.Length() == 0, "Should have Reset() before destruction!");
57 0 : }
58 :
59 : static void
60 0 : SetupCapitalization(const PRUnichar* aWord, PRUint32 aLength,
61 : bool* aCapitalization)
62 : {
63 : // Capitalize the first non-punctuation character after a space or start
64 : // of the word.
65 : // The only space character a word can contain is NBSP.
66 0 : bool capitalizeNextChar = true;
67 0 : for (PRUint32 i = 0; i < aLength; ++i) {
68 0 : if (capitalizeNextChar && !nsContentUtils::IsFirstLetterPunctuation(aWord[i])) {
69 0 : aCapitalization[i] = true;
70 0 : capitalizeNextChar = false;
71 : }
72 0 : if (aWord[i] == 0xA0 /*NBSP*/) {
73 0 : capitalizeNextChar = true;
74 : }
75 : }
76 0 : }
77 :
78 : nsresult
79 0 : nsLineBreaker::FlushCurrentWord()
80 : {
81 0 : PRUint32 length = mCurrentWord.Length();
82 0 : nsAutoTArray<PRUint8,4000> breakState;
83 0 : if (!breakState.AppendElements(length))
84 0 : return NS_ERROR_OUT_OF_MEMORY;
85 :
86 0 : nsTArray<bool> capitalizationState;
87 :
88 0 : if (!mCurrentWordContainsComplexChar) {
89 : // Just set everything internal to "no break"!
90 0 : memset(breakState.Elements(),
91 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
92 0 : length*sizeof(PRUint8));
93 : } else {
94 0 : nsContentUtils::LineBreaker()->
95 0 : GetJISx4051Breaks(mCurrentWord.Elements(), length, breakState.Elements());
96 : }
97 :
98 : bool autoHyphenate = mCurrentWordLangGroup &&
99 0 : !mCurrentWordContainsMixedLang;
100 : PRUint32 i;
101 0 : for (i = 0; autoHyphenate && i < mTextItems.Length(); ++i) {
102 0 : TextItem* ti = &mTextItems[i];
103 0 : if (!(ti->mFlags & BREAK_USE_AUTO_HYPHENATION)) {
104 0 : autoHyphenate = false;
105 : }
106 : }
107 0 : if (autoHyphenate) {
108 : nsRefPtr<nsHyphenator> hyphenator =
109 0 : nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLangGroup);
110 0 : if (hyphenator) {
111 : FindHyphenationPoints(hyphenator,
112 0 : mCurrentWord.Elements(),
113 0 : mCurrentWord.Elements() + length,
114 0 : breakState.Elements());
115 : }
116 : }
117 :
118 0 : PRUint32 offset = 0;
119 0 : for (i = 0; i < mTextItems.Length(); ++i) {
120 0 : TextItem* ti = &mTextItems[i];
121 0 : NS_ASSERTION(ti->mLength > 0, "Zero length word contribution?");
122 :
123 0 : if ((ti->mFlags & BREAK_SUPPRESS_INITIAL) && ti->mSinkOffset == 0) {
124 0 : breakState[offset] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
125 : }
126 0 : if (ti->mFlags & BREAK_SUPPRESS_INSIDE) {
127 0 : PRUint32 exclude = ti->mSinkOffset == 0 ? 1 : 0;
128 0 : memset(breakState.Elements() + offset + exclude,
129 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
130 0 : (ti->mLength - exclude)*sizeof(PRUint8));
131 : }
132 :
133 : // Don't set the break state for the first character of the word, because
134 : // it was already set correctly earlier and we don't know what the true
135 : // value should be.
136 0 : PRUint32 skipSet = i == 0 ? 1 : 0;
137 0 : if (ti->mSink) {
138 : ti->mSink->SetBreaks(ti->mSinkOffset + skipSet, ti->mLength - skipSet,
139 0 : breakState.Elements() + offset + skipSet);
140 :
141 0 : if (ti->mFlags & BREAK_NEED_CAPITALIZATION) {
142 0 : if (capitalizationState.Length() == 0) {
143 0 : if (!capitalizationState.AppendElements(length))
144 0 : return NS_ERROR_OUT_OF_MEMORY;
145 0 : memset(capitalizationState.Elements(), false, length*sizeof(bool));
146 0 : SetupCapitalization(mCurrentWord.Elements(), length,
147 0 : capitalizationState.Elements());
148 : }
149 : ti->mSink->SetCapitalization(ti->mSinkOffset, ti->mLength,
150 0 : capitalizationState.Elements() + offset);
151 : }
152 : }
153 :
154 0 : offset += ti->mLength;
155 : }
156 :
157 0 : mCurrentWord.Clear();
158 0 : mTextItems.Clear();
159 0 : mCurrentWordContainsComplexChar = false;
160 0 : mCurrentWordContainsMixedLang = false;
161 0 : mCurrentWordLangGroup = nsnull;
162 0 : return NS_OK;
163 : }
164 :
165 : nsresult
166 0 : nsLineBreaker::AppendText(nsIAtom* aLangGroup, const PRUnichar* aText, PRUint32 aLength,
167 : PRUint32 aFlags, nsILineBreakSink* aSink)
168 : {
169 0 : NS_ASSERTION(aLength > 0, "Appending empty text...");
170 :
171 0 : PRUint32 offset = 0;
172 :
173 : // Continue the current word
174 0 : if (mCurrentWord.Length() > 0) {
175 0 : NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set");
176 :
177 0 : while (offset < aLength && !IsSpace(aText[offset])) {
178 0 : mCurrentWord.AppendElement(aText[offset]);
179 0 : if (!mCurrentWordContainsComplexChar && IsComplexChar(aText[offset])) {
180 0 : mCurrentWordContainsComplexChar = true;
181 : }
182 0 : UpdateCurrentWordLangGroup(aLangGroup);
183 0 : ++offset;
184 : }
185 :
186 0 : if (offset > 0) {
187 0 : mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
188 : }
189 :
190 0 : if (offset == aLength)
191 0 : return NS_OK;
192 :
193 : // We encountered whitespace, so we're done with this word
194 0 : nsresult rv = FlushCurrentWord();
195 0 : if (NS_FAILED(rv))
196 0 : return rv;
197 : }
198 :
199 0 : nsAutoTArray<PRUint8,4000> breakState;
200 0 : if (aSink) {
201 0 : if (!breakState.AppendElements(aLength))
202 0 : return NS_ERROR_OUT_OF_MEMORY;
203 : }
204 :
205 0 : nsTArray<bool> capitalizationState;
206 0 : if (aSink && (aFlags & BREAK_NEED_CAPITALIZATION)) {
207 0 : if (!capitalizationState.AppendElements(aLength))
208 0 : return NS_ERROR_OUT_OF_MEMORY;
209 0 : memset(capitalizationState.Elements(), false, aLength*sizeof(bool));
210 : }
211 :
212 0 : PRUint32 start = offset;
213 : bool noBreaksNeeded = !aSink ||
214 : (aFlags == (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | BREAK_SKIP_SETTING_NO_BREAKS) &&
215 0 : !mBreakHere && !mAfterBreakableSpace);
216 0 : if (noBreaksNeeded) {
217 : // Skip to the space before the last word, since either the break data
218 : // here is not needed, or no breaks are set in the sink and there cannot
219 : // be any breaks in this chunk; all we need is the context for the next
220 : // chunk (if any)
221 0 : offset = aLength;
222 0 : while (offset > start) {
223 0 : --offset;
224 0 : if (IsSpace(aText[offset]))
225 0 : break;
226 : }
227 : }
228 0 : PRUint32 wordStart = offset;
229 0 : bool wordHasComplexChar = false;
230 :
231 0 : nsRefPtr<nsHyphenator> hyphenator;
232 0 : if ((aFlags & BREAK_USE_AUTO_HYPHENATION) && !(aFlags & BREAK_SUPPRESS_INSIDE)) {
233 0 : hyphenator = nsHyphenationManager::Instance()->GetHyphenator(aLangGroup);
234 : }
235 :
236 0 : for (;;) {
237 0 : PRUnichar ch = aText[offset];
238 0 : bool isSpace = IsSpace(ch);
239 0 : bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
240 :
241 0 : if (aSink) {
242 0 : breakState[offset] =
243 0 : mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ?
244 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
245 0 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
246 : }
247 0 : mBreakHere = false;
248 0 : mAfterBreakableSpace = isBreakableSpace;
249 :
250 0 : if (isSpace) {
251 0 : if (offset > wordStart && aSink) {
252 0 : if (!(aFlags & BREAK_SUPPRESS_INSIDE)) {
253 0 : if (wordHasComplexChar) {
254 : // Save current start-of-word state because GetJISx4051Breaks will
255 : // set it to false
256 0 : PRUint8 currentStart = breakState[wordStart];
257 0 : nsContentUtils::LineBreaker()->
258 : GetJISx4051Breaks(aText + wordStart, offset - wordStart,
259 0 : breakState.Elements() + wordStart);
260 0 : breakState[wordStart] = currentStart;
261 : }
262 0 : if (hyphenator) {
263 : FindHyphenationPoints(hyphenator,
264 : aText + wordStart, aText + offset,
265 0 : breakState.Elements() + wordStart);
266 : }
267 : }
268 0 : if (aFlags & BREAK_NEED_CAPITALIZATION) {
269 : SetupCapitalization(aText + wordStart, offset - wordStart,
270 0 : capitalizationState.Elements() + wordStart);
271 : }
272 : }
273 0 : wordHasComplexChar = false;
274 0 : ++offset;
275 0 : if (offset >= aLength)
276 0 : break;
277 0 : wordStart = offset;
278 : } else {
279 0 : if (!wordHasComplexChar && IsComplexChar(ch)) {
280 0 : wordHasComplexChar = true;
281 : }
282 0 : ++offset;
283 0 : if (offset >= aLength) {
284 : // Save this word
285 0 : mCurrentWordContainsComplexChar = wordHasComplexChar;
286 0 : PRUint32 len = offset - wordStart;
287 0 : PRUnichar* elems = mCurrentWord.AppendElements(len);
288 0 : if (!elems)
289 0 : return NS_ERROR_OUT_OF_MEMORY;
290 0 : memcpy(elems, aText + wordStart, sizeof(PRUnichar)*len);
291 0 : mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
292 : // Ensure that the break-before for this word is written out
293 0 : offset = wordStart + 1;
294 0 : UpdateCurrentWordLangGroup(aLangGroup);
295 0 : break;
296 : }
297 : }
298 : }
299 :
300 0 : if (!noBreaksNeeded) {
301 : // aSink must not be null
302 0 : aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
303 0 : if (aFlags & BREAK_NEED_CAPITALIZATION) {
304 : aSink->SetCapitalization(start, offset - start,
305 0 : capitalizationState.Elements() + start);
306 : }
307 : }
308 0 : return NS_OK;
309 : }
310 :
311 : void
312 0 : nsLineBreaker::FindHyphenationPoints(nsHyphenator *aHyphenator,
313 : const PRUnichar *aTextStart,
314 : const PRUnichar *aTextLimit,
315 : PRUint8 *aBreakState)
316 : {
317 0 : nsDependentSubstring string(aTextStart, aTextLimit);
318 0 : nsAutoTArray<bool,200> hyphens;
319 0 : if (NS_SUCCEEDED(aHyphenator->Hyphenate(string, hyphens))) {
320 0 : for (PRUint32 i = 0; i + 1 < string.Length(); ++i) {
321 0 : if (hyphens[i]) {
322 0 : aBreakState[i + 1] =
323 0 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
324 : }
325 : }
326 : }
327 0 : }
328 :
329 : nsresult
330 0 : nsLineBreaker::AppendText(nsIAtom* aLangGroup, const PRUint8* aText, PRUint32 aLength,
331 : PRUint32 aFlags, nsILineBreakSink* aSink)
332 : {
333 0 : NS_ASSERTION(aLength > 0, "Appending empty text...");
334 :
335 0 : if (aFlags & (BREAK_NEED_CAPITALIZATION | BREAK_USE_AUTO_HYPHENATION)) {
336 : // Defer to the Unicode path if capitalization or hyphenation is required
337 0 : nsAutoString str;
338 0 : const char* cp = reinterpret_cast<const char*>(aText);
339 0 : CopyASCIItoUTF16(nsDependentCSubstring(cp, cp + aLength), str);
340 0 : return AppendText(aLangGroup, str.get(), aLength, aFlags, aSink);
341 : }
342 :
343 0 : PRUint32 offset = 0;
344 :
345 : // Continue the current word
346 0 : if (mCurrentWord.Length() > 0) {
347 0 : NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set");
348 :
349 0 : while (offset < aLength && !IsSpace(aText[offset])) {
350 0 : mCurrentWord.AppendElement(aText[offset]);
351 0 : if (!mCurrentWordContainsComplexChar &&
352 0 : IsComplexASCIIChar(aText[offset])) {
353 0 : mCurrentWordContainsComplexChar = true;
354 : }
355 0 : ++offset;
356 : }
357 :
358 0 : if (offset > 0) {
359 0 : mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
360 : }
361 :
362 0 : if (offset == aLength) {
363 : // We did not encounter whitespace so the word hasn't finished yet.
364 0 : return NS_OK;
365 : }
366 :
367 : // We encountered whitespace, so we're done with this word
368 0 : nsresult rv = FlushCurrentWord();
369 0 : if (NS_FAILED(rv))
370 0 : return rv;
371 : }
372 :
373 0 : nsAutoTArray<PRUint8,4000> breakState;
374 0 : if (aSink) {
375 0 : if (!breakState.AppendElements(aLength))
376 0 : return NS_ERROR_OUT_OF_MEMORY;
377 : }
378 :
379 0 : PRUint32 start = offset;
380 : bool noBreaksNeeded = !aSink ||
381 : (aFlags == (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | BREAK_SKIP_SETTING_NO_BREAKS) &&
382 0 : !mBreakHere && !mAfterBreakableSpace);
383 0 : if (noBreaksNeeded) {
384 : // Skip to the space before the last word, since either the break data
385 : // here is not needed, or no breaks are set in the sink and there cannot
386 : // be any breaks in this chunk; all we need is the context for the next
387 : // chunk (if any)
388 0 : offset = aLength;
389 0 : while (offset > start) {
390 0 : --offset;
391 0 : if (IsSpace(aText[offset]))
392 0 : break;
393 : }
394 : }
395 0 : PRUint32 wordStart = offset;
396 0 : bool wordHasComplexChar = false;
397 :
398 0 : for (;;) {
399 0 : PRUint8 ch = aText[offset];
400 0 : bool isSpace = IsSpace(ch);
401 0 : bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
402 :
403 0 : if (aSink) {
404 0 : breakState[offset] =
405 0 : mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ?
406 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
407 0 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
408 : }
409 0 : mBreakHere = false;
410 0 : mAfterBreakableSpace = isBreakableSpace;
411 :
412 0 : if (isSpace) {
413 0 : if (offset > wordStart && wordHasComplexChar) {
414 0 : if (aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) {
415 : // Save current start-of-word state because GetJISx4051Breaks will
416 : // set it to false
417 0 : PRUint8 currentStart = breakState[wordStart];
418 0 : nsContentUtils::LineBreaker()->
419 : GetJISx4051Breaks(aText + wordStart, offset - wordStart,
420 0 : breakState.Elements() + wordStart);
421 0 : breakState[wordStart] = currentStart;
422 : }
423 0 : wordHasComplexChar = false;
424 : }
425 :
426 0 : ++offset;
427 0 : if (offset >= aLength)
428 0 : break;
429 0 : wordStart = offset;
430 : } else {
431 0 : if (!wordHasComplexChar && IsComplexASCIIChar(ch)) {
432 0 : wordHasComplexChar = true;
433 : }
434 0 : ++offset;
435 0 : if (offset >= aLength) {
436 : // Save this word
437 0 : mCurrentWordContainsComplexChar = wordHasComplexChar;
438 0 : PRUint32 len = offset - wordStart;
439 0 : PRUnichar* elems = mCurrentWord.AppendElements(len);
440 0 : if (!elems)
441 0 : return NS_ERROR_OUT_OF_MEMORY;
442 : PRUint32 i;
443 0 : for (i = wordStart; i < offset; ++i) {
444 0 : elems[i - wordStart] = aText[i];
445 : }
446 0 : mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
447 : // Ensure that the break-before for this word is written out
448 0 : offset = wordStart + 1;
449 0 : break;
450 : }
451 : }
452 : }
453 :
454 0 : if (!noBreaksNeeded) {
455 0 : aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
456 : }
457 0 : return NS_OK;
458 : }
459 :
460 : void
461 0 : nsLineBreaker::UpdateCurrentWordLangGroup(nsIAtom *aLangGroup)
462 : {
463 0 : if (mCurrentWordLangGroup && mCurrentWordLangGroup != aLangGroup) {
464 0 : mCurrentWordContainsMixedLang = true;
465 : } else {
466 0 : mCurrentWordLangGroup = aLangGroup;
467 : }
468 0 : }
469 :
470 : nsresult
471 0 : nsLineBreaker::AppendInvisibleWhitespace(PRUint32 aFlags)
472 : {
473 0 : nsresult rv = FlushCurrentWord();
474 0 : if (NS_FAILED(rv))
475 0 : return rv;
476 :
477 0 : bool isBreakableSpace = !(aFlags & BREAK_SUPPRESS_INSIDE);
478 0 : if (mAfterBreakableSpace && !isBreakableSpace) {
479 0 : mBreakHere = true;
480 : }
481 0 : mAfterBreakableSpace = isBreakableSpace;
482 0 : return NS_OK;
483 : }
484 :
485 : nsresult
486 0 : nsLineBreaker::Reset(bool* aTrailingBreak)
487 : {
488 0 : nsresult rv = FlushCurrentWord();
489 0 : if (NS_FAILED(rv))
490 0 : return rv;
491 :
492 0 : *aTrailingBreak = mBreakHere || mAfterBreakableSpace;
493 0 : mBreakHere = false;
494 0 : mAfterBreakableSpace = false;
495 0 : return NS_OK;
496 : }
|