1 : /* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is Mozilla Corporation code.
16 : *
17 : * The Initial Developer of the Original Code is Mozilla Corporation.
18 : * Portions created by the Initial Developer are Copyright (C) 2010
19 : * the Initial Developer. All Rights Reserved.
20 : *
21 : * Contributor(s):
22 : * Jonathan Kew <jfkthame@gmail.com>
23 : *
24 : * Alternatively, the contents of this file may be used under the terms of
25 : * either the GNU General Public License Version 2 or later (the "GPL"), or
26 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 : * in which case the provisions of the GPL or the LGPL are applicable instead
28 : * of those above. If you wish to allow use of your version of this file only
29 : * under the terms of either the GPL or the LGPL, and not to allow others to
30 : * use your version of this file under the terms of the MPL, indicate your
31 : * decision by deleting the provisions above and replace them with the notice
32 : * and other provisions required by the GPL or the LGPL. If you do not delete
33 : * the provisions above, a recipient may use your version of this file under
34 : * the terms of any one of the MPL, the GPL or the LGPL.
35 : *
36 : *
37 : * This file is based on usc_impl.c from ICU 4.2.0.1, slightly adapted
38 : * for use within Mozilla Gecko, separate from a standard ICU build.
39 : *
40 : * The original ICU license of the code follows:
41 : *
42 : * ICU License - ICU 1.8.1 and later
43 : *
44 : * COPYRIGHT AND PERMISSION NOTICE
45 : *
46 : * Copyright (c) 1995-2009 International Business Machines Corporation and
47 : * others
48 : *
49 : * All rights reserved.
50 : *
51 : * Permission is hereby granted, free of charge, to any person obtaining a
52 : * copy of this software and associated documentation files (the "Software"),
53 : * to deal in the Software without restriction, including without limitation
54 : * the rights to use, copy, modify, merge, publish, distribute, and/or sell
55 : * copies of the Software, and to permit persons to whom the Software is
56 : * furnished to do so, provided that the above copyright notice(s) and this
57 : * permission notice appear in all copies of the Software and that both the
58 : * above copyright notice(s) and this permission notice appear in supporting
59 : * documentation.
60 : *
61 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
62 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
63 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
64 : * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
65 : * BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
66 : * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
67 : * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
68 : * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
69 : * SOFTWARE.
70 : *
71 : * Except as contained in this notice, the name of a copyright holder shall
72 : * not be used in advertising or otherwise to promote the sale, use or other
73 : * dealings in this Software without prior written authorization of the
74 : * copyright holder.
75 : *
76 : * All trademarks and registered trademarks mentioned herein are the property
77 : * of their respective owners.
78 : */
79 :
80 : #include "gfxScriptItemizer.h"
81 : #include "gfxFontUtils.h" // for the FindHighestBit function
82 : #include "nsUnicodeProperties.h"
83 :
84 : #include "nsCharTraits.h"
85 :
/* Number of elements in a statically sized array. The argument is fully
 * parenthesized so that any array-valued expression may be passed. */
#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))

/*
 * Helpers for the paired-punctuation stack. The stack lives in parenStack[]
 * and is addressed modulo PAREN_STACK_DEPTH, so pushing beyond the depth
 * wraps around and overwrites the oldest entries.
 */

/* wrap a stack index into the range [0, PAREN_STACK_DEPTH) */
#define MOD(sp) ((sp) % PAREN_STACK_DEPTH)
/* increment a counter, saturating at PAREN_STACK_DEPTH */
#define LIMIT_INC(sp) (((sp) < PAREN_STACK_DEPTH)? (sp) + 1 : PAREN_STACK_DEPTH)
/* advance a stack index by count (or by one), with wrap-around */
#define INC(sp,count) (MOD((sp) + (count)))
#define INC1(sp) (INC(sp, 1))
/* move a stack index back by count (or by one), with wrap-around;
 * PAREN_STACK_DEPTH is added first to keep the dividend non-negative */
#define DEC(sp,count) (MOD((sp) + PAREN_STACK_DEPTH - (count)))
#define DEC1(sp) (DEC(sp, 1))
/* emptiness tests, based on the number of live entries (pushCount) */
#define STACK_IS_EMPTY() (pushCount <= 0)
#define STACK_IS_NOT_EMPTY() (! STACK_IS_EMPTY())
/* the entry currently on top of the stack */
#define TOP() (parenStack[parenSP])
/* forget any entries that were waiting for a script fixup */
#define SYNC_FIXUP() (fixupCount = 0)
98 :
99 :
100 : static const PRUint16 pairedChars[] = {
101 : 0x0028, 0x0029, /* ascii paired punctuation */
102 : 0x003c, 0x003e,
103 : 0x005b, 0x005d,
104 : 0x007b, 0x007d,
105 : 0x00ab, 0x00bb, /* guillemets */
106 : 0x2018, 0x2019, /* general punctuation */
107 : 0x201c, 0x201d,
108 : 0x2039, 0x203a,
109 : 0x207d, 0x207e, /* superscripts and subscripts */
110 : 0x208d, 0x208e,
111 : 0x275b, 0x275c, /* dingbat quotes and brackets */
112 : 0x275d, 0x275e,
113 : 0x2768, 0x2769,
114 : 0x276a, 0x276b,
115 : 0x276c, 0x276d,
116 : 0x276e, 0x276f,
117 : 0x2770, 0x2771,
118 : 0x2772, 0x2773,
119 : 0x2774, 0x2775,
120 : /* omitted: lots of potentially-paired math symbols */
121 : 0x2e22, 0x2e23, /* supplemental punctuation */
122 : 0x2e24, 0x2e25,
123 : 0x2e26, 0x2e27,
124 : 0x2e28, 0x2e29,
125 : 0x3008, 0x3009, /* chinese paired punctuation */
126 : 0x300a, 0x300b,
127 : 0x300c, 0x300d,
128 : 0x300e, 0x300f,
129 : 0x3010, 0x3011,
130 : 0x3014, 0x3015,
131 : 0x3016, 0x3017,
132 : 0x3018, 0x3019,
133 : 0x301a, 0x301b,
134 : 0xfe59, 0xfe5a, /* small form variants */
135 : 0xfe5b, 0xfe5c,
136 : 0xfe5d, 0xfe5e,
137 : 0xfe64, 0xfe65,
138 : 0xff08, 0xff09, /* half-width and full-width forms */
139 : 0xff1c, 0xff1e,
140 : 0xff3b, 0xff3d,
141 : 0xff5b, 0xff5d,
142 : 0xff5f, 0xff60,
143 : 0xff62, 0xff63
144 : };
145 :
146 : void
147 0 : gfxScriptItemizer::push(PRInt32 pairIndex, PRInt32 scriptCode)
148 : {
149 0 : pushCount = LIMIT_INC(pushCount);
150 0 : fixupCount = LIMIT_INC(fixupCount);
151 :
152 0 : parenSP = INC1(parenSP);
153 0 : parenStack[parenSP].pairIndex = pairIndex;
154 0 : parenStack[parenSP].scriptCode = scriptCode;
155 0 : }
156 :
157 : void
158 0 : gfxScriptItemizer::pop()
159 : {
160 0 : if (STACK_IS_EMPTY()) {
161 0 : return;
162 : }
163 :
164 0 : if (fixupCount > 0) {
165 0 : fixupCount -= 1;
166 : }
167 :
168 0 : pushCount -= 1;
169 0 : parenSP = DEC1(parenSP);
170 :
171 : /* If the stack is now empty, reset the stack
172 : pointers to their initial values.
173 : */
174 0 : if (STACK_IS_EMPTY()) {
175 0 : parenSP = -1;
176 : }
177 : }
178 :
179 : void
180 0 : gfxScriptItemizer::fixup(PRInt32 scriptCode)
181 : {
182 0 : PRInt32 fixupSP = DEC(parenSP, fixupCount);
183 :
184 0 : while (fixupCount-- > 0) {
185 0 : fixupSP = INC1(fixupSP);
186 0 : parenStack[fixupSP].scriptCode = scriptCode;
187 : }
188 0 : }
189 :
190 : static PRInt32
191 0 : getPairIndex(PRUint32 ch)
192 : {
193 0 : PRInt32 pairedCharCount = ARRAY_SIZE(pairedChars);
194 0 : PRInt32 pairedCharPower = mozilla::FindHighestBit(pairedCharCount);
195 0 : PRInt32 pairedCharExtra = pairedCharCount - pairedCharPower;
196 :
197 0 : PRInt32 probe = pairedCharPower;
198 0 : PRInt32 pairIndex = 0;
199 :
200 0 : if (ch >= pairedChars[pairedCharExtra]) {
201 0 : pairIndex = pairedCharExtra;
202 : }
203 :
204 0 : while (probe > 1) {
205 0 : probe >>= 1;
206 :
207 0 : if (ch >= pairedChars[pairIndex + probe]) {
208 0 : pairIndex += probe;
209 : }
210 : }
211 :
212 0 : if (pairedChars[pairIndex] != ch) {
213 0 : pairIndex = -1;
214 : }
215 :
216 0 : return pairIndex;
217 : }
218 :
219 : static bool
220 0 : sameScript(PRInt32 runScript, PRInt32 currCharScript)
221 : {
222 : return runScript <= MOZ_SCRIPT_INHERITED ||
223 : currCharScript <= MOZ_SCRIPT_INHERITED ||
224 0 : currCharScript == runScript;
225 : }
226 :
227 0 : gfxScriptItemizer::gfxScriptItemizer(const PRUnichar *src, PRUint32 length)
228 0 : : textPtr(src), textLength(length)
229 : {
230 0 : reset();
231 0 : }
232 :
233 : void
234 0 : gfxScriptItemizer::SetText(const PRUnichar *src, PRUint32 length)
235 : {
236 0 : textPtr = src;
237 0 : textLength = length;
238 :
239 0 : reset();
240 0 : }
241 :
242 : bool
243 0 : gfxScriptItemizer::Next(PRUint32& aRunStart, PRUint32& aRunLimit,
244 : PRInt32& aRunScript)
245 : {
246 : /* if we've fallen off the end of the text, we're done */
247 0 : if (scriptLimit >= textLength) {
248 0 : return false;
249 : }
250 :
251 0 : SYNC_FIXUP();
252 0 : scriptCode = MOZ_SCRIPT_COMMON;
253 :
254 0 : for (scriptStart = scriptLimit; scriptLimit < textLength; scriptLimit += 1) {
255 : PRUint32 ch;
256 : PRInt32 sc;
257 : PRInt32 pairIndex;
258 0 : PRUint32 startOfChar = scriptLimit;
259 :
260 0 : ch = textPtr[scriptLimit];
261 :
262 : /*
263 : * MODIFICATION for Gecko - clear the paired-character stack
264 : * when we see a space character, because we cannot trust
265 : * context outside the current "word" when doing textrun
266 : * construction
267 : */
268 0 : if (ch == 0x20) {
269 0 : while (STACK_IS_NOT_EMPTY()) {
270 0 : pop();
271 : }
272 0 : sc = MOZ_SCRIPT_COMMON;
273 0 : pairIndex = -1;
274 : } else {
275 : /* decode UTF-16 (may be surrogate pair) */
276 0 : if (NS_IS_HIGH_SURROGATE(ch) && scriptLimit < textLength - 1) {
277 0 : PRUint32 low = textPtr[scriptLimit + 1];
278 0 : if (NS_IS_LOW_SURROGATE(low)) {
279 0 : ch = SURROGATE_TO_UCS4(ch, low);
280 0 : scriptLimit += 1;
281 : }
282 : }
283 :
284 0 : sc = mozilla::unicode::GetScriptCode(ch);
285 :
286 0 : pairIndex = getPairIndex(ch);
287 :
288 : /*
289 : * Paired character handling:
290 : *
291 : * if it's an open character, push it onto the stack.
292 : * if it's a close character, find the matching open on the
293 : * stack, and use that script code. Any non-matching open
294 : * characters above it on the stack will be poped.
295 : */
296 0 : if (pairIndex >= 0) {
297 0 : if ((pairIndex & 1) == 0) {
298 0 : push(pairIndex, scriptCode);
299 : } else {
300 0 : PRInt32 pi = pairIndex & ~1;
301 :
302 0 : while (STACK_IS_NOT_EMPTY() && TOP().pairIndex != pi) {
303 0 : pop();
304 : }
305 :
306 0 : if (STACK_IS_NOT_EMPTY()) {
307 0 : sc = TOP().scriptCode;
308 : }
309 : }
310 : }
311 : }
312 :
313 0 : if (sameScript(scriptCode, sc)) {
314 0 : if (scriptCode <= MOZ_SCRIPT_INHERITED &&
315 : sc > MOZ_SCRIPT_INHERITED)
316 : {
317 0 : scriptCode = sc;
318 0 : fixup(scriptCode);
319 : }
320 :
321 : /*
322 : * if this character is a close paired character,
323 : * pop the matching open character from the stack
324 : */
325 0 : if (pairIndex >= 0 && (pairIndex & 1) != 0) {
326 0 : pop();
327 : }
328 : } else {
329 : /*
330 : * reset scriptLimit in case it was advanced during reading a
331 : * multiple-code-unit character
332 : */
333 0 : scriptLimit = startOfChar;
334 :
335 0 : break;
336 : }
337 : }
338 :
339 0 : aRunStart = scriptStart;
340 0 : aRunLimit = scriptLimit;
341 0 : aRunScript = scriptCode;
342 :
343 0 : return true;
344 : }
|