1 : /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 : *
3 : * ***** BEGIN LICENSE BLOCK *****
4 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 : *
6 : * The contents of this file are subject to the Mozilla Public License Version
7 : * 1.1 (the "License"); you may not use this file except in compliance with
8 : * the License. You may obtain a copy of the License at
9 : * http://www.mozilla.org/MPL/
10 : *
11 : * Software distributed under the License is distributed on an "AS IS" basis,
12 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 : * for the specific language governing rights and limitations under the
14 : * License.
15 : *
16 : * The Original Code is mozilla.org code.
17 : *
18 : * The Initial Developer of the Original Code is
19 : * IBM Corporation.
20 : * Portions created by the Initial Developer are Copyright (C) 2000
21 : * the Initial Developer. All Rights Reserved.
22 : *
23 : * Contributor(s):
24 : * Simon Montagu
25 : *
26 : * Alternatively, the contents of this file may be used under the terms of
27 : * either of the GNU General Public License Version 2 or later (the "GPL"),
28 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 : * in which case the provisions of the GPL or the LGPL are applicable instead
30 : * of those above. If you wish to allow use of your version of this file only
31 : * under the terms of either the GPL or the LGPL, and not to allow others to
32 : * use your version of this file under the terms of the MPL, indicate your
33 : * decision by deleting the provisions above and replace them with the notice
34 : * and other provisions required by the GPL or the LGPL. If you do not delete
35 : * the provisions above, a recipient may use your version of this file under
36 : * the terms of any one of the MPL, the GPL or the LGPL.
37 : *
38 : * ***** END LICENSE BLOCK ***** */
39 : #ifdef IBMBIDI
40 :
41 : #include "prmem.h"
42 : #include "nsBidi.h"
43 : #include "nsBidiUtils.h"
44 : #include "nsCRT.h"
45 :
46 : // These are #defined in <sys/regset.h> under Solaris 10 x86
47 : #undef CS
48 : #undef ES
49 :
/* Comparing the description of the Bidi algorithm with this implementation
   is easier with the same names for the Bidi types in the code as there.
   Each short name below is an alias for the eCharType_* value on its
   right-hand side; O_N is the alias used here for eCharType_OtherNeutral.
   dirPropCount has no initializer, so its value is BN's value plus one
   (presumably the number of directional properties - this assumes that
   eCharType_BoundaryNeutral is the largest eCharType_* value; confirm
   against nsBidiUtils.h).
*/
enum {
  L = eCharType_LeftToRight,
  R = eCharType_RightToLeft,
  EN = eCharType_EuropeanNumber,
  ES = eCharType_EuropeanNumberSeparator,
  ET = eCharType_EuropeanNumberTerminator,
  AN = eCharType_ArabicNumber,
  CS = eCharType_CommonNumberSeparator,
  B = eCharType_BlockSeparator,
  S = eCharType_SegmentSeparator,
  WS = eCharType_WhiteSpaceNeutral,
  O_N = eCharType_OtherNeutral,
  LRE = eCharType_LeftToRightEmbedding,
  LRO = eCharType_LeftToRightOverride,
  AL = eCharType_RightToLeftArabic,
  RLE = eCharType_RightToLeftEmbedding,
  RLO = eCharType_RightToLeftOverride,
  PDF = eCharType_PopDirectionalFormat,
  NSM = eCharType_DirNonSpacingMark,
  BN = eCharType_BoundaryNeutral,
  dirPropCount
};
75 :
/* to avoid some conditional statements, use tiny constant arrays;
   each array is indexed with the lowest bit of an embedding level:
   index 0 (even level) holds the left-to-right flag,
   index 1 (odd level) holds the right-to-left flag */
static Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
static Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
static Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };

/* flag of the strong type (L/R), of the embedding code (LRE/RLE), or of the
   override code (LRO/RLO) that matches the parity of level; only bit 0 of
   level is looked at, so an override bit in level is harmless here */
#define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
#define DIRPROP_FLAG_E(level) flagE[(level)&1]
#define DIRPROP_FLAG_O(level) flagO[(level)&1]
84 :
85 : /*
86 : * General implementation notes:
87 : *
88 : * Throughout the implementation, there are comments like (W2) that refer to
89 : * rules of the Bidi algorithm in its version 5, in this example to the second
90 : * rule of the resolution of weak types.
91 : *
92 : * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
93 : * character according to UTF-16, the second UChar gets the directional property of
94 : * the entire character assigned, while the first one gets a BN, a boundary
95 : * neutral, type, which is ignored by most of the algorithm according to
96 : * rule (X9) and the implementation suggestions of the Bidi algorithm.
97 : *
98 : * Later, AdjustWSLevels() will set the level for each BN to that of the
99 : * following character (UChar), which results in surrogate pairs getting the
100 : * same level on each of their surrogates.
101 : *
102 : * In a UTF-8 implementation, the same thing could be done: the last byte of
103 : * a multi-byte sequence would get the "real" property, while all previous
104 : * bytes of that sequence would get BN.
105 : *
106 : * It is not possible to assign all those parts of a character the same real
107 : * property because this would fail in the resolution of weak types with rules
108 : * that look at immediately surrounding types.
109 : *
110 : * As a related topic, this implementation does not remove Boundary Neutral
111 : * types from the input, but ignores them whenever this is relevant.
112 : * For example, the loop for the resolution of the weak types reads
113 : * types until it finds a non-BN.
114 : * Also, explicit embedding codes are neither changed into BN nor removed.
115 : * They are only treated the same way real BNs are.
116 : * As stated before, AdjustWSLevels() takes care of them at the end.
117 : * For the purpose of conformance, the levels of all these codes
118 : * do not matter.
119 : *
120 : * Note that this implementation never modifies the dirProps
121 : * after the initial setup.
122 : *
123 : *
124 : * In this implementation, the resolution of weak types (Wn),
125 : * neutrals (Nn), and the assignment of the resolved level (In)
126 : * are all done in one single loop, in ResolveImplicitLevels().
127 : * Changes of dirProp values are done on the fly, without writing
128 : * them back to the dirProps array.
129 : *
130 : *
 * This implementation contains code that allows bypassing steps of the
 * algorithm that are not needed for the specific paragraph,
 * in order to speed up the most common cases considerably,
 * like text that is entirely LTR, or RTL text without numbers.
135 : *
136 : * Most of this is done by setting a bit for each directional property
137 : * in a flags variable and later checking for whether there are
138 : * any LTR characters or any RTL characters, or both, whether
139 : * there are any explicit embedding codes, etc.
140 : *
141 : * If the (Xn) steps are performed, then the flags are re-evaluated,
142 : * because they will then not contain the embedding codes any more
143 : * and will be adjusted for override codes, so that subsequently
144 : * more bypassing may be possible than what the initial flags suggested.
145 : *
146 : * If the text is not mixed-directional, then the
147 : * algorithm steps for the weak type resolution are not performed,
148 : * and all levels are set to the paragraph level.
149 : *
150 : * If there are no explicit embedding codes, then the (Xn) steps
151 : * are not performed.
152 : *
153 : * If embedding levels are supplied as a parameter, then all
154 : * explicit embedding codes are ignored, and the (Xn) steps
155 : * are not performed.
156 : *
157 : * White Space types could get the level of the run they belong to,
158 : * and are checked with a test of (flags&MASK_EMBEDDING) to
159 : * consider if the paragraph direction should be considered in
160 : * the flags variable.
161 : *
162 : * If there are no White Space types in the paragraph, then
163 : * (L1) is not necessary in AdjustWSLevels().
164 : */
165 0 : nsBidi::nsBidi()
166 : {
167 0 : Init();
168 :
169 0 : mMayAllocateText=true;
170 0 : mMayAllocateRuns=true;
171 0 : }
172 :
/* Destroy the object; releasing the buffers is delegated to Free(). */
nsBidi::~nsBidi()
{
  Free();
}
177 :
178 0 : void nsBidi::Init()
179 : {
180 : /* reset the object, all pointers NULL, all flags false, all sizes 0 */
181 0 : mLength = 0;
182 0 : mParaLevel = 0;
183 0 : mFlags = 0;
184 0 : mDirection = NSBIDI_LTR;
185 0 : mTrailingWSStart = 0;
186 :
187 0 : mDirPropsSize = 0;
188 0 : mLevelsSize = 0;
189 0 : mRunsSize = 0;
190 0 : mRunCount = -1;
191 :
192 0 : mDirProps=NULL;
193 0 : mLevels=NULL;
194 0 : mRuns=NULL;
195 :
196 0 : mDirPropsMemory=NULL;
197 0 : mLevelsMemory=NULL;
198 0 : mRunsMemory=NULL;
199 :
200 0 : mMayAllocateText=false;
201 0 : mMayAllocateRuns=false;
202 :
203 0 : }
204 :
205 : /*
206 : * We are allowed to allocate memory if aMemory==NULL or
207 : * aMayAllocate==true for each array that we need.
208 : * We also try to grow and shrink memory as needed if we
209 : * allocate it.
210 : *
211 : * Assume aSizeNeeded>0.
212 : * If *aMemory!=NULL, then assume *aSize>0.
213 : *
214 : * ### this realloc() may unnecessarily copy the old data,
215 : * which we know we don't need any more;
216 : * is this the best way to do this??
217 : */
218 0 : bool nsBidi::GetMemory(void **aMemory, PRSize *aSize, bool aMayAllocate, PRSize aSizeNeeded)
219 : {
220 : /* check for existing memory */
221 0 : if(*aMemory==NULL) {
222 : /* we need to allocate memory */
223 0 : if(!aMayAllocate) {
224 0 : return false;
225 : } else {
226 0 : *aMemory=PR_MALLOC(aSizeNeeded);
227 0 : if (*aMemory!=NULL) {
228 0 : *aSize=aSizeNeeded;
229 0 : return true;
230 : } else {
231 0 : *aSize=0;
232 0 : return false;
233 : }
234 : }
235 : } else {
236 : /* there is some memory, is it enough or too much? */
237 0 : if(aSizeNeeded>*aSize && !aMayAllocate) {
238 : /* not enough memory, and we must not allocate */
239 0 : return false;
240 0 : } else if(aSizeNeeded!=*aSize && aMayAllocate) {
241 : /* we may try to grow or shrink */
242 0 : void *memory=PR_REALLOC(*aMemory, aSizeNeeded);
243 :
244 0 : if(memory!=NULL) {
245 0 : *aMemory=memory;
246 0 : *aSize=aSizeNeeded;
247 0 : return true;
248 : } else {
249 : /* we failed to grow */
250 0 : return false;
251 : }
252 : } else {
253 : /* we have at least enough memory and must not allocate */
254 0 : return true;
255 : }
256 : }
257 : }
258 :
/*
 * Release the buffers tracked by mDirPropsMemory, mLevelsMemory and
 * mRunsMemory by handing each of them to PR_FREEIF.
 *
 * NOTE(review): mDirProps and mLevels are not touched here although SetPara()
 * may have pointed them at these buffers - confirm that no caller reads them
 * after Free().
 */
void nsBidi::Free()
{
  PR_FREEIF(mDirPropsMemory);
  PR_FREEIF(mLevelsMemory);
  PR_FREEIF(mRunsMemory);
}
265 :
266 : /* SetPara ------------------------------------------------------------ */
267 :
268 0 : nsresult nsBidi::SetPara(const PRUnichar *aText, PRInt32 aLength,
269 : nsBidiLevel aParaLevel, nsBidiLevel *aEmbeddingLevels)
270 : {
271 : nsBidiDirection direction;
272 :
273 : /* check the argument values */
274 0 : if(aText==NULL ||
275 0 : ((NSBIDI_MAX_EXPLICIT_LEVEL<aParaLevel) && !IS_DEFAULT_LEVEL(aParaLevel)) ||
276 : aLength<-1
277 : ) {
278 0 : return NS_ERROR_INVALID_ARG;
279 : }
280 :
281 0 : if(aLength==-1) {
282 0 : aLength=nsCRT::strlen(aText);
283 : }
284 :
285 : /* initialize member data */
286 0 : mLength=aLength;
287 0 : mParaLevel=aParaLevel;
288 0 : mDirection=NSBIDI_LTR;
289 0 : mTrailingWSStart=aLength; /* the levels[] will reflect the WS run */
290 :
291 0 : mDirProps=NULL;
292 0 : mLevels=NULL;
293 0 : mRuns=NULL;
294 :
295 0 : if(aLength==0) {
296 : /*
297 : * For an empty paragraph, create an nsBidi object with the aParaLevel and
298 : * the flags and the direction set but without allocating zero-length arrays.
299 : * There is nothing more to do.
300 : */
301 0 : if(IS_DEFAULT_LEVEL(aParaLevel)) {
302 0 : mParaLevel&=1;
303 : }
304 0 : if(aParaLevel&1) {
305 0 : mFlags=DIRPROP_FLAG(R);
306 0 : mDirection=NSBIDI_RTL;
307 : } else {
308 0 : mFlags=DIRPROP_FLAG(L);
309 0 : mDirection=NSBIDI_LTR;
310 : }
311 :
312 0 : mRunCount=0;
313 0 : return NS_OK;
314 : }
315 :
316 0 : mRunCount=-1;
317 :
318 : /*
319 : * Get the directional properties,
320 : * the flags bit-set, and
321 : * determine the partagraph level if necessary.
322 : */
323 0 : if(GETDIRPROPSMEMORY(aLength)) {
324 0 : mDirProps=mDirPropsMemory;
325 0 : GetDirProps(aText);
326 : } else {
327 0 : return NS_ERROR_OUT_OF_MEMORY;
328 : }
329 :
330 : /* are explicit levels specified? */
331 0 : if(aEmbeddingLevels==NULL) {
332 : /* no: determine explicit levels according to the (Xn) rules */\
333 0 : if(GETLEVELSMEMORY(aLength)) {
334 0 : mLevels=mLevelsMemory;
335 0 : direction=ResolveExplicitLevels();
336 : } else {
337 0 : return NS_ERROR_OUT_OF_MEMORY;
338 : }
339 : } else {
340 : /* set BN for all explicit codes, check that all levels are aParaLevel..NSBIDI_MAX_EXPLICIT_LEVEL */
341 0 : mLevels=aEmbeddingLevels;
342 0 : nsresult rv = CheckExplicitLevels(&direction);
343 0 : if(NS_FAILED(rv)) {
344 0 : return rv;
345 : }
346 : }
347 :
348 : /*
349 : * The steps after (X9) in the Bidi algorithm are performed only if
350 : * the paragraph text has mixed directionality!
351 : */
352 0 : switch(direction) {
353 : case NSBIDI_LTR:
354 : /* make sure paraLevel is even */
355 0 : mParaLevel=(mParaLevel+1)&~1;
356 :
357 : /* all levels are implicitly at paraLevel (important for GetLevels()) */
358 0 : mTrailingWSStart=0;
359 0 : break;
360 : case NSBIDI_RTL:
361 : /* make sure paraLevel is odd */
362 0 : mParaLevel|=1;
363 :
364 : /* all levels are implicitly at paraLevel (important for GetLevels()) */
365 0 : mTrailingWSStart=0;
366 0 : break;
367 : default:
368 : /*
369 : * If there are no external levels specified and there
370 : * are no significant explicit level codes in the text,
371 : * then we can treat the entire paragraph as one run.
372 : * Otherwise, we need to perform the following rules on runs of
373 : * the text with the same embedding levels. (X10)
374 : * "Significant" explicit level codes are ones that actually
375 : * affect non-BN characters.
376 : * Examples for "insignificant" ones are empty embeddings
377 : * LRE-PDF, LRE-RLE-PDF-PDF, etc.
378 : */
379 0 : if(aEmbeddingLevels==NULL && !(mFlags&DIRPROP_FLAG_MULTI_RUNS)) {
380 : ResolveImplicitLevels(0, aLength,
381 : GET_LR_FROM_LEVEL(mParaLevel),
382 0 : GET_LR_FROM_LEVEL(mParaLevel));
383 : } else {
384 : /* sor, eor: start and end types of same-level-run */
385 0 : nsBidiLevel *levels=mLevels;
386 0 : PRInt32 start, limit=0;
387 : nsBidiLevel level, nextLevel;
388 : DirProp sor, eor;
389 :
390 : /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
391 0 : level=mParaLevel;
392 0 : nextLevel=levels[0];
393 0 : if(level<nextLevel) {
394 0 : eor=GET_LR_FROM_LEVEL(nextLevel);
395 : } else {
396 0 : eor=GET_LR_FROM_LEVEL(level);
397 : }
398 :
399 0 : do {
400 : /* determine start and limit of the run (end points just behind the run) */
401 :
402 : /* the values for this run's start are the same as for the previous run's end */
403 0 : sor=eor;
404 0 : start=limit;
405 0 : level=nextLevel;
406 :
407 : /* search for the limit of this run */
408 0 : while(++limit<aLength && levels[limit]==level) {}
409 :
410 : /* get the correct level of the next run */
411 0 : if(limit<aLength) {
412 0 : nextLevel=levels[limit];
413 : } else {
414 0 : nextLevel=mParaLevel;
415 : }
416 :
417 : /* determine eor from max(level, nextLevel); sor is last run's eor */
418 0 : if((level&~NSBIDI_LEVEL_OVERRIDE)<(nextLevel&~NSBIDI_LEVEL_OVERRIDE)) {
419 0 : eor=GET_LR_FROM_LEVEL(nextLevel);
420 : } else {
421 0 : eor=GET_LR_FROM_LEVEL(level);
422 : }
423 :
424 : /* if the run consists of overridden directional types, then there
425 : are no implicit types to be resolved */
426 0 : if(!(level&NSBIDI_LEVEL_OVERRIDE)) {
427 0 : ResolveImplicitLevels(start, limit, sor, eor);
428 : }
429 : } while(limit<aLength);
430 : }
431 :
432 : /* reset the embedding levels for some non-graphic characters (L1), (X9) */
433 0 : AdjustWSLevels();
434 0 : break;
435 : }
436 :
437 0 : mDirection=direction;
438 0 : return NS_OK;
439 : }
440 :
441 : /* perform (P2)..(P3) ------------------------------------------------------- */
442 :
443 : /*
444 : * Get the directional properties for the text,
445 : * calculate the flags bit-set, and
446 : * determine the partagraph level if necessary.
447 : */
448 0 : void nsBidi::GetDirProps(const PRUnichar *aText)
449 : {
450 0 : DirProp *dirProps=mDirPropsMemory; /* mDirProps is const */
451 :
452 0 : PRInt32 i=0, length=mLength;
453 0 : Flags flags=0; /* collect all directionalities in the text */
454 : PRUnichar uchar;
455 : DirProp dirProp;
456 :
457 0 : if(IS_DEFAULT_LEVEL(mParaLevel)) {
458 : /* determine the paragraph level (P2..P3) */
459 0 : for(;;) {
460 0 : uchar=aText[i];
461 0 : if(!IS_FIRST_SURROGATE(uchar) || i+1==length || !IS_SECOND_SURROGATE(aText[i+1])) {
462 : /* not a surrogate pair */
463 0 : flags|=DIRPROP_FLAG(dirProps[i]=dirProp=GetCharType((PRUint32)uchar));
464 : } else {
465 : /* a surrogate pair */
466 0 : dirProps[i++]=BN; /* first surrogate in the pair gets the BN type */
467 0 : flags|=DIRPROP_FLAG(dirProps[i]=dirProp=GetCharType(GET_UTF_32(uchar, aText[i])))|DIRPROP_FLAG(BN);
468 : }
469 0 : ++i;
470 0 : if(dirProp==L) {
471 0 : mParaLevel=0;
472 0 : break;
473 0 : } else if(dirProp==R || dirProp==AL) {
474 0 : mParaLevel=1;
475 0 : break;
476 0 : } else if(i==length) {
477 : /*
478 : * see comment in nsIBidi.h:
479 : * the DEFAULT_XXX values are designed so that
480 : * their bit 0 alone yields the intended default
481 : */
482 0 : mParaLevel&=1;
483 0 : break;
484 : }
485 : }
486 : }
487 :
488 : /* get the rest of the directional properties and the flags bits */
489 0 : while(i<length) {
490 0 : uchar=aText[i];
491 0 : if(!IS_FIRST_SURROGATE(uchar) || i+1==length || !IS_SECOND_SURROGATE(aText[i+1])) {
492 : /* not a surrogate pair */
493 0 : flags|=DIRPROP_FLAG(dirProps[i]=GetCharType((PRUint32)uchar));
494 : } else {
495 : /* a surrogate pair */
496 0 : dirProps[i++]=BN; /* second surrogate in the pair gets the BN type */
497 0 : flags|=DIRPROP_FLAG(dirProps[i]=GetCharType(GET_UTF_32(uchar, aText[i])))|DIRPROP_FLAG(BN);
498 : }
499 0 : ++i;
500 : }
501 0 : if(flags&MASK_EMBEDDING) {
502 0 : flags|=DIRPROP_FLAG_LR(mParaLevel);
503 : }
504 0 : mFlags=flags;
505 0 : }
506 :
507 : /* perform (X1)..(X9) ------------------------------------------------------- */
508 :
509 : /*
510 : * Resolve the explicit levels as specified by explicit embedding codes.
511 : * Recalculate the flags to have them reflect the real properties
512 : * after taking the explicit embeddings into account.
513 : *
514 : * The Bidi algorithm is designed to result in the same behavior whether embedding
515 : * levels are externally specified (from "styled text", supposedly the preferred
516 : * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
517 : * That is why (X9) instructs to remove all explicit codes (and BN).
518 : * However, in a real implementation, this removal of these codes and their index
519 : * positions in the plain text is undesirable since it would result in
520 : * reallocated, reindexed text.
521 : * Instead, this implementation leaves the codes in there and just ignores them
522 : * in the subsequent processing.
523 : * In order to get the same reordering behavior, positions with a BN or an
524 : * explicit embedding code just get the same level assigned as the last "real"
525 : * character.
526 : *
527 : * Some implementations, not this one, then overwrite some of these
528 : * directionality properties at "real" same-level-run boundaries by
529 : * L or R codes so that the resolution of weak types can be performed on the
530 : * entire paragraph at once instead of having to parse it once more and
531 : * perform that resolution on same-level-runs.
532 : * This limits the scope of the implicit rules in effectively
533 : * the same way as the run limits.
534 : *
535 : * Instead, this implementation does not modify these codes.
536 : * On one hand, the paragraph has to be scanned for same-level-runs, but
537 : * on the other hand, this saves another loop to reset these codes,
538 : * or saves making and modifying a copy of dirProps[].
539 : *
540 : *
541 : * Note that (Pn) and (Xn) changed significantly from version 4 of the Bidi algorithm.
542 : *
543 : *
544 : * Handling the stack of explicit levels (Xn):
545 : *
546 : * With the Bidi stack of explicit levels,
547 : * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
548 : * the explicit level must never exceed NSBIDI_MAX_EXPLICIT_LEVEL==61.
549 : *
550 : * In order to have a correct push-pop semantics even in the case of overflows,
551 : * there are two overflow counters:
552 : * - countOver60 is incremented with each LRx at level 60
553 : * - from level 60, one RLx increases the level to 61
554 : * - countOver61 is incremented with each LRx and RLx at level 61
555 : *
556 : * Popping levels with PDF must work in the opposite order so that level 61
557 : * is correct at the correct point. Underflows (too many PDFs) must be checked.
558 : *
559 : * This implementation assumes that NSBIDI_MAX_EXPLICIT_LEVEL is odd.
560 : */
561 :
/*
 * Fill levels[] according to the explicit embedding codes in dirProps[]
 * and return the direction of the text.
 *
 * There are three cases, cheapest first:
 * - the initial flags say that the text is not mixed-directional:
 *   nothing is written at all;
 * - the text is mixed but contains no type covered by MASK_EXPLICIT:
 *   every level is set to the paragraph level;
 * - otherwise the (Xn) rules are performed, and mFlags and the returned
 *   direction are recalculated from the result.
 *
 * Only the third case modifies mFlags.
 */
nsBidiDirection nsBidi::ResolveExplicitLevels()
{
  const DirProp *dirProps=mDirProps;
  nsBidiLevel *levels=mLevels;

  PRInt32 i=0, length=mLength;
  Flags flags=mFlags; /* collect all directionalities in the text */
  DirProp dirProp;
  nsBidiLevel level=mParaLevel;

  nsBidiDirection direction;

  /* determine if the text is mixed-directional or single-directional */
  direction=DirectionFromFlags(flags);

  /* we may not need to resolve any explicit levels */
  if(direction!=NSBIDI_MIXED) {
    /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
  } else if(!(flags&MASK_EXPLICIT)) {
    /* mixed, but all characters are at the same embedding level */
    /* set all levels to the paragraph level */
    for(i=0; i<length; ++i) {
      levels[i]=level;
    }
  } else {
    /* continue to perform (Xn) */

    /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
    /* both variables may carry the NSBIDI_LEVEL_OVERRIDE flag to indicate the override status */
    nsBidiLevel embeddingLevel=level, newLevel, stackTop=0;

    nsBidiLevel stack[NSBIDI_MAX_EXPLICIT_LEVEL]; /* we never push anything >=NSBIDI_MAX_EXPLICIT_LEVEL */
    PRUint32 countOver60=0, countOver61=0; /* count overflows of explicit levels */

    /* recalculate the flags */
    flags=0;

    /* since we assume that this is a single paragraph, we ignore (X8) */
    for(i=0; i<length; ++i) {
      dirProp=dirProps[i];
      switch(dirProp) {
      case LRE:
      case LRO:
        /* (X3, X5) */
        newLevel=(embeddingLevel+2)&~(NSBIDI_LEVEL_OVERRIDE|1); /* least greater even level */
        if(newLevel<=NSBIDI_MAX_EXPLICIT_LEVEL) {
          /* push the current state (including its override bit) and enter the new level */
          stack[stackTop]=embeddingLevel;
          ++stackTop;
          embeddingLevel=newLevel;
          if(dirProp==LRO) {
            embeddingLevel|=NSBIDI_LEVEL_OVERRIDE;
          } else {
            embeddingLevel&=~NSBIDI_LEVEL_OVERRIDE;
          }
        } else if((embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)==NSBIDI_MAX_EXPLICIT_LEVEL) {
          ++countOver61;
        } else /* (embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)==NSBIDI_MAX_EXPLICIT_LEVEL-1 */ {
          ++countOver60;
        }
        flags|=DIRPROP_FLAG(BN);
        break;
      case RLE:
      case RLO:
        /* (X2, X4) */
        newLevel=((embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)+1)|1; /* least greater odd level */
        if(newLevel<=NSBIDI_MAX_EXPLICIT_LEVEL) {
          /* push the current state (including its override bit) and enter the new level */
          stack[stackTop]=embeddingLevel;
          ++stackTop;
          embeddingLevel=newLevel;
          if(dirProp==RLO) {
            embeddingLevel|=NSBIDI_LEVEL_OVERRIDE;
          } else {
            embeddingLevel&=~NSBIDI_LEVEL_OVERRIDE;
          }
        } else {
          ++countOver61;
        }
        flags|=DIRPROP_FLAG(BN);
        break;
      case PDF:
        /* (X7) */
        /* handle all the overflow cases first */
        if(countOver61>0) {
          --countOver61;
        } else if(countOver60>0 && (embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)!=NSBIDI_MAX_EXPLICIT_LEVEL) {
          /* handle LRx overflows from level 60 */
          --countOver60;
        } else if(stackTop>0) {
          /* this is the pop operation; it also pops level 61 while countOver60>0 */
          --stackTop;
          embeddingLevel=stack[stackTop];
        /* } else { (underflow) */
        }
        flags|=DIRPROP_FLAG(BN);
        break;
      case B:
        /*
         * We do not really expect to see a paragraph separator (B),
         * but we should do something reasonable with it,
         * especially at the end of the text.
         */
        /* forget all open embeddings and fall back to the paragraph level */
        stackTop=0;
        countOver60=countOver61=0;
        embeddingLevel=level=mParaLevel;
        flags|=DIRPROP_FLAG(B);
        break;
      case BN:
        /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
        /* they will get their levels set correctly in AdjustWSLevels() */
        flags|=DIRPROP_FLAG(BN);
        break;
      default:
        /* all other types get the "real" level */
        if(level!=embeddingLevel) {
          /* the level changes at a "real" character: there is more than one run */
          level=embeddingLevel;
          if(level&NSBIDI_LEVEL_OVERRIDE) {
            flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS;
          } else {
            flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS;
          }
        }
        /* the type of an overridden character does not count for the flags */
        if(!(level&NSBIDI_LEVEL_OVERRIDE)) {
          flags|=DIRPROP_FLAG(dirProp);
        }
        break;
      }

      /*
       * We need to set reasonable levels even on BN codes and
       * explicit codes because we will later look at same-level runs (X10).
       */
      levels[i]=level;
    }
    if(flags&MASK_EMBEDDING) {
      flags|=DIRPROP_FLAG_LR(mParaLevel);
    }

    /* subsequently, ignore the explicit codes and BN (X9) */

    /* again, determine if the text is mixed-directional or single-directional */
    mFlags=flags;
    direction=DirectionFromFlags(flags);
  }
  return direction;
}
707 :
708 : /*
709 : * Use a pre-specified embedding levels array:
710 : *
711 : * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
712 : * ignore all explicit codes (X9),
713 : * and check all the preset levels.
714 : *
715 : * Recalculate the flags to have them reflect the real properties
716 : * after taking the explicit embeddings into account.
717 : */
718 0 : nsresult nsBidi::CheckExplicitLevels(nsBidiDirection *aDirection)
719 : {
720 0 : const DirProp *dirProps=mDirProps;
721 0 : nsBidiLevel *levels=mLevels;
722 :
723 0 : PRInt32 i, length=mLength;
724 0 : Flags flags=0; /* collect all directionalities in the text */
725 0 : nsBidiLevel level, paraLevel=mParaLevel;
726 :
727 0 : for(i=0; i<length; ++i) {
728 0 : level=levels[i];
729 0 : if(level&NSBIDI_LEVEL_OVERRIDE) {
730 : /* keep the override flag in levels[i] but adjust the flags */
731 0 : level&=~NSBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */
732 0 : flags|=DIRPROP_FLAG_O(level);
733 : } else {
734 : /* set the flags */
735 0 : flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProps[i]);
736 : }
737 0 : if(level<paraLevel || NSBIDI_MAX_EXPLICIT_LEVEL<level) {
738 : /* level out of bounds */
739 0 : *aDirection = NSBIDI_LTR;
740 0 : return NS_ERROR_INVALID_ARG;
741 : }
742 : }
743 0 : if(flags&MASK_EMBEDDING) {
744 0 : flags|=DIRPROP_FLAG_LR(mParaLevel);
745 : }
746 :
747 : /* determine if the text is mixed-directional or single-directional */
748 0 : mFlags=flags;
749 0 : *aDirection = DirectionFromFlags(flags);
750 0 : return NS_OK;
751 : }
752 :
753 : /* determine if the text is mixed-directional or single-directional */
754 0 : nsBidiDirection nsBidi::DirectionFromFlags(Flags aFlags)
755 : {
756 : /* if the text contains AN and neutrals, then some neutrals may become RTL */
757 0 : if(!(aFlags&MASK_RTL || (aFlags&DIRPROP_FLAG(AN) && aFlags&MASK_POSSIBLE_N))) {
758 0 : return NSBIDI_LTR;
759 0 : } else if(!(aFlags&MASK_LTR)) {
760 0 : return NSBIDI_RTL;
761 : } else {
762 0 : return NSBIDI_MIXED;
763 : }
764 : }
765 :
766 : /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
767 :
768 : /*
769 : * This implementation of the (Wn) rules applies all rules in one pass.
770 : * In order to do so, it needs a look-ahead of typically 1 character
771 : * (except for W5: sequences of ET) and keeps track of changes
772 : * in a rule Wp that affect a later Wq (p<q).
773 : *
774 : * historyOfEN is a variable-saver: it contains 4 boolean states;
775 : * a bit in it set to 1 means:
776 : * bit 0: the current code is an EN after W2
777 : * bit 1: the current code is an EN after W4
778 : * bit 2: the previous code was an EN after W2
779 : * bit 3: the previous code was an EN after W4
780 : * In other words, b0..1 have transitions of EN in the current iteration,
781 : * while b2..3 have the transitions of EN in the previous iteration.
782 : * A simple historyOfEN<<=2 suffices for the propagation.
783 : *
784 : * The (Nn) and (In) rules are also performed in that same single loop,
785 : * but effectively one iteration behind for white space.
786 : *
787 : * Since all implicit rules are performed in one step, it is not necessary
788 : * to actually store the intermediate directional properties in dirProps[].
789 : */
790 :
/* bit layout of historyOfEN; the PREV_ values are the current-iteration
   values shifted left by EN_SHIFT */
#define EN_SHIFT 2          /* number of bits per iteration */
#define EN_AFTER_W2 1       /* bit 0: the current code is an EN after W2 */
#define EN_AFTER_W4 2       /* bit 1: the current code is an EN after W4 */
#define EN_ALL 3            /* EN_AFTER_W2|EN_AFTER_W4 */
#define PREV_EN_AFTER_W2 4  /* bit 2: the previous code was an EN after W2 */
#define PREV_EN_AFTER_W4 8  /* bit 3: the previous code was an EN after W4 */
797 :
798 0 : void nsBidi::ResolveImplicitLevels(PRInt32 aStart, PRInt32 aLimit,
799 : DirProp aSOR, DirProp aEOR)
800 : {
801 0 : const DirProp *dirProps=mDirProps;
802 0 : nsBidiLevel *levels=mLevels;
803 :
804 0 : PRInt32 i, next, neutralStart=-1;
805 : DirProp prevDirProp, dirProp, nextDirProp, lastStrong, beforeNeutral;
806 : PRUint8 historyOfEN;
807 :
808 : /* initialize: current at aSOR, next at aStart (it is aStart<aLimit) */
809 0 : next=aStart;
810 0 : beforeNeutral=dirProp=lastStrong=aSOR;
811 0 : nextDirProp=dirProps[next];
812 0 : historyOfEN=0;
813 :
814 : /*
815 : * In all steps of this implementation, BN and explicit embedding codes
816 : * must be treated as if they didn't exist (X9).
817 : * They will get levels set before a non-neutral character, and remain
818 : * undefined before a neutral one, but AdjustWSLevels() will take care
819 : * of all of them.
820 : */
821 0 : while(DIRPROP_FLAG(nextDirProp)&MASK_BN_EXPLICIT) {
822 0 : if(++next<aLimit) {
823 0 : nextDirProp=dirProps[next];
824 : } else {
825 0 : nextDirProp=aEOR;
826 0 : break;
827 : }
828 : }
829 :
830 : /* loop for entire run */
831 0 : while(next<aLimit) {
832 : /* advance */
833 0 : prevDirProp=dirProp;
834 0 : dirProp=nextDirProp;
835 0 : i=next;
836 0 : do {
837 0 : if(++next<aLimit) {
838 0 : nextDirProp=dirProps[next];
839 : } else {
840 0 : nextDirProp=aEOR;
841 0 : break;
842 : }
843 : } while(DIRPROP_FLAG(nextDirProp)&MASK_BN_EXPLICIT);
844 0 : historyOfEN<<=EN_SHIFT;
845 :
846 : /* (W1..W7) */
847 0 : switch(dirProp) {
848 : case L:
849 0 : lastStrong=L;
850 0 : break;
851 : case R:
852 0 : lastStrong=R;
853 0 : break;
854 : case AL:
855 : /* (W3) */
856 0 : lastStrong=AL;
857 0 : dirProp=R;
858 0 : break;
859 : case EN:
860 : /* we have to set historyOfEN correctly */
861 0 : if(lastStrong==AL) {
862 : /* (W2) */
863 0 : dirProp=AN;
864 : } else {
865 0 : if(lastStrong==L) {
866 : /* (W7) */
867 0 : dirProp=L;
868 : }
869 : /* this EN stays after (W2) and (W4) - at least before (W7) */
870 0 : historyOfEN|=EN_ALL;
871 : }
872 0 : break;
873 : case ES:
874 0 : if( historyOfEN&PREV_EN_AFTER_W2 && /* previous was EN before (W4) */
875 : nextDirProp==EN && lastStrong!=AL /* next is EN and (W2) won't make it AN */
876 : ) {
877 : /* (W4) */
878 0 : if(lastStrong!=L) {
879 0 : dirProp=EN;
880 : } else {
881 : /* (W7) */
882 0 : dirProp=L;
883 : }
884 0 : historyOfEN|=EN_AFTER_W4;
885 : } else {
886 : /* (W6) */
887 0 : dirProp=O_N;
888 : }
889 0 : break;
890 : case CS:
891 0 : if( historyOfEN&PREV_EN_AFTER_W2 && /* previous was EN before (W4) */
892 : nextDirProp==EN && lastStrong!=AL /* next is EN and (W2) won't make it AN */
893 : ) {
894 : /* (W4) */
895 0 : if(lastStrong!=L) {
896 0 : dirProp=EN;
897 : } else {
898 : /* (W7) */
899 0 : dirProp=L;
900 : }
901 0 : historyOfEN|=EN_AFTER_W4;
902 0 : } else if(prevDirProp==AN && /* previous was AN */
903 : (nextDirProp==AN || /* next is AN */
904 : (nextDirProp==EN && lastStrong==AL)) /* or (W2) will make it one */
905 : ) {
906 : /* (W4) */
907 0 : dirProp=AN;
908 : } else {
909 : /* (W6) */
910 0 : dirProp=O_N;
911 : }
912 0 : break;
913 : case ET:
914 : /* get sequence of ET; advance only next, not current, previous or historyOfEN */
915 0 : while(next<aLimit && DIRPROP_FLAG(nextDirProp)&MASK_ET_NSM_BN /* (W1), (X9) */) {
916 0 : if(++next<aLimit) {
917 0 : nextDirProp=dirProps[next];
918 : } else {
919 0 : nextDirProp=aEOR;
920 0 : break;
921 : }
922 : }
923 :
924 0 : if( historyOfEN&PREV_EN_AFTER_W4 || /* previous was EN before (W5) */
925 : (nextDirProp==EN && lastStrong!=AL) /* next is EN and (W2) won't make it AN */
926 : ) {
927 : /* (W5) */
928 0 : if(lastStrong!=L) {
929 0 : dirProp=EN;
930 : } else {
931 : /* (W7) */
932 0 : dirProp=L;
933 : }
934 : } else {
935 : /* (W6) */
936 0 : dirProp=O_N;
937 : }
938 :
939 : /* apply the result of (W1), (W5)..(W7) to the entire sequence of ET */
940 0 : break;
941 : case NSM:
942 : /* (W1) */
943 0 : dirProp=prevDirProp;
944 : /* set historyOfEN back to prevDirProp's historyOfEN */
945 0 : historyOfEN>>=EN_SHIFT;
946 : /*
947 : * Technically, this should be done before the switch() in the form
948 : * if(nextDirProp==NSM) {
949 : * dirProps[next]=nextDirProp=dirProp;
950 : * }
951 : *
952 : * - effectively one iteration ahead.
953 : * However, whether the next dirProp is NSM or is equal to the current dirProp
954 : * does not change the outcome of any condition in (W2)..(W7).
955 : */
956 0 : break;
957 : default:
958 0 : break;
959 : }
960 :
961 : /* here, it is always [prev,this,next]dirProp!=BN; it may be next>i+1 */
962 :
963 : /* perform (Nn) - here, only L, R, EN, AN, and neutrals are left */
964 : /* this is one iteration late for the neutrals */
965 0 : if(DIRPROP_FLAG(dirProp)&MASK_N) {
966 0 : if(neutralStart<0) {
967 : /* start of a sequence of neutrals */
968 0 : neutralStart=i;
969 0 : beforeNeutral=prevDirProp;
970 : }
971 : } else /* not a neutral, can be only one of { L, R, EN, AN } */ {
972 : /*
973 : * Note that all levels[] values are still the same at this
974 : * point because this function is called for an entire
975 : * same-level run.
976 : * Therefore, we need to read only one actual level.
977 : */
978 0 : nsBidiLevel level=levels[i];
979 :
980 0 : if(neutralStart>=0) {
981 : nsBidiLevel final;
982 : /* end of a sequence of neutrals (dirProp is "afterNeutral") */
983 0 : if(beforeNeutral==L) {
984 0 : if(dirProp==L) {
985 0 : final=0; /* make all neutrals L (N1) */
986 : } else {
987 0 : final=level; /* make all neutrals "e" (N2) */
988 : }
989 : } else /* beforeNeutral is one of { R, EN, AN } */ {
990 0 : if(dirProp==L) {
991 0 : final=level; /* make all neutrals "e" (N2) */
992 : } else {
993 0 : final=1; /* make all neutrals R (N1) */
994 : }
995 : }
996 : /* perform (In) on the sequence of neutrals */
997 0 : if((level^final)&1) {
998 : /* do something only if we need to _change_ the level */
999 0 : do {
1000 0 : ++levels[neutralStart];
1001 : } while(++neutralStart<i);
1002 : }
1003 0 : neutralStart=-1;
1004 : }
1005 :
1006 : /* perform (In) on the non-neutral character */
1007 : /*
1008 : * in the cases of (W5), processing a sequence of ET,
1009 : * and of (X9), skipping BN,
1010 : * there may be multiple characters from i to <next
1011 : * that all get (virtually) the same dirProp and (really) the same level
1012 : */
1013 0 : if(dirProp==L) {
1014 0 : if(level&1) {
1015 0 : ++level;
1016 : } else {
1017 0 : i=next; /* we keep the levels */
1018 : }
1019 0 : } else if(dirProp==R) {
1020 0 : if(!(level&1)) {
1021 0 : ++level;
1022 : } else {
1023 0 : i=next; /* we keep the levels */
1024 : }
1025 : } else /* EN or AN */ {
1026 0 : level=(level+2)&~1; /* least greater even level */
1027 : }
1028 :
1029 : /* apply the new level to the sequence, if necessary */
1030 0 : while(i<next) {
1031 0 : levels[i++]=level;
1032 : }
1033 : }
1034 : }
1035 :
1036 : /* perform (Nn) - here,
1037 : the character after the neutrals is aEOR, which is either L or R */
1038 : /* this is one iteration late for the neutrals */
1039 0 : if(neutralStart>=0) {
1040 : /*
1041 : * Note that all levels[] values are still the same at this
1042 : * point because this function is called for an entire
1043 : * same-level run.
1044 : * Therefore, we need to read only one actual level.
1045 : */
1046 0 : nsBidiLevel level=levels[neutralStart], final;
1047 :
1048 : /* end of a sequence of neutrals (aEOR is "afterNeutral") */
1049 0 : if(beforeNeutral==L) {
1050 0 : if(aEOR==L) {
1051 0 : final=0; /* make all neutrals L (N1) */
1052 : } else {
1053 0 : final=level; /* make all neutrals "e" (N2) */
1054 : }
1055 : } else /* beforeNeutral is one of { R, EN, AN } */ {
1056 0 : if(aEOR==L) {
1057 0 : final=level; /* make all neutrals "e" (N2) */
1058 : } else {
1059 0 : final=1; /* make all neutrals R (N1) */
1060 : }
1061 : }
1062 : /* perform (In) on the sequence of neutrals */
1063 0 : if((level^final)&1) {
1064 : /* do something only if we need to _change_ the level */
1065 0 : do {
1066 0 : ++levels[neutralStart];
1067 : } while(++neutralStart<aLimit);
1068 : }
1069 : }
1070 0 : }
1071 :
1072 : /* perform (L1) and (X9) ---------------------------------------------------- */
1073 :
1074 : /*
1075 : * Reset the embedding levels for some non-graphic characters (L1).
1076 : * This function also sets appropriate levels for BN, and
1077 : * explicit embedding types that are supposed to have been removed
1078 : * from the paragraph in (X9).
1079 : */
/*
 * Rewrites entries of mLevels in place, scanning backwards from
 * mTrailingWSStart. The WS/BN pass runs only when mFlags has a MASK_WS bit
 * set; the override-flag pass runs only when mFlags has a MASK_OVERRIDE bit
 * set. Entries at or after mTrailingWSStart are not written by either pass.
 */
1080 0 : void nsBidi::AdjustWSLevels()
1081 : {
1082 0 : const DirProp *dirProps=mDirProps;
1083 0 : nsBidiLevel *levels=mLevels;
1084 : PRInt32 i;
1085 :
1086 0 : if(mFlags&MASK_WS) {
1087 0 : nsBidiLevel paraLevel=mParaLevel;
1088 : Flags flag;
1089 :
1090 0 : i=mTrailingWSStart;
1091 0 : while(i>0) {
1092 : /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
/* --i happens before the mask test, so the first index that fails the test is consumed here and keeps its level */
1093 0 : while(i>0 && DIRPROP_FLAG(dirProps[--i])&MASK_WS) {
1094 0 : levels[i]=paraLevel;
1095 : }
1096 :
1097 : /* reset BN to the next character's level until B/S, which restarts above loop */
1098 : /* here, i+1 is guaranteed to be <length */
1099 0 : while(i>0) {
1100 0 : flag=DIRPROP_FLAG(dirProps[--i]);
1101 0 : if(flag&MASK_BN_EXPLICIT) {
/* copy the level of the following character (already final, since we walk backwards) */
1102 0 : levels[i]=levels[i+1];
1103 0 : } else if(flag&MASK_B_S) {
1104 0 : levels[i]=paraLevel;
1105 0 : break;
1106 : }
/* any other character keeps the level it already has */
1107 : }
1108 : }
1109 : }
1110 :
1111 : /* now remove the NSBIDI_LEVEL_OVERRIDE flags, if any */
1112 : /* (a separate loop can be optimized more easily by a compiler) */
1113 0 : if(mFlags&MASK_OVERRIDE) {
/* clears the flag bit on levels[0 .. mTrailingWSStart-1] */
1114 0 : for(i=mTrailingWSStart; i>0;) {
1115 0 : levels[--i]&=~NSBIDI_LEVEL_OVERRIDE;
1116 : }
1117 : }
1118 0 : }
1119 :
/*
 * Stores mDirection in *aDirection and returns NS_OK.
 * aDirection is dereferenced without a null check.
 */
1120 0 : nsresult nsBidi::GetDirection(nsBidiDirection* aDirection)
1121 : {
1122 0 : *aDirection = mDirection;
1123 0 : return NS_OK;
1124 : }
1125 :
/*
 * Stores mParaLevel in *aParaLevel and returns NS_OK.
 * aParaLevel is dereferenced without a null check.
 */
1126 0 : nsresult nsBidi::GetParaLevel(nsBidiLevel* aParaLevel)
1127 : {
1128 0 : *aParaLevel = mParaLevel;
1129 0 : return NS_OK;
1130 : }
1131 : #ifdef FULL_BIDI_ENGINE
1132 :
1133 : /* -------------------------------------------------------------------------- */
1134 :
/*
 * Stores mLength in *aLength and returns NS_OK.
 * aLength is dereferenced without a null check.
 * This function sits inside the FULL_BIDI_ENGINE conditional section.
 */
1135 : nsresult nsBidi::GetLength(PRInt32* aLength)
1136 : {
1137 : *aLength = mLength;
1138 : return NS_OK;
1139 : }
1140 :
1141 : /*
1142 : * General remarks about the functions in this section:
1143 : *
1144 : * These functions deal with the aspects of potentially mixed-directional
1145 : * text in a single paragraph or in a line of a single paragraph
1146 : * which has already been processed according to
1147 : * the Unicode 3.0 Bidi algorithm as defined in
1148 : * http://www.unicode.org/unicode/reports/tr9/ , version 5,
1149 : * also described in The Unicode Standard, Version 3.0 .
1150 : *
1151 : * This means that there is a nsBidi object with a levels
1152 : * and a dirProps array.
1153 : * paraLevel and direction are also set.
1154 : * Only if the length of the text is zero, then levels==dirProps==NULL.
1155 : *
1156 : * The overall directionality of the paragraph
1157 : * or line is used to bypass the reordering steps if possible.
1158 : * Even purely RTL text does not need reordering there because
1159 : * the getLogical/VisualIndex() functions can compute the
1160 : * index on the fly in such a case.
1161 : *
1162 : * The implementation of the access to same-level-runs and of the reordering
1163 : * do attempt to provide better performance and less memory usage compared to
1164 : * a direct implementation of especially rule (L2) with an array of
1165 : * one (32-bit) integer per text character.
1166 : *
1167 : * Here, the levels array is scanned as soon as necessary, and a vector of
1168 : * same-level-runs is created. Reordering then is done on this vector.
1169 : * For each run of text positions that were resolved to the same level,
1170 : * only 8 bytes are stored: the first text position of the run and the visual
1171 : * position behind the run after reordering.
1172 : * One sign bit is used to hold the directionality of the run.
1173 : * This is inefficient if there are many very short runs. If the average run
1174 : * length is <2, then this uses more memory.
1175 : *
1176 : * In a further attempt to save memory, the levels array is never changed
1177 : * after all the resolution rules (Xn, Wn, Nn, In).
1178 : * Many functions have to consider the field trailingWSStart:
1179 : * if it is less than length, then there is an implicit trailing run
1180 : * at the paraLevel,
1181 : * which is not reflected in the levels array.
1182 : * This allows a line nsBidi object to use the same levels array as
1183 : * its paragraph parent object.
1184 : *
1185 : * When a nsBidi object is created for a line of a paragraph, then the
1186 : * paragraph's levels and dirProps arrays are reused by way of setting
1187 : * a pointer into them, not by copying. This again saves memory, but it means
1188 : * the now-shared levels must not be changed for (L1).
1189 : */
/*
 * Configure this object as the line [aStart, aLimit) of the paragraph
 * object aParaBidi.
 *
 * Returns NS_ERROR_INVALID_POINTER if aParaBidi is NULL,
 * NS_ERROR_INVALID_ARG unless 0<=aStart<=aLimit<=parent's mLength,
 * and NS_OK otherwise.
 *
 * For a non-empty line, mDirProps and mLevels are set to point into the
 * parent's arrays (offset by aStart); nothing is copied. mRuns is reset to
 * NULL and mFlags to 0 in every successful case.
 */
1190 : nsresult nsBidi::SetLine(nsIBidi* aParaBidi, PRInt32 aStart, PRInt32 aLimit)
1191 : {
/* NOTE(review): unchecked downcast - assumes aParaBidi really is an nsBidi; confirm against callers */
1192 : nsBidi* pParent = (nsBidi*)aParaBidi;
1193 : PRInt32 length;
1194 :
1195 : /* check the argument values */
1196 : if(pParent==NULL) {
1197 : return NS_ERROR_INVALID_POINTER;
1198 : } else if(aStart<0 || aStart>aLimit || aLimit>pParent->mLength) {
1199 : return NS_ERROR_INVALID_ARG;
1200 : }
1201 :
1202 : /* set members from our aParaBidi parent */
1203 : length=mLength=aLimit-aStart;
1204 : mParaLevel=pParent->mParaLevel;
1205 :
1206 : mRuns=NULL;
1207 : mFlags=0;
1208 :
1209 : if(length>0) {
1210 : mDirProps=pParent->mDirProps+aStart;
1211 : mLevels=pParent->mLevels+aStart;
/* a negative run count makes CountRuns()/GetVisualRun() call GetRuns() on first use */
1212 : mRunCount=-1;
1213 :
1214 : if(pParent->mDirection!=NSBIDI_MIXED) {
1215 : /* the parent is already trivial */
1216 : mDirection=pParent->mDirection;
1217 :
1218 : /*
1219 : * The parent's levels are all either
1220 : * implicitly or explicitly ==paraLevel;
1221 : * do the same here.
1222 : */
/* translate the parent's trailingWSStart into line coordinates, clamped to [0, length] */
1223 : if(pParent->mTrailingWSStart<=aStart) {
1224 : mTrailingWSStart=0;
1225 : } else if(pParent->mTrailingWSStart<aLimit) {
1226 : mTrailingWSStart=pParent->mTrailingWSStart-aStart;
1227 : } else {
1228 : mTrailingWSStart=length;
1229 : }
1230 : } else {
1231 : const nsBidiLevel *levels=mLevels;
1232 : PRInt32 i, trailingWSStart;
1233 : nsBidiLevel level;
/* flags is not referenced again in this function */
1234 : Flags flags=0;
1235 :
1236 : SetTrailingWSStart();
1237 : trailingWSStart=mTrailingWSStart;
1238 :
1239 : /* recalculate pLineBidi->direction */
1240 : if(trailingWSStart==0) {
1241 : /* all levels are at paraLevel */
/* NOTE(review): the cast presumably relies on NSBIDI_LTR==0 and NSBIDI_RTL==1 - confirm in nsBidi.h */
1242 : mDirection=(nsBidiDirection)(mParaLevel&1);
1243 : } else {
1244 : /* get the level of the first character */
/* only the low (odd/even) bit is kept in level from here on */
1245 : level=levels[0]&1;
1246 :
1247 : /* if there is anything of a different level, then the line is mixed */
1248 : if(trailingWSStart<length && (mParaLevel&1)!=level) {
1249 : /* the trailing WS is at paraLevel, which differs from levels[0] */
1250 : mDirection=NSBIDI_MIXED;
1251 : } else {
1252 : /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */
1253 : i=1;
/* trailingWSStart>=1 in this branch, so i reaches it and the loop terminates */
1254 : for(;;) {
1255 : if(i==trailingWSStart) {
1256 : /* the direction values match those in level */
1257 : mDirection=(nsBidiDirection)level;
1258 : break;
1259 : } else if((levels[i]&1)!=level) {
1260 : mDirection=NSBIDI_MIXED;
1261 : break;
1262 : }
1263 : ++i;
1264 : }
1265 : }
1266 : }
1267 :
1268 : switch(mDirection) {
1269 : case NSBIDI_LTR:
1270 : /* make sure paraLevel is even */
/* rounds an odd paraLevel up to the next even value; an even one is unchanged */
1271 : mParaLevel=(mParaLevel+1)&~1;
1272 :
1273 : /* all levels are implicitly at paraLevel (important for GetLevels()) */
1274 : mTrailingWSStart=0;
1275 : break;
1276 : case NSBIDI_RTL:
1277 : /* make sure paraLevel is odd */
1278 : mParaLevel|=1;
1279 :
1280 : /* all levels are implicitly at paraLevel (important for GetLevels()) */
1281 : mTrailingWSStart=0;
1282 : break;
1283 : default:
/* NSBIDI_MIXED: keep mParaLevel and mTrailingWSStart as computed above */
1284 : break;
1285 : }
1286 : }
1287 : } else {
1288 : /* create an object for a zero-length line */
1289 : mDirection=mParaLevel&1 ? NSBIDI_RTL : NSBIDI_LTR;
/* run count 0 (not -1): there is nothing for GetRuns() to compute lazily */
1290 : mTrailingWSStart=mRunCount=0;
1291 :
1292 : mDirProps=NULL;
1293 : mLevels=NULL;
1294 : }
1295 : return NS_OK;
1296 : }
1297 :
1298 : /* handle trailing WS (L1) -------------------------------------------------- */
1299 :
1300 : /*
1301 : * SetTrailingWSStart() sets the start index for a trailing
1302 : * run of WS in the line. This is necessary because we do not modify
1303 : * the paragraph's levels array that we just point into.
1304 : * Using trailingWSStart is another form of performing (L1).
1305 : *
1306 : * To make subsequent operations easier, we also include the run
1307 : * before the WS if it is at the paraLevel - we merge the two here.
1308 : */
/*
 * Reads mDirProps, mLevels, mLength and mParaLevel; writes only
 * mTrailingWSStart. The stored value is in the range [0, mLength].
 */
1309 : void nsBidi::SetTrailingWSStart() {
1310 : /* mDirection!=NSBIDI_MIXED */
1311 :
1312 : const DirProp *dirProps=mDirProps;
/* levels is only read in this function */
1313 : nsBidiLevel *levels=mLevels;
1314 : PRInt32 start=mLength;
1315 : nsBidiLevel paraLevel=mParaLevel;
1316 :
1317 : /* go backwards across all WS, BN, explicit codes */
1318 : while(start>0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) {
1319 : --start;
1320 : }
1321 :
1322 : /* if the WS run can be merged with the previous run then do so here */
/* keeps backing up while the stored level equals paraLevel exactly (full value, not just the odd bit) */
1323 : while(start>0 && levels[start-1]==paraLevel) {
1324 : --start;
1325 : }
1326 :
1327 : mTrailingWSStart=start;
1328 : }
1329 :
/*
 * Stores the level for aCharIndex in *aLevel and always returns NS_OK.
 * An out-of-range index is not reported as an error: *aLevel is set to 0.
 * aLevel is dereferenced without a null check.
 */
1330 : nsresult nsBidi::GetLevelAt(PRInt32 aCharIndex, nsBidiLevel* aLevel)
1331 : {
1332 : /* return paraLevel if in the trailing WS run, otherwise the real level */
1333 : if(aCharIndex<0 || mLength<=aCharIndex) {
1334 : *aLevel = 0;
1335 : } else if(mDirection!=NSBIDI_MIXED || aCharIndex>=mTrailingWSStart) {
/* non-mixed text and the trailing WS run are implicitly at paraLevel; mLevels is not consulted */
1336 : *aLevel = mParaLevel;
1337 : } else {
1338 : *aLevel = mLevels[aCharIndex];
1339 : }
1340 : return NS_OK;
1341 : }
1342 :
/*
 * Stores in *aLevels a pointer to a levels array covering the whole text.
 *
 * - mLength<=0: *aLevels=NULL, returns NS_ERROR_INVALID_ARG.
 * - mTrailingWSStart==mLength: *aLevels=mLevels (no copy), returns NS_OK.
 * - otherwise: the array is materialized in mLevelsMemory, mLevels is
 *   re-pointed at it and mTrailingWSStart is set to mLength; returns NS_OK,
 *   or NS_ERROR_OUT_OF_MEMORY (with *aLevels=NULL) if GETLEVELSMEMORY fails.
 *
 * aLevels is dereferenced without a null check.
 */
1343 : nsresult nsBidi::GetLevels(nsBidiLevel** aLevels)
1344 : {
1345 : PRInt32 start, length;
1346 :
1347 : length = mLength;
1348 : if(length<=0) {
1349 : *aLevels = NULL;
1350 : return NS_ERROR_INVALID_ARG;
1351 : }
1352 :
1353 : start = mTrailingWSStart;
1354 : if(start==length) {
1355 : /* the current levels array reflects the WS run */
1356 : *aLevels = mLevels;
1357 : return NS_OK;
1358 : }
1359 :
1360 : /*
1361 : * After the previous if(), we know that the levels array
1362 : * has an implicit trailing WS run and therefore does not itself
1363 : * reflect all the levels.
1364 : * This must be a nsBidi object for a line, and
1365 : * we need to create a new levels array.
1366 : */
1367 :
/* NOTE(review): GETLEVELSMEMORY is defined elsewhere; presumably it makes mLevelsMemory hold at least length entries - confirm */
1368 : if(GETLEVELSMEMORY(length)) {
1369 : nsBidiLevel *levels=mLevelsMemory;
1370 :
/* skip the copy when there is nothing to copy or mLevels already is the private buffer */
/* NOTE(review): start and length-start are passed as byte counts, which assumes nsBidiLevel is one byte wide - confirm */
1371 : if(start>0 && levels!=mLevels) {
1372 : memcpy(levels, mLevels, start);
1373 : }
1374 : memset(levels+start, mParaLevel, length-start);
1375 :
1376 : /* this new levels array is set for the line and reflects the WS run */
1377 : mTrailingWSStart=length;
1378 : *aLevels=mLevels=levels;
1379 : return NS_OK;
1380 : } else {
1381 : /* out of memory */
1382 : *aLevels = NULL;
1383 : return NS_ERROR_OUT_OF_MEMORY;
1384 : }
1385 : }
1386 : #endif // FULL_BIDI_ENGINE
1387 :
/*
 * Stores mDirProps[aCharIndex], cast to nsCharType, in *pType.
 * Returns NS_ERROR_INVALID_ARG (leaving *pType untouched) unless
 * 0<=aCharIndex<mLength; returns NS_OK otherwise.
 * pType is dereferenced without a null check.
 */
1388 0 : nsresult nsBidi::GetCharTypeAt(PRInt32 aCharIndex, nsCharType* pType)
1389 : {
1390 0 : if(aCharIndex<0 || mLength<=aCharIndex) {
1391 0 : return NS_ERROR_INVALID_ARG;
1392 : }
1393 0 : *pType = (nsCharType)mDirProps[aCharIndex];
1394 0 : return NS_OK;
1395 : }
1396 :
/*
 * Reports the same-level run that starts at aLogicalStart.
 *
 * aLogicalLimit (optional, may be NULL) receives the index just behind the run.
 * aLevel (optional, may be NULL) receives the level of the run.
 *
 * Returns NS_ERROR_INVALID_ARG unless 0<=aLogicalStart<mLength, else NS_OK.
 */
1397 0 : nsresult nsBidi::GetLogicalRun(PRInt32 aLogicalStart, PRInt32 *aLogicalLimit, nsBidiLevel *aLevel)
1398 : {
1399 0 : PRInt32 length = mLength;
1400 :
1401 0 : if(aLogicalStart<0 || length<=aLogicalStart) {
1402 0 : return NS_ERROR_INVALID_ARG;
1403 : }
1404 :
1405 0 : if(mDirection!=NSBIDI_MIXED || aLogicalStart>=mTrailingWSStart) {
/* non-mixed text or trailing WS: the run extends to the end of the text at paraLevel */
1406 0 : if(aLogicalLimit!=NULL) {
1407 0 : *aLogicalLimit=length;
1408 : }
1409 0 : if(aLevel!=NULL) {
1410 0 : *aLevel=mParaLevel;
1411 : }
1412 : } else {
1413 0 : nsBidiLevel *levels=mLevels;
1414 0 : nsBidiLevel level=levels[aLogicalStart];
1415 :
1416 : /* search for the end of the run */
/* length is reused as the scan limit: the run cannot extend into the trailing WS */
1417 0 : length=mTrailingWSStart;
/* aLogicalStart is reused as the cursor; it ends at the first index whose level differs, or at the limit */
1418 0 : while(++aLogicalStart<length && level==levels[aLogicalStart]) {}
1419 :
1420 0 : if(aLogicalLimit!=NULL) {
1421 0 : *aLogicalLimit=aLogicalStart;
1422 : }
1423 0 : if(aLevel!=NULL) {
1424 0 : *aLevel=level;
1425 : }
1426 : }
1427 0 : return NS_OK;
1428 : }
1429 :
1430 : /* runs API functions ------------------------------------------------------- */
1431 :
/*
 * Stores the number of runs in *aRunCount (optional, may be NULL).
 * If mRunCount is negative the runs are computed first via GetRuns();
 * a false return from GetRuns() is reported as NS_ERROR_OUT_OF_MEMORY.
 */
1432 0 : nsresult nsBidi::CountRuns(PRInt32* aRunCount)
1433 : {
/* GetRuns() is only called when the runs have not been computed yet (short-circuit &&) */
1434 0 : if(mRunCount<0 && !GetRuns()) {
1435 0 : return NS_ERROR_OUT_OF_MEMORY;
1436 : } else {
1437 0 : if (aRunCount)
1438 0 : *aRunCount = mRunCount;
1439 0 : return NS_OK;
1440 : }
1441 : }
1442 :
/*
 * Reports the run at position aRunIndex of the mRuns array.
 *
 * aLogicalStart (optional, may be NULL) receives the run's logical start index.
 * aLength (optional, may be NULL) receives the run's length.
 * aDirection receives the run's direction; unlike the other out-parameters
 * it is written unconditionally and must not be NULL.
 *
 * Always returns NS_OK. If aRunIndex is out of range, or GetRuns() fails,
 * *aDirection is set to NSBIDI_LTR and the other out-parameters are left
 * untouched.
 */
1443 0 : nsresult nsBidi::GetVisualRun(PRInt32 aRunIndex, PRInt32 *aLogicalStart, PRInt32 *aLength, nsBidiDirection *aDirection)
1444 : {
1445 0 : if( aRunIndex<0 ||
1446 0 : (mRunCount==-1 && !GetRuns()) ||
1447 : aRunIndex>=mRunCount
1448 : ) {
1449 0 : *aDirection = NSBIDI_LTR;
1450 0 : return NS_OK;
1451 : } else {
/* logicalStart packs the logical index together with a direction bit; GET_INDEX/GET_ODD_BIT unpack them */
1452 0 : PRInt32 start=mRuns[aRunIndex].logicalStart;
1453 0 : if(aLogicalStart!=NULL) {
1454 0 : *aLogicalStart=GET_INDEX(start);
1455 : }
1456 0 : if(aLength!=NULL) {
/* visualLimit values are cumulative, so a run's length is the difference to the previous run's limit */
1457 0 : if(aRunIndex>0) {
1458 0 : *aLength=mRuns[aRunIndex].visualLimit-
1459 0 : mRuns[aRunIndex-1].visualLimit;
1460 : } else {
1461 0 : *aLength=mRuns[0].visualLimit;
1462 : }
1463 : }
1464 0 : *aDirection = (nsBidiDirection)GET_ODD_BIT(start);
1465 0 : return NS_OK;
1466 : }
1467 : }
1468 :
1469 : /* compute the runs array --------------------------------------------------- */
1470 :
1471 : /*
1472 : * Compute the runs array from the levels array.
1473 : * After GetRuns() returns true, runCount is guaranteed to be >0
1474 : * and the runs are reordered.
1475 : * Odd-level runs have visualStart on their visual right edge and
1476 : * they progress visually to the left.
1477 : */
/*
 * Fills mRuns and mRunCount from mLevels.
 * Returns false only when GETRUNSMEMORY() fails; in that case mRuns and
 * mRunCount are left unchanged. Returns true in every other case.
 */
1478 0 : bool nsBidi::GetRuns()
1479 : {
1480 0 : if(mDirection!=NSBIDI_MIXED) {
1481 : /* simple, single-run case - this covers length==0 */
1482 0 : GetSingleRun(mParaLevel);
1483 : } else /* NSBIDI_MIXED, length>0 */ {
1484 : /* mixed directionality */
/* the initial value of limit is overwritten below before it is read */
1485 0 : PRInt32 length=mLength, limit=length;
1486 :
1487 : /*
1488 : * If there are WS characters at the end of the line
1489 : * and the run preceding them has a level different from
1490 : * paraLevel, then they will form their own run at paraLevel (L1).
1491 : * Count them separately.
1492 : * We need some special treatment for this in order to not
1493 : * modify the levels array which a line nsBidi object shares
1494 : * with its paragraph parent and its other line siblings.
1495 : * In other words, for the trailing WS, it may be
1496 : * levels[]!=paraLevel but we have to treat it like it were so.
1497 : */
1498 0 : limit=mTrailingWSStart;
1499 0 : if(limit==0) {
1500 : /* there is only WS on this line */
1501 0 : GetSingleRun(mParaLevel);
1502 : } else {
1503 0 : nsBidiLevel *levels=mLevels;
1504 : PRInt32 i, runCount;
1505 0 : nsBidiLevel level=NSBIDI_DEFAULT_LTR; /* initialize with no valid level */
1506 :
1507 : /* count the runs, there is at least one non-WS run, and limit>0 */
1508 0 : runCount=0;
1509 0 : for(i=0; i<limit; ++i) {
1510 : /* increment runCount at the start of each run */
1511 0 : if(levels[i]!=level) {
1512 0 : ++runCount;
1513 0 : level=levels[i];
1514 : }
1515 : }
1516 :
1517 : /*
1518 : * We don't need to see if the last run can be merged with a trailing
1519 : * WS run because SetTrailingWSStart() would have done that.
1520 : */
1521 0 : if(runCount==1 && limit==length) {
1522 : /* There is only one non-WS run and no trailing WS-run. */
1523 0 : GetSingleRun(levels[0]);
1524 : } else /* runCount>1 || limit<length */ {
1525 : /* allocate and set the runs */
1526 : Run *runs;
1527 : PRInt32 runIndex, start;
1528 0 : nsBidiLevel minLevel=NSBIDI_MAX_EXPLICIT_LEVEL+1, maxLevel=0;
1529 :
1530 : /* now, count a (non-mergable) WS run */
1531 0 : if(limit<length) {
1532 0 : ++runCount;
1533 : }
1534 :
1535 : /* runCount>1 */
/* NOTE(review): GETRUNSMEMORY is defined elsewhere; presumably it makes mRunsMemory hold at least runCount entries - confirm */
1536 0 : if(GETRUNSMEMORY(runCount)) {
1537 0 : runs=mRunsMemory;
1538 : } else {
/* the only failure exit of this function */
1539 0 : return false;
1540 : }
1541 :
1542 : /* set the runs */
1543 : /* this could be optimized, e.g.: 464->444, 484->444, 575->555, 595->555 */
1544 : /* however, that would take longer and make other functions more complicated */
1545 0 : runIndex=0;
1546 :
1547 : /* search for the run ends */
1548 0 : start=0;
1549 0 : level=levels[0];
1550 0 : if(level<minLevel) {
1551 0 : minLevel=level;
1552 : }
1553 0 : if(level>maxLevel) {
1554 0 : maxLevel=level;
1555 : }
1556 :
1557 : /* initialize visualLimit values with the run lengths */
1558 0 : for(i=1; i<limit; ++i) {
1559 0 : if(levels[i]!=level) {
1560 : /* i is another run limit */
1561 0 : runs[runIndex].logicalStart=start;
1562 0 : runs[runIndex].visualLimit=i-start;
1563 0 : start=i;
1564 :
1565 0 : level=levels[i];
1566 0 : if(level<minLevel) {
1567 0 : minLevel=level;
1568 : }
1569 0 : if(level>maxLevel) {
1570 0 : maxLevel=level;
1571 : }
1572 0 : ++runIndex;
1573 : }
1574 : }
1575 :
1576 : /* finish the last run at i==limit */
1577 0 : runs[runIndex].logicalStart=start;
1578 0 : runs[runIndex].visualLimit=limit-start;
/* from here on runIndex is the number of non-WS runs (>=1) */
1579 0 : ++runIndex;
1580 :
1581 0 : if(limit<length) {
1582 : /* there is a separate WS run */
1583 0 : runs[runIndex].logicalStart=limit;
1584 0 : runs[runIndex].visualLimit=length-limit;
/* the WS run can only lower minLevel; maxLevel is not updated for it */
1585 0 : if(mParaLevel<minLevel) {
1586 0 : minLevel=mParaLevel;
1587 : }
1588 : }
1589 :
1590 : /* set the object fields */
/* ReorderLine() reads mRuns and mRunCount, so they must be set before the call */
1591 0 : mRuns=runs;
1592 0 : mRunCount=runCount;
1593 :
1594 0 : ReorderLine(minLevel, maxLevel);
1595 :
1596 : /* now add the direction flags and adjust the visualLimit's to be just that */
/* running sum: each visualLimit changes from a run length into a cumulative limit; limit is reused as the accumulator */
1597 0 : ADD_ODD_BIT_FROM_LEVEL(runs[0].logicalStart, levels[runs[0].logicalStart]);
1598 0 : limit=runs[0].visualLimit;
1599 0 : for(i=1; i<runIndex; ++i) {
1600 0 : ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, levels[runs[i].logicalStart]);
1601 0 : limit=runs[i].visualLimit+=limit;
1602 : }
1603 :
1604 : /* same for the trailing WS run */
1605 0 : if(runIndex<runCount) {
/* i==runIndex here (the loop above stopped there), so runs[i] and runs[runIndex] are the same element */
1606 0 : ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, mParaLevel);
1607 0 : runs[runIndex].visualLimit+=limit;
1608 : }
1609 : }
1610 : }
1611 : }
1612 0 : return true;
1613 : }
1614 :
1615 : /* in trivial cases there is only one trivial run; called by GetRuns() */
/*
 * Points mRuns at mSimpleRuns, sets mRunCount to 1 and describes the whole
 * text [0, mLength) as one run whose direction is derived from aLevel.
 */
1616 0 : void nsBidi::GetSingleRun(nsBidiLevel aLevel)
1617 : {
1618 : /* simple, single-run case */
1619 0 : mRuns=mSimpleRuns;
1620 0 : mRunCount=1;
1621 :
1622 : /* fill and reorder the single run */
/* NOTE(review): MAKE_INDEX_ODD_PAIR is defined elsewhere; presumably it packs index 0 with the odd bit of aLevel - confirm */
1623 0 : mRuns[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, aLevel);
1624 0 : mRuns[0].visualLimit=mLength;
1625 0 : }
1626 :
1627 : /* reorder the runs array (L2) ---------------------------------------------- */
1628 :
1629 : /*
1630 : * Reorder the same-level runs in the runs array.
1631 : * Here, runCount>1 and maxLevel>=minLevel>=paraLevel.
1632 : * All the visualStart fields=logical start before reordering.
1633 : * The "odd" bits are not set yet.
1634 : *
1635 : * Reordering with this data structure lends itself to some handy shortcuts:
1636 : *
1637 : * Since each run is moved but not modified, and since at the initial maxLevel
1638 : * each sequence of same-level runs consists of only one run each, we
1639 : * don't need to do anything there and can predecrement maxLevel.
1640 : * In many simple cases, the reordering is thus done entirely in the
1641 : * index mapping.
1642 : * Also, reordering occurs only down to the lowest odd level that occurs,
1643 : * which is minLevel|1. However, if the lowest level itself is odd, then
1644 : * in the last reordering the sequence of the runs at this level or higher
1645 : * will be all runs, and we don't need the elaborate loop to search for them.
1646 : * This is covered by ++minLevel instead of minLevel|=1 followed
1647 : * by an extra reorder-all after the reorder-some loop.
1648 : * About a trailing WS run:
1649 : * Such a run would need special treatment because its level is not
1650 : * reflected in levels[] if this is not a paragraph object.
1651 : * Instead, all characters from trailingWSStart on are implicitly at
1652 : * paraLevel.
1653 : * However, for all maxLevel>paraLevel, this run will never be reordered
1654 : * and does not need to be taken into account. maxLevel==paraLevel is only reordered
1655 : * if minLevel==paraLevel is odd, which is done in the extra segment.
1656 : * This means that for the main reordering loop we don't need to consider
1657 : * this run and can --runCount. If it is later part of the all-runs
1658 : * reordering, then runCount is adjusted accordingly.
1659 : */
/*
 * Reorders the entries of mRuns in place (logicalStart and visualLimit are
 * swapped together); mLevels is only read.
 * aMinLevel/aMaxLevel are the lowest and highest levels found by the caller.
 */
1660 0 : void nsBidi::ReorderLine(nsBidiLevel aMinLevel, nsBidiLevel aMaxLevel)
1661 : {
1662 : Run *runs;
1663 : nsBidiLevel *levels;
1664 : PRInt32 firstRun, endRun, limitRun, runCount, temp;
1665 :
1666 : /* nothing to do? */
1667 0 : if(aMaxLevel<=(aMinLevel|1)) {
1668 0 : return;
1669 : }
1670 :
1671 : /*
1672 : * Reorder only down to the lowest odd level
1673 : * and reorder at an odd aMinLevel in a separate, simpler loop.
1674 : * See comments above for why aMinLevel is always incremented.
1675 : */
1676 0 : ++aMinLevel;
1677 :
1678 0 : runs=mRuns;
1679 0 : levels=mLevels;
1680 0 : runCount=mRunCount;
1681 :
1682 : /* do not include the WS run at paraLevel<=old aMinLevel except in the simple loop */
1683 0 : if(mTrailingWSStart<mLength) {
1684 0 : --runCount;
1685 : }
1686 :
/* aMaxLevel is pre-decremented: nothing is reversed at the original maximum level */
1687 0 : while(--aMaxLevel>=aMinLevel) {
1688 0 : firstRun=0;
1689 :
1690 : /* loop for all sequences of runs */
1691 0 : for(;;) {
1692 : /* look for a sequence of runs that are all at >=aMaxLevel */
1693 : /* look for the first run of such a sequence */
/* logicalStart carries no direction bit yet, so it can index levels[] directly */
1694 0 : while(firstRun<runCount && levels[runs[firstRun].logicalStart]<aMaxLevel) {
1695 0 : ++firstRun;
1696 : }
1697 0 : if(firstRun>=runCount) {
1698 0 : break; /* no more such runs */
1699 : }
1700 :
1701 : /* look for the limit run of such a sequence (the run behind it) */
1702 0 : for(limitRun=firstRun; ++limitRun<runCount && levels[runs[limitRun].logicalStart]>=aMaxLevel;) {}
1703 :
1704 : /* Swap the entire sequence of runs from firstRun to limitRun-1. */
1705 0 : endRun=limitRun-1;
1706 0 : while(firstRun<endRun) {
1707 0 : temp=runs[firstRun].logicalStart;
1708 0 : runs[firstRun].logicalStart=runs[endRun].logicalStart;
1709 0 : runs[endRun].logicalStart=temp;
1710 :
1711 0 : temp=runs[firstRun].visualLimit;
1712 0 : runs[firstRun].visualLimit=runs[endRun].visualLimit;
1713 0 : runs[endRun].visualLimit=temp;
1714 :
1715 0 : ++firstRun;
1716 0 : --endRun;
1717 : }
1718 :
1719 0 : if(limitRun==runCount) {
1720 0 : break; /* no more such runs */
1721 : } else {
/* the run at limitRun is already known to be below aMaxLevel, so resume behind it */
1722 0 : firstRun=limitRun+1;
1723 : }
1724 : }
1725 : }
1726 :
1727 : /* now do aMaxLevel==old aMinLevel (==odd!), see above */
/* aMinLevel was incremented above, so an even value here means the original minimum was odd */
1728 0 : if(!(aMinLevel&1)) {
1729 0 : firstRun=0;
1730 :
1731 : /* include the trailing WS run in this complete reordering */
/* with a WS run, runCount was already decremented above; either way it now becomes the index of the last run */
1732 0 : if(mTrailingWSStart==mLength) {
1733 0 : --runCount;
1734 : }
1735 :
1736 : /* Swap the entire sequence of all runs. (endRun==runCount) */
1737 0 : while(firstRun<runCount) {
1738 0 : temp=runs[firstRun].logicalStart;
1739 0 : runs[firstRun].logicalStart=runs[runCount].logicalStart;
1740 0 : runs[runCount].logicalStart=temp;
1741 :
1742 0 : temp=runs[firstRun].visualLimit;
1743 0 : runs[firstRun].visualLimit=runs[runCount].visualLimit;
1744 0 : runs[runCount].visualLimit=temp;
1745 :
1746 0 : ++firstRun;
1747 0 : --runCount;
1748 : }
1749 : }
1750 : }
1751 :
/*
 * Fills aIndexMap[0..aLength-1] with a reordering of the indexes 0..aLength-1
 * derived from aLevels: the map starts as the identity and, for each level
 * from the maximum down to the lowest odd level, every maximal stretch of
 * positions whose level is at least that level is reversed.
 *
 * Uses no member state. Always returns NS_OK; if aIndexMap is NULL or
 * PrepareReorder() rejects the input, aIndexMap is not written at all.
 */
1752 0 : nsresult nsBidi::ReorderVisual(const nsBidiLevel *aLevels, PRInt32 aLength, PRInt32 *aIndexMap)
1753 : {
1754 : PRInt32 start, end, limit, temp;
1755 : nsBidiLevel minLevel, maxLevel;
1756 :
/* on success PrepareReorder() has set minLevel/maxLevel and initialized aIndexMap to the identity */
1757 0 : if(aIndexMap==NULL || !PrepareReorder(aLevels, aLength, aIndexMap, &minLevel, &maxLevel)) {
1758 0 : return NS_OK;
1759 : }
1760 :
1761 : /* nothing to do? */
1762 0 : if(minLevel==maxLevel && (minLevel&1)==0) {
1763 0 : return NS_OK;
1764 : }
1765 :
1766 : /* reorder only down to the lowest odd level */
1767 0 : minLevel|=1;
1768 :
1769 : /* loop maxLevel..minLevel */
1770 0 : do {
1771 0 : start=0;
1772 :
1773 : /* loop for all sequences of levels to reorder at the current maxLevel */
1774 0 : for(;;) {
1775 : /* look for a sequence of levels that are all at >=maxLevel */
1776 : /* look for the first index of such a sequence */
1777 0 : while(start<aLength && aLevels[start]<maxLevel) {
1778 0 : ++start;
1779 : }
1780 0 : if(start>=aLength) {
1781 0 : break; /* no more such runs */
1782 : }
1783 :
1784 : /* look for the limit of such a sequence (the index behind it) */
1785 0 : for(limit=start; ++limit<aLength && aLevels[limit]>=maxLevel;) {}
1786 :
1787 : /*
1788 : * Swap the entire interval of indexes from start to limit-1.
1789 : * We don't need to swap the levels for the purpose of this
1790 : * algorithm: the sequence of levels that we look at does not
1791 : * move anyway.
1792 : */
1793 0 : end=limit-1;
1794 0 : while(start<end) {
1795 0 : temp=aIndexMap[start];
1796 0 : aIndexMap[start]=aIndexMap[end];
1797 0 : aIndexMap[end]=temp;
1798 :
1799 0 : ++start;
1800 0 : --end;
1801 : }
1802 :
1803 0 : if(limit==aLength) {
1804 0 : break; /* no more such sequences */
1805 : } else {
/* aLevels[limit] is already known to be below maxLevel, so resume behind it */
1806 0 : start=limit+1;
1807 : }
1808 : }
1809 : } while(--maxLevel>=minLevel);
1810 :
1811 0 : return NS_OK;
1812 : }
1813 :
/*
 * Validates aLevels, reports its lowest and highest level through
 * *aMinLevel / *aMaxLevel, and initializes aIndexMap[0..aLength-1] to the
 * identity mapping.
 *
 * Returns false, without writing to any output, if aLevels is NULL,
 * aLength<=0, or any level exceeds NSBIDI_MAX_EXPLICIT_LEVEL+1.
 * aIndexMap, aMinLevel and aMaxLevel are not null-checked here.
 */
1814 0 : bool nsBidi::PrepareReorder(const nsBidiLevel *aLevels, PRInt32 aLength,
1815 : PRInt32 *aIndexMap,
1816 : nsBidiLevel *aMinLevel, nsBidiLevel *aMaxLevel)
1817 : {
1818 : PRInt32 start;
1819 : nsBidiLevel level, minLevel, maxLevel;
1820 :
1821 0 : if(aLevels==NULL || aLength<=0) {
1822 0 : return false;
1823 : }
1824 :
1825 : /* determine minLevel and maxLevel */
/* start above/below every accepted level so the first element replaces both */
1826 0 : minLevel=NSBIDI_MAX_EXPLICIT_LEVEL+1;
1827 0 : maxLevel=0;
1828 0 : for(start=aLength; start>0;) {
1829 0 : level=aLevels[--start];
1830 0 : if(level>NSBIDI_MAX_EXPLICIT_LEVEL+1) {
1831 0 : return false;
1832 : }
1833 0 : if(level<minLevel) {
1834 0 : minLevel=level;
1835 : }
1836 0 : if(level>maxLevel) {
1837 0 : maxLevel=level;
1838 : }
1839 : }
1840 0 : *aMinLevel=minLevel;
1841 0 : *aMaxLevel=maxLevel;
1842 :
1843 : /* initialize the index map */
1844 0 : for(start=aLength; start>0;) {
1845 0 : --start;
1846 0 : aIndexMap[start]=start;
1847 : }
1848 :
1849 0 : return true;
1850 : }
1851 :
1852 : #ifdef FULL_BIDI_ENGINE
1853 : /* API functions for logical<->visual mapping ------------------------------- */
1854 :
/*
 * Stores in *aVisualIndex the visual position of the character at
 * aLogicalIndex.
 *
 * Returns NS_ERROR_INVALID_ARG unless 0<=aLogicalIndex<mLength,
 * NS_ERROR_OUT_OF_MEMORY if the runs have to be computed and GetRuns()
 * fails, and NS_OK otherwise.
 * aVisualIndex is dereferenced without a null check.
 */
1855 : nsresult nsBidi::GetVisualIndex(PRInt32 aLogicalIndex, PRInt32* aVisualIndex) {
1856 : if(aLogicalIndex<0 || mLength<=aLogicalIndex) {
1857 : return NS_ERROR_INVALID_ARG;
1858 : } else {
1859 : /* we can do the trivial cases without the runs array */
1860 : switch(mDirection) {
1861 : case NSBIDI_LTR:
1862 : *aVisualIndex = aLogicalIndex;
1863 : return NS_OK;
1864 : case NSBIDI_RTL:
/* pure RTL: the visual order is the logical order mirrored */
1865 : *aVisualIndex = mLength-aLogicalIndex-1;
1866 : return NS_OK;
1867 : default:
1868 : if(mRunCount<0 && !GetRuns()) {
1869 : return NS_ERROR_OUT_OF_MEMORY;
1870 : } else {
1871 : Run *runs=mRuns;
1872 : PRInt32 i, visualStart=0, offset, length;
1873 :
1874 : /* linear search for the run, search on the visual runs */
/* no loop bound: relies on the runs covering every valid logical index, so some run always matches */
1875 : for(i=0;; ++i) {
1876 : length=runs[i].visualLimit-visualStart;
1877 : offset=aLogicalIndex-GET_INDEX(runs[i].logicalStart);
1878 : if(offset>=0 && offset<length) {
1879 : if(IS_EVEN_RUN(runs[i].logicalStart)) {
1880 : /* LTR */
1881 : *aVisualIndex = visualStart+offset;
1882 : return NS_OK;
1883 : } else {
1884 : /* RTL */
1885 : *aVisualIndex = visualStart+length-offset-1;
1886 : return NS_OK;
1887 : }
1888 : }
/* advance to the visual start of the next run (equals runs[i].visualLimit) */
1889 : visualStart+=length;
1890 : }
1891 : }
1892 : }
1893 : }
1894 : }
1895 :
1896 : nsresult nsBidi::GetLogicalIndex(PRInt32 aVisualIndex, PRInt32 *aLogicalIndex)
1897 : {
1898 : if(aVisualIndex<0 || mLength<=aVisualIndex) {
1899 : return NS_ERROR_INVALID_ARG;
1900 : } else {
1901 : /* we can do the trivial cases without the runs array */
1902 : switch(mDirection) {
1903 : case NSBIDI_LTR:
1904 : *aLogicalIndex = aVisualIndex;
1905 : return NS_OK;
1906 : case NSBIDI_RTL:
1907 : *aLogicalIndex = mLength-aVisualIndex-1;
1908 : return NS_OK;
1909 : default:
1910 : if(mRunCount<0 && !GetRuns()) {
1911 : return NS_ERROR_OUT_OF_MEMORY;
1912 : } else {
1913 : Run *runs=mRuns;
1914 : PRInt32 i, runCount=mRunCount, start;
1915 :
1916 : if(runCount<=10) {
1917 : /* linear search for the run */
1918 : for(i=0; aVisualIndex>=runs[i].visualLimit; ++i) {}
1919 : } else {
1920 : /* binary search for the run */
1921 : PRInt32 start=0, limit=runCount;
1922 :
1923 : /* the middle if() will guaranteed find the run, we don't need a loop limit */
1924 : for(;;) {
1925 : i=(start+limit)/2;
1926 : if(aVisualIndex>=runs[i].visualLimit) {
1927 : start=i+1;
1928 : } else if(i==0 || aVisualIndex>=runs[i-1].visualLimit) {
1929 : break;
1930 : } else {
1931 : limit=i;
1932 : }
1933 : }
1934 : }
1935 :
1936 : start=runs[i].logicalStart;
1937 : if(IS_EVEN_RUN(start)) {
1938 : /* LTR */
1939 : /* the offset in runs[i] is aVisualIndex-runs[i-1].visualLimit */
1940 : if(i>0) {
1941 : aVisualIndex-=runs[i-1].visualLimit;
1942 : }
1943 : *aLogicalIndex = GET_INDEX(start)+aVisualIndex;
1944 : return NS_OK;
1945 : } else {
1946 : /* RTL */
1947 : *aLogicalIndex = GET_INDEX(start)+runs[i].visualLimit-aVisualIndex-1;
1948 : return NS_OK;
1949 : }
1950 : }
1951 : }
1952 : }
1953 : }
1954 :
1955 : nsresult nsBidi::GetLogicalMap(PRInt32 *aIndexMap)
1956 : {
1957 : nsBidiLevel *levels;
1958 : nsresult rv;
1959 :
1960 : /* GetLevels() checks all of its and our arguments */
1961 : rv = GetLevels(&levels);
1962 : if(NS_FAILED(rv)) {
1963 : return rv;
1964 : } else if(aIndexMap==NULL) {
1965 : return NS_ERROR_INVALID_ARG;
1966 : } else {
1967 : return ReorderLogical(levels, mLength, aIndexMap);
1968 : }
1969 : }
1970 :
1971 : nsresult nsBidi::GetVisualMap(PRInt32 *aIndexMap)
1972 : {
1973 : PRInt32* runCount=NULL;
1974 : nsresult rv;
1975 :
1976 : /* CountRuns() checks all of its and our arguments */
1977 : rv = CountRuns(runCount);
1978 : if(NS_FAILED(rv)) {
1979 : return rv;
1980 : } else if(aIndexMap==NULL) {
1981 : return NS_ERROR_INVALID_ARG;
1982 : } else {
1983 : /* fill a visual-to-logical index map using the runs[] */
1984 : Run *runs=mRuns, *runsLimit=runs+mRunCount;
1985 : PRInt32 logicalStart, visualStart, visualLimit;
1986 :
1987 : visualStart=0;
1988 : for(; runs<runsLimit; ++runs) {
1989 : logicalStart=runs->logicalStart;
1990 : visualLimit=runs->visualLimit;
1991 : if(IS_EVEN_RUN(logicalStart)) {
1992 : do { /* LTR */
1993 : *aIndexMap++ = logicalStart++;
1994 : } while(++visualStart<visualLimit);
1995 : } else {
1996 : REMOVE_ODD_BIT(logicalStart);
1997 : logicalStart+=visualLimit-visualStart; /* logicalLimit */
1998 : do { /* RTL */
1999 : *aIndexMap++ = --logicalStart;
2000 : } while(++visualStart<visualLimit);
2001 : }
2002 : /* visualStart==visualLimit; */
2003 : }
2004 : return NS_OK;
2005 : }
2006 : }
2007 :
2008 : /* reorder a line based on a levels array (L2) ------------------------------ */
2009 :
2010 : nsresult nsBidi::ReorderLogical(const nsBidiLevel *aLevels, PRInt32 aLength, PRInt32 *aIndexMap)
2011 : {
2012 : PRInt32 start, limit, sumOfSosEos;
2013 : nsBidiLevel minLevel, maxLevel;
2014 :
2015 : if(aIndexMap==NULL || !PrepareReorder(aLevels, aLength, aIndexMap, &minLevel, &maxLevel)) {
2016 : return NS_OK;
2017 : }
2018 :
2019 : /* nothing to do? */
2020 : if(minLevel==maxLevel && (minLevel&1)==0) {
2021 : return NS_OK;
2022 : }
2023 :
2024 : /* reorder only down to the lowest odd level */
2025 : minLevel|=1;
2026 :
2027 : /* loop maxLevel..minLevel */
2028 : do {
2029 : start=0;
2030 :
2031 : /* loop for all sequences of levels to reorder at the current maxLevel */
2032 : for(;;) {
2033 : /* look for a sequence of levels that are all at >=maxLevel */
2034 : /* look for the first index of such a sequence */
2035 : while(start<aLength && aLevels[start]<maxLevel) {
2036 : ++start;
2037 : }
2038 : if(start>=aLength) {
2039 : break; /* no more such sequences */
2040 : }
2041 :
2042 : /* look for the limit of such a sequence (the index behind it) */
2043 : for(limit=start; ++limit<aLength && aLevels[limit]>=maxLevel;) {}
2044 :
2045 : /*
2046 : * sos=start of sequence, eos=end of sequence
2047 : *
2048 : * The closed (inclusive) interval from sos to eos includes all the logical
2049 : * and visual indexes within this sequence. They are logically and
2050 : * visually contiguous and in the same range.
2051 : *
2052 : * For each run, the new visual index=sos+eos-old visual index;
2053 : * we pre-add sos+eos into sumOfSosEos ->
2054 : * new visual index=sumOfSosEos-old visual index;
2055 : */
2056 : sumOfSosEos=start+limit-1;
2057 :
2058 : /* reorder each index in the sequence */
2059 : do {
2060 : aIndexMap[start]=sumOfSosEos-aIndexMap[start];
2061 : } while(++start<limit);
2062 :
2063 : /* start==limit */
2064 : if(limit==aLength) {
2065 : break; /* no more such sequences */
2066 : } else {
2067 : start=limit+1;
2068 : }
2069 : }
2070 : } while(--maxLevel>=minLevel);
2071 :
2072 : return NS_OK;
2073 : }
2074 :
2075 : nsresult nsBidi::InvertMap(const PRInt32 *aSrcMap, PRInt32 *aDestMap, PRInt32 aLength)
2076 : {
2077 : if(aSrcMap!=NULL && aDestMap!=NULL) {
2078 : aSrcMap+=aLength;
2079 : while(aLength>0) {
2080 : aDestMap[*--aSrcMap]=--aLength;
2081 : }
2082 : }
2083 : return NS_OK;
2084 : }
2085 :
2086 : PRInt32 nsBidi::doWriteReverse(const PRUnichar *src, PRInt32 srcLength,
2087 : PRUnichar *dest, PRUint16 options) {
2088 : /*
2089 : * RTL run -
2090 : *
2091 : * RTL runs need to be copied to the destination in reverse order
2092 : * of code points, not code units, to keep Unicode characters intact.
2093 : *
2094 : * The general strategy for this is to read the source text
2095 : * in backward order, collect all code units for a code point
2096 : * (and optionally following combining characters, see below),
2097 : * and copy all these code units in ascending order
2098 : * to the destination for this run.
2099 : *
2100 : * Several options request whether combining characters
2101 : * should be kept after their base characters,
2102 : * whether Bidi control characters should be removed, and
2103 : * whether characters should be replaced by their mirror-image
2104 : * equivalent Unicode characters.
2105 : */
2106 : PRInt32 i, j, destSize;
2107 : PRUint32 c;
2108 :
2109 : /* optimize for several combinations of options */
2110 : switch(options&(NSBIDI_REMOVE_BIDI_CONTROLS|NSBIDI_DO_MIRRORING|NSBIDI_KEEP_BASE_COMBINING)) {
2111 : case 0:
2112 : /*
2113 : * With none of the "complicated" options set, the destination
2114 : * run will have the same length as the source run,
2115 : * and there is no mirroring and no keeping combining characters
2116 : * with their base characters.
2117 : */
2118 : destSize=srcLength;
2119 :
2120 : /* preserve character integrity */
2121 : do {
2122 : /* i is always after the last code unit known to need to be kept in this segment */
2123 : i=srcLength;
2124 :
2125 : /* collect code units for one base character */
2126 : UTF_BACK_1(src, 0, srcLength);
2127 :
2128 : /* copy this base character */
2129 : j=srcLength;
2130 : do {
2131 : *dest++=src[j++];
2132 : } while(j<i);
2133 : } while(srcLength>0);
2134 : break;
2135 : case NSBIDI_KEEP_BASE_COMBINING:
2136 : /*
2137 : * Here, too, the destination
2138 : * run will have the same length as the source run,
2139 : * and there is no mirroring.
2140 : * We do need to keep combining characters with their base characters.
2141 : */
2142 : destSize=srcLength;
2143 :
2144 : /* preserve character integrity */
2145 : do {
2146 : /* i is always after the last code unit known to need to be kept in this segment */
2147 : i=srcLength;
2148 :
2149 : /* collect code units and modifier letters for one base character */
2150 : do {
2151 : UTF_PREV_CHAR(src, 0, srcLength, c);
2152 : } while(srcLength>0 && IsBidiCategory(c, eBidiCat_NSM));
2153 :
2154 : /* copy this "user character" */
2155 : j=srcLength;
2156 : do {
2157 : *dest++=src[j++];
2158 : } while(j<i);
2159 : } while(srcLength>0);
2160 : break;
2161 : default:
2162 : /*
2163 : * With several "complicated" options set, this is the most
2164 : * general and the slowest copying of an RTL run.
2165 : * We will do mirroring, remove Bidi controls, and
2166 : * keep combining characters with their base characters
2167 : * as requested.
2168 : */
2169 : if(!(options&NSBIDI_REMOVE_BIDI_CONTROLS)) {
2170 : i=srcLength;
2171 : } else {
2172 : /* we need to find out the destination length of the run,
2173 : which will not include the Bidi control characters */
2174 : PRInt32 length=srcLength;
2175 : PRUnichar ch;
2176 :
2177 : i=0;
2178 : do {
2179 : ch=*src++;
2180 : if (!IsBidiControl((PRUint32)ch)) {
2181 : ++i;
2182 : }
2183 : } while(--length>0);
2184 : src-=srcLength;
2185 : }
2186 : destSize=i;
2187 :
2188 : /* preserve character integrity */
2189 : do {
2190 : /* i is always after the last code unit known to need to be kept in this segment */
2191 : i=srcLength;
2192 :
2193 : /* collect code units for one base character */
2194 : UTF_PREV_CHAR(src, 0, srcLength, c);
2195 : if(options&NSBIDI_KEEP_BASE_COMBINING) {
2196 : /* collect modifier letters for this base character */
2197 : while(srcLength>0 && IsBidiCategory(c, eBidiCat_NSM)) {
2198 : UTF_PREV_CHAR(src, 0, srcLength, c);
2199 : }
2200 : }
2201 :
2202 : if(options&NSBIDI_REMOVE_BIDI_CONTROLS && IsBidiControl(c)) {
2203 : /* do not copy this Bidi control character */
2204 : continue;
2205 : }
2206 :
2207 : /* copy this "user character" */
2208 : j=srcLength;
2209 : if(options&NSBIDI_DO_MIRRORING) {
2210 : /* mirror only the base character */
2211 : c = SymmSwap(c);
2212 :
2213 : PRInt32 k=0;
2214 : UTF_APPEND_CHAR_UNSAFE(dest, k, c);
2215 : dest+=k;
2216 : j+=k;
2217 : }
2218 : while(j<i) {
2219 : *dest++=src[j++];
2220 : }
2221 : } while(srcLength>0);
2222 : break;
2223 : } /* end of switch */
2224 : return destSize;
2225 : }
2226 :
2227 : nsresult nsBidi::WriteReverse(const PRUnichar *aSrc, PRInt32 aSrcLength, PRUnichar *aDest, PRUint16 aOptions, PRInt32 *aDestSize)
2228 : {
2229 : if( aSrc==NULL || aSrcLength<0 ||
2230 : aDest==NULL
2231 : ) {
2232 : return NS_ERROR_INVALID_ARG;
2233 : }
2234 :
2235 : /* do input and output overlap? */
2236 : if( aSrc>=aDest && aSrc<aDest+aSrcLength ||
2237 : aDest>=aSrc && aDest<aSrc+aSrcLength
2238 : ) {
2239 : return NS_ERROR_INVALID_ARG;
2240 : }
2241 :
2242 : if(aSrcLength>0) {
2243 : *aDestSize = doWriteReverse(aSrc, aSrcLength, aDest, aOptions);
2244 : }
2245 : return NS_OK;
2246 : }
2247 : #endif // FULL_BIDI_ENGINE
2248 : #endif // IBMBIDI
|