1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 : /* ***** BEGIN LICENSE BLOCK *****
4 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 : *
6 : * The contents of this file are subject to the Mozilla Public License Version
7 : * 1.1 (the "License"); you may not use this file except in compliance with
8 : * the License. You may obtain a copy of the License at
9 : * http://www.mozilla.org/MPL/
10 : *
11 : * Software distributed under the License is distributed on an "AS IS" basis,
12 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 : * for the specific language governing rights and limitations under the
14 : * License.
15 : *
16 : * The Original Code is Mozilla.
17 : *
18 : * The Initial Developer of the Original Code is IBM Corporation.
19 : * Portions created by IBM Corporation are Copyright (C) 2003
20 : * IBM Corporation. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : * Darin Fisher <darin@meer.net>
24 : *
25 : * Alternatively, the contents of this file may be used under the terms of
26 : * either the GNU General Public License Version 2 or later (the "GPL"), or
27 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 : * in which case the provisions of the GPL or the LGPL are applicable instead
29 : * of those above. If you wish to allow use of your version of this file only
30 : * under the terms of either the GPL or the LGPL, and not to allow others to
31 : * use your version of this file under the terms of the MPL, indicate your
32 : * decision by deleting the provisions above and replace them with the notice
33 : * and other provisions required by the GPL or the LGPL. If you do not delete
34 : * the provisions above, a recipient may use your version of this file under
35 : * the terms of any one of the MPL, the GPL or the LGPL.
36 : *
37 : * ***** END LICENSE BLOCK ***** */
38 :
39 : #ifndef nsScannerString_h___
40 : #define nsScannerString_h___
41 :
42 : #include "nsString.h"
43 : #include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator
44 : #include "prclist.h"
45 :
46 :
47 : /**
48 : * NOTE: nsScannerString (and the other classes defined in this file) are
49 : * not related to nsAString or any of the other xpcom/string classes.
50 : *
51 : * nsScannerString is based on the nsSlidingString implementation that used
52 : * to live in xpcom/string. Now that nsAString is limited to representing
53 : * only single fragment strings, nsSlidingString can no longer be used.
54 : *
55 : * An advantage to this design is that it does not employ any virtual
56 : * functions.
57 : *
58 : * This file uses SCC-style indenting in deference to the nsSlidingString
59 : * code from which this code is derived ;-)
60 : */
61 :
62 : class nsScannerIterator;
63 : class nsScannerSubstring;
64 : class nsScannerString;
65 :
66 :
67 : /**
68 : * nsScannerBufferList
69 : *
70 : * This class maintains a list of heap-allocated Buffer objects. The buffers
71 : * are maintained in a circular linked list. Each buffer has a usage count
72 : * that is decremented by the owning nsScannerSubstring.
73 : *
74 : * The buffer list itself is reference counted. This allows the buffer list
75 : * to be shared by multiple nsScannerSubstring objects. The reference
76 : * counting is not threadsafe, which is not at all a requirement.
77 : *
78 : * When a nsScannerSubstring releases its reference to a buffer list, it
79 : * decrements the usage count of the first buffer in the buffer list that it
80 : * was referencing. It informs the buffer list that it can discard buffers
81 : * starting at that prefix. The buffer list will do so if the usage count of
82 : * that buffer is 0 and if it is the first buffer in the list. It will
83 : * continue to prune buffers starting from the front of the buffer list until
84 : * it finds a buffer that has a usage count that is non-zero.
85 : */
86 : class nsScannerBufferList
87 : {
88 : public:
89 :
90 : /**
91 : * Buffer objects are directly followed by a data segment. The start
92 : * of the data segment is determined by increment the |this| pointer
93 : * by 1 unit.
94 : */
95 : class Buffer : public PRCList
96 : {
97 : public:
98 :
99 29025 : void IncrementUsageCount() { ++mUsageCount; }
100 29025 : void DecrementUsageCount() { --mUsageCount; }
101 :
102 27699 : bool IsInUse() const { return mUsageCount != 0; }
103 :
104 4201 : const PRUnichar* DataStart() const { return (const PRUnichar*) (this+1); }
105 15128 : PRUnichar* DataStart() { return ( PRUnichar*) (this+1); }
106 :
107 84 : const PRUnichar* DataEnd() const { return mDataEnd; }
108 6332 : PRUnichar* DataEnd() { return mDataEnd; }
109 :
110 : const Buffer* Next() const { return static_cast<const Buffer*>(next); }
111 346 : Buffer* Next() { return static_cast<Buffer*>(next); }
112 :
113 : const Buffer* Prev() const { return static_cast<const Buffer*>(prev); }
114 : Buffer* Prev() { return static_cast<Buffer*>(prev); }
115 :
116 3760 : PRUint32 DataLength() const { return mDataEnd - DataStart(); }
117 3491 : void SetDataLength(PRUint32 len) { mDataEnd = DataStart() + len; }
118 :
119 : private:
120 :
121 : friend class nsScannerBufferList;
122 :
123 : PRInt32 mUsageCount;
124 : PRUnichar* mDataEnd;
125 : };
126 :
127 : /**
128 : * Position objects serve as lightweight pointers into a buffer list.
129 : * The mPosition member must be contained with mBuffer->DataStart()
130 : * and mBuffer->DataEnd().
131 : */
132 : class Position
133 : {
134 : public:
135 :
136 526 : Position() {}
137 :
138 8752 : Position( Buffer* buffer, PRUnichar* position )
139 : : mBuffer(buffer)
140 8752 : , mPosition(position)
141 8752 : {}
142 :
143 : inline
144 : Position( const nsScannerIterator& aIter );
145 :
146 : inline
147 : Position& operator=( const nsScannerIterator& aIter );
148 :
149 : static size_t Distance( const Position& p1, const Position& p2 );
150 :
151 : Buffer* mBuffer;
152 : PRUnichar* mPosition;
153 : };
154 :
155 : static Buffer* AllocBufferFromString( const nsAString& );
156 : static Buffer* AllocBuffer( PRUint32 capacity ); // capacity = number of chars
157 :
158 3576 : nsScannerBufferList( Buffer* buf )
159 3576 : : mRefCnt(0)
160 : {
161 3576 : PR_INIT_CLIST(&mBuffers);
162 3576 : PR_APPEND_LINK(buf, &mBuffers);
163 3576 : }
164 :
165 6798 : void AddRef() { ++mRefCnt; }
166 13596 : void Release() { if (--mRefCnt == 0) delete this; }
167 :
168 207 : void Append( Buffer* buf ) { PR_APPEND_LINK(buf, &mBuffers); }
169 0 : void InsertAfter( Buffer* buf, Buffer* prev ) { PR_INSERT_AFTER(buf, prev); }
170 : void SplitBuffer( const Position& );
171 : void DiscardUnreferencedPrefix( Buffer* );
172 :
173 64069 : Buffer* Head() { return static_cast<Buffer*>(PR_LIST_HEAD(&mBuffers)); }
174 : const Buffer* Head() const { return static_cast<const Buffer*>(PR_LIST_HEAD(&mBuffers)); }
175 :
176 3576 : Buffer* Tail() { return static_cast<Buffer*>(PR_LIST_TAIL(&mBuffers)); }
177 : const Buffer* Tail() const { return static_cast<const Buffer*>(PR_LIST_TAIL(&mBuffers)); }
178 :
179 : private:
180 :
181 : friend class nsScannerSubstring;
182 :
183 3576 : ~nsScannerBufferList() { ReleaseAll(); }
184 : void ReleaseAll();
185 :
186 : PRInt32 mRefCnt;
187 : PRCList mBuffers;
188 : };
189 :
190 :
191 : /**
192 : * nsScannerFragment represents a "slice" of a Buffer object.
193 : */
194 : struct nsScannerFragment
195 : {
196 : typedef nsScannerBufferList::Buffer Buffer;
197 :
198 : const Buffer* mBuffer;
199 : const PRUnichar* mFragmentStart;
200 : const PRUnichar* mFragmentEnd;
201 : };
202 :
203 :
204 : /**
205 : * nsScannerSubstring is the base class for nsScannerString. It provides
206 : * access to iterators and methods to bind the substring to another
207 : * substring or nsAString instance.
208 : *
209 : * This class owns the buffer list.
210 : */
211 : class nsScannerSubstring
212 : {
213 : public:
214 : typedef nsScannerBufferList::Buffer Buffer;
215 : typedef nsScannerBufferList::Position Position;
216 : typedef PRUint32 size_type;
217 :
218 : nsScannerSubstring();
219 : nsScannerSubstring( const nsAString& s );
220 :
221 : ~nsScannerSubstring();
222 :
223 : nsScannerIterator& BeginReading( nsScannerIterator& iter ) const;
224 : nsScannerIterator& EndReading( nsScannerIterator& iter ) const;
225 :
226 25 : size_type Length() const { return mLength; }
227 :
228 : PRInt32 CountChar( PRUnichar ) const;
229 :
230 : void Rebind( const nsScannerSubstring&, const nsScannerIterator&, const nsScannerIterator& );
231 : void Rebind( const nsAString& );
232 :
233 : const nsSubstring& AsString() const;
234 :
235 : bool GetNextFragment( nsScannerFragment& ) const;
236 : bool GetPrevFragment( nsScannerFragment& ) const;
237 :
238 292 : static inline Buffer* AllocBufferFromString( const nsAString& aStr ) { return nsScannerBufferList::AllocBufferFromString(aStr); }
239 3491 : static inline Buffer* AllocBuffer( size_type aCapacity ) { return nsScannerBufferList::AllocBuffer(aCapacity); }
240 :
241 : protected:
242 :
243 4639 : void acquire_ownership_of_buffer_list() const
244 : {
245 4639 : mBufferList->AddRef();
246 4639 : mStart.mBuffer->IncrementUsageCount();
247 4639 : }
248 :
249 5990 : void release_ownership_of_buffer_list()
250 : {
251 5990 : if (mBufferList)
252 : {
253 4639 : mStart.mBuffer->DecrementUsageCount();
254 4639 : mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer);
255 4639 : mBufferList->Release();
256 : }
257 5990 : }
258 :
259 3576 : void init_range_from_buffer_list()
260 : {
261 3576 : mStart.mBuffer = mBufferList->Head();
262 3576 : mStart.mPosition = mStart.mBuffer->DataStart();
263 :
264 3576 : mEnd.mBuffer = mBufferList->Tail();
265 3576 : mEnd.mPosition = mEnd.mBuffer->DataEnd();
266 :
267 3576 : mLength = Position::Distance(mStart, mEnd);
268 3576 : }
269 :
270 : Position mStart;
271 : Position mEnd;
272 : nsScannerBufferList *mBufferList;
273 : size_type mLength;
274 :
275 : // these fields are used to implement AsString
276 : nsDependentSubstring mFlattenedRep;
277 : bool mIsDirty;
278 :
279 : friend class nsScannerSharedSubstring;
280 : };
281 :
282 :
283 : /**
284 : * nsScannerString provides methods to grow and modify a buffer list.
285 : */
286 : class nsScannerString : public nsScannerSubstring
287 3288 : {
288 : public:
289 :
290 : nsScannerString( Buffer* );
291 :
292 : // you are giving ownership to the string, it takes and keeps your
293 : // buffer, deleting it when done.
294 : // Use AllocBuffer or AllocBufferFromString to create a Buffer object
295 : // for use with this function.
296 : void AppendBuffer( Buffer* );
297 :
298 : void DiscardPrefix( const nsScannerIterator& );
299 : // any other way you want to do this?
300 :
301 : void UngetReadable(const nsAString& aReadable, const nsScannerIterator& aCurrentPosition);
302 : void ReplaceCharacter(nsScannerIterator& aPosition, PRUnichar aChar);
303 : };
304 :
305 :
306 : /**
307 : * nsScannerSharedSubstring implements copy-on-write semantics for
308 : * nsScannerSubstring. When you call .writable(), it will copy the data
309 : * and return a mutable string object. This class also manages releasing
310 : * the reference to the scanner buffer when it is no longer needed.
311 : */
312 :
313 : class nsScannerSharedSubstring
314 : {
315 : public:
316 2176 : nsScannerSharedSubstring()
317 2176 : : mBuffer(nsnull), mBufferList(nsnull) { }
318 :
319 2176 : ~nsScannerSharedSubstring()
320 2176 : {
321 2176 : if (mBufferList)
322 2148 : ReleaseBuffer();
323 2176 : }
324 :
325 : // Acquire a copy-on-write reference to the given substring.
326 : NS_HIDDEN_(void) Rebind(const nsScannerIterator& aStart,
327 : const nsScannerIterator& aEnd);
328 :
329 : // Get a mutable reference to this string
330 22 : nsSubstring& writable()
331 : {
332 22 : if (mBufferList)
333 11 : MakeMutable();
334 :
335 22 : return mString;
336 : }
337 :
338 : // Get a const reference to this string
339 6924 : const nsSubstring& str() const { return mString; }
340 :
341 : private:
342 : typedef nsScannerBufferList::Buffer Buffer;
343 :
344 : NS_HIDDEN_(void) ReleaseBuffer();
345 : NS_HIDDEN_(void) MakeMutable();
346 :
347 : nsDependentSubstring mString;
348 : Buffer *mBuffer;
349 : nsScannerBufferList *mBufferList;
350 : };
351 :
352 : /**
353 : * nsScannerIterator works just like nsReadingIterator<CharT> except that
354 : * it knows how to iterate over a list of scanner buffers.
355 : */
356 : class nsScannerIterator
357 : {
358 : public:
359 : typedef nsScannerIterator self_type;
360 : typedef ptrdiff_t difference_type;
361 : typedef PRUnichar value_type;
362 : typedef const PRUnichar* pointer;
363 : typedef const PRUnichar& reference;
364 : typedef nsScannerSubstring::Buffer Buffer;
365 :
366 : protected:
367 :
368 : nsScannerFragment mFragment;
369 : const PRUnichar* mPosition;
370 : const nsScannerSubstring* mOwner;
371 :
372 : friend class nsScannerSubstring;
373 : friend class nsScannerSharedSubstring;
374 :
375 : public:
376 60851 : nsScannerIterator() {}
377 : // nsScannerIterator( const nsScannerIterator& ); // auto-generated copy-constructor OK
378 : // nsScannerIterator& operator=( const nsScannerIterator& ); // auto-generated copy-assignment operator OK
379 :
380 : inline void normalize_forward();
381 : inline void normalize_backward();
382 :
383 617694 : pointer get() const
384 : {
385 617694 : return mPosition;
386 : }
387 :
388 122512 : PRUnichar operator*() const
389 : {
390 122512 : return *get();
391 : }
392 :
393 8086 : const nsScannerFragment& fragment() const
394 : {
395 8086 : return mFragment;
396 : }
397 :
398 124849 : const Buffer* buffer() const
399 : {
400 124849 : return mFragment.mBuffer;
401 : }
402 :
403 94518 : self_type& operator++()
404 : {
405 94518 : ++mPosition;
406 94518 : normalize_forward();
407 94518 : return *this;
408 : }
409 :
410 5848 : self_type operator++( int )
411 : {
412 5848 : self_type result(*this);
413 5848 : ++mPosition;
414 5848 : normalize_forward();
415 : return result;
416 : }
417 :
418 356 : self_type& operator--()
419 : {
420 356 : normalize_backward();
421 356 : --mPosition;
422 356 : return *this;
423 : }
424 :
425 : self_type operator--( int )
426 : {
427 : self_type result(*this);
428 : normalize_backward();
429 : --mPosition;
430 : return result;
431 : }
432 :
433 16805 : difference_type size_forward() const
434 : {
435 16805 : return mFragment.mFragmentEnd - mPosition;
436 : }
437 :
438 672 : difference_type size_backward() const
439 : {
440 672 : return mPosition - mFragment.mFragmentStart;
441 : }
442 :
443 26659 : self_type& advance( difference_type n )
444 : {
445 66576 : while ( n > 0 )
446 : {
447 13258 : difference_type one_hop = NS_MIN(n, size_forward());
448 :
449 13258 : NS_ASSERTION(one_hop>0, "Infinite loop: can't advance a reading iterator beyond the end of a string");
450 : // perhaps I should |break| if |!one_hop|?
451 :
452 13258 : mPosition += one_hop;
453 13258 : normalize_forward();
454 13258 : n -= one_hop;
455 : }
456 :
457 53990 : while ( n < 0 )
458 : {
459 672 : normalize_backward();
460 672 : difference_type one_hop = NS_MAX(n, -size_backward());
461 :
462 672 : NS_ASSERTION(one_hop<0, "Infinite loop: can't advance (backward) a reading iterator beyond the end of a string");
463 : // perhaps I should |break| if |!one_hop|?
464 :
465 672 : mPosition += one_hop;
466 672 : n -= one_hop;
467 : }
468 :
469 26659 : return *this;
470 : }
471 : };
472 :
473 :
474 : inline
475 : bool
476 4043 : SameFragment( const nsScannerIterator& a, const nsScannerIterator& b )
477 : {
478 4043 : return a.fragment().mFragmentStart == b.fragment().mFragmentStart;
479 : }
480 :
481 :
482 : /**
483 : * this class is needed in order to make use of the methods in nsAlgorithm.h
484 : */
485 : template <>
486 : struct nsCharSourceTraits<nsScannerIterator>
487 : {
488 : typedef nsScannerIterator::difference_type difference_type;
489 :
490 : static
491 : PRUint32
492 4018 : readable_distance( const nsScannerIterator& first, const nsScannerIterator& last )
493 : {
494 4018 : return PRUint32(SameFragment(first, last) ? last.get() - first.get() : first.size_forward());
495 : }
496 :
497 : static
498 : const nsScannerIterator::value_type*
499 4018 : read( const nsScannerIterator& iter )
500 : {
501 4018 : return iter.get();
502 : }
503 :
504 : static
505 : void
506 4018 : advance( nsScannerIterator& s, difference_type n )
507 : {
508 4018 : s.advance(n);
509 4018 : }
510 : };
511 :
512 :
513 : /**
514 : * inline methods follow
515 : */
516 :
517 : inline
518 : void
519 148266 : nsScannerIterator::normalize_forward()
520 : {
521 296961 : while (mPosition == mFragment.mFragmentEnd && mOwner->GetNextFragment(mFragment))
522 429 : mPosition = mFragment.mFragmentStart;
523 148266 : }
524 :
525 : inline
526 : void
527 1028 : nsScannerIterator::normalize_backward()
528 : {
529 2070 : while (mPosition == mFragment.mFragmentStart && mOwner->GetPrevFragment(mFragment))
530 14 : mPosition = mFragment.mFragmentEnd;
531 1028 : }
532 :
533 : inline
534 : bool
535 44830 : operator==( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
536 : {
537 44830 : return lhs.get() == rhs.get();
538 : }
539 :
540 : inline
541 : bool
542 134660 : operator!=( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
543 : {
544 134660 : return lhs.get() != rhs.get();
545 : }
546 :
547 :
548 : inline
549 96144 : nsScannerBufferList::Position::Position(const nsScannerIterator& aIter)
550 96144 : : mBuffer(const_cast<Buffer*>(aIter.buffer()))
551 96144 : , mPosition(const_cast<PRUnichar*>(aIter.get()))
552 96144 : {}
553 :
554 : inline
555 : nsScannerBufferList::Position&
556 24353 : nsScannerBufferList::Position::operator=(const nsScannerIterator& aIter)
557 : {
558 24353 : mBuffer = const_cast<Buffer*>(aIter.buffer());
559 24353 : mPosition = const_cast<PRUnichar*>(aIter.get());
560 24353 : return *this;
561 : }
562 :
563 :
564 : /**
565 : * scanner string utils
566 : *
567 : * These methods mimic the API provided by nsReadableUtils in xpcom/string.
568 : * Here we provide only the methods that the htmlparser module needs.
569 : */
570 :
571 : inline
572 : size_t
573 48072 : Distance( const nsScannerIterator& aStart, const nsScannerIterator& aEnd )
574 : {
575 : typedef nsScannerBufferList::Position Position;
576 48072 : return Position::Distance(Position(aStart), Position(aEnd));
577 : }
578 :
579 : void
580 : CopyUnicodeTo( const nsScannerIterator& aSrcStart,
581 : const nsScannerIterator& aSrcEnd,
582 : nsAString& aDest );
583 :
584 : inline
585 : void
586 : CopyUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
587 : {
588 : nsScannerIterator begin, end;
589 : CopyUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
590 : }
591 :
592 : void
593 : AppendUnicodeTo( const nsScannerIterator& aSrcStart,
594 : const nsScannerIterator& aSrcEnd,
595 : nsAString& aDest );
596 :
597 : inline
598 : void
599 0 : AppendUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
600 : {
601 0 : nsScannerIterator begin, end;
602 0 : AppendUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
603 0 : }
604 :
605 : void
606 : AppendUnicodeTo( const nsScannerIterator& aSrcStart,
607 : const nsScannerIterator& aSrcEnd,
608 : nsScannerSharedSubstring& aDest );
609 :
610 : bool
611 : FindCharInReadable( PRUnichar aChar,
612 : nsScannerIterator& aStart,
613 : const nsScannerIterator& aEnd );
614 :
615 : bool
616 : FindInReadable( const nsAString& aPattern,
617 : nsScannerIterator& aStart,
618 : nsScannerIterator& aEnd,
619 : const nsStringComparator& = nsDefaultStringComparator() );
620 :
621 : bool
622 : RFindInReadable( const nsAString& aPattern,
623 : nsScannerIterator& aStart,
624 : nsScannerIterator& aEnd,
625 : const nsStringComparator& = nsDefaultStringComparator() );
626 :
627 : inline
628 : bool
629 25 : CaseInsensitiveFindInReadable( const nsAString& aPattern,
630 : nsScannerIterator& aStart,
631 : nsScannerIterator& aEnd )
632 : {
633 : return FindInReadable(aPattern, aStart, aEnd,
634 25 : nsCaseInsensitiveStringComparator());
635 : }
636 :
637 : #endif // !defined(nsScannerString_h___)
|