1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is TransforMiiX XSLT processor code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 2002
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : * Peter Van der Beken <peterv@propagandism.org>
24 : *
25 : * Alternatively, the contents of this file may be used under the terms of
26 : * either the GNU General Public License Version 2 or later (the "GPL"), or
27 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 : * in which case the provisions of the GPL or the LGPL are applicable instead
29 : * of those above. If you wish to allow use of your version of this file only
30 : * under the terms of either the GPL or the LGPL, and not to allow others to
31 : * use your version of this file under the terms of the MPL, indicate your
32 : * decision by deleting the provisions above and replace them with the notice
33 : * and other provisions required by the GPL or the LGPL. If you do not delete
34 : * the provisions above, a recipient may use your version of this file under
35 : * the terms of any one of the MPL, the GPL or the LGPL.
36 : *
37 : * ***** END LICENSE BLOCK ***** */
38 :
39 : #ifndef __nsCharSeparatedTokenizer_h
40 : #define __nsCharSeparatedTokenizer_h
41 :
42 : #include "nsDependentSubstring.h"
43 : #include "nsCRT.h"
44 :
45 : /**
46 : * This parses a SeparatorChar-separated string into tokens.
47 : * Whitespace surrounding tokens is not treated as part of tokens, however
48 : * whitespace inside a token is. If the final token is the empty string, it is
49 : * not returned.
50 : *
51 : * Some examples, with SeparatorChar = ',':
52 : *
53 : * "foo, bar, baz" -> "foo" "bar" "baz"
54 : * "foo,bar,baz" -> "foo" "bar" "baz"
55 : * "foo , bar hi , baz" -> "foo" "bar hi" "baz"
56 : * "foo, ,bar,baz" -> "foo" "" "bar" "baz"
57 : * "foo,,bar,baz" -> "foo" "" "bar" "baz"
58 : * "foo,bar,baz," -> "foo" "bar" "baz"
59 : *
60 : * The function used for whitespace detection is a template argument.
61 : * By default, it is NS_IsAsciiWhitespace.
62 : */
63 : template<bool IsWhitespace(PRUnichar) = NS_IsAsciiWhitespace>
64 : class nsCharSeparatedTokenizerTemplate
65 : {
66 : public:
67 : // Flags -- only one for now. If we need more, they should be defined to
68 : // be 1<<1, 1<<2, etc. (They're masks, and aFlags/mFlags are bitfields.)
69 : enum {
70 : SEPARATOR_OPTIONAL = 1
71 : };
72 :
73 1553 : nsCharSeparatedTokenizerTemplate(const nsSubstring& aSource,
74 : PRUnichar aSeparatorChar,
75 : PRUint32 aFlags = 0)
76 : : mFirstTokenBeganWithWhitespace(false),
77 : mLastTokenEndedWithWhitespace(false),
78 : mLastTokenEndedWithSeparator(false),
79 : mSeparatorChar(aSeparatorChar),
80 1553 : mFlags(aFlags)
81 : {
82 1553 : aSource.BeginReading(mIter);
83 1553 : aSource.EndReading(mEnd);
84 :
85 : // Skip initial whitespace
86 3106 : while (mIter != mEnd && IsWhitespace(*mIter)) {
87 0 : mFirstTokenBeganWithWhitespace = true;
88 0 : ++mIter;
89 : }
90 1553 : }
91 :
92 : /**
93 : * Checks if any more tokens are available.
94 : */
95 3251 : bool hasMoreTokens()
96 : {
97 3251 : NS_ASSERTION(mIter == mEnd || !IsWhitespace(*mIter),
98 : "Should be at beginning of token if there is one");
99 :
100 3251 : return mIter != mEnd;
101 : }
102 :
103 0 : bool firstTokenBeganWithWhitespace() const
104 : {
105 0 : return mFirstTokenBeganWithWhitespace;
106 : }
107 :
108 0 : bool lastTokenEndedWithSeparator() const
109 : {
110 0 : return mLastTokenEndedWithSeparator;
111 : }
112 :
113 0 : bool lastTokenEndedWithWhitespace() const
114 : {
115 0 : return mLastTokenEndedWithWhitespace;
116 : }
117 :
118 : /**
119 : * Returns the next token.
120 : */
121 1542 : const nsDependentSubstring nextToken()
122 : {
123 1542 : nsSubstring::const_char_iterator end = mIter, begin = mIter;
124 :
125 1542 : NS_ASSERTION(mIter == mEnd || !IsWhitespace(*mIter),
126 : "Should be at beginning of token if there is one");
127 :
128 : // Search until we hit separator or end (or whitespace, if separator
129 : // isn't required -- see clause with 'break' below).
130 4626 : while (mIter != mEnd && *mIter != mSeparatorChar) {
131 : // Skip to end of current word.
132 8599 : while (mIter != mEnd &&
133 : !IsWhitespace(*mIter) && *mIter != mSeparatorChar) {
134 5515 : ++mIter;
135 : }
136 1542 : end = mIter;
137 :
138 : // Skip whitespace after current word.
139 1542 : mLastTokenEndedWithWhitespace = false;
140 3084 : while (mIter != mEnd && IsWhitespace(*mIter)) {
141 0 : mLastTokenEndedWithWhitespace = true;
142 0 : ++mIter;
143 : }
144 1542 : if (mFlags & SEPARATOR_OPTIONAL) {
145 : // We've hit (and skipped) whitespace, and that's sufficient to end
146 : // our token, regardless of whether we've reached a SeparatorChar.
147 0 : break;
148 : } // (else, we'll keep looping until we hit mEnd or SeparatorChar)
149 : }
150 :
151 1542 : mLastTokenEndedWithSeparator = (mIter != mEnd &&
152 : *mIter == mSeparatorChar);
153 1542 : NS_ASSERTION((mFlags & SEPARATOR_OPTIONAL) ||
154 : (mLastTokenEndedWithSeparator == (mIter != mEnd)),
155 : "If we require a separator and haven't hit the end of "
156 : "our string, then we shouldn't have left the loop "
157 : "unless we hit a separator");
158 :
159 : // Skip separator (and any whitespace after it), if we're at one.
160 1542 : if (mLastTokenEndedWithSeparator) {
161 0 : ++mIter;
162 :
163 0 : while (mIter != mEnd && IsWhitespace(*mIter)) {
164 0 : ++mIter;
165 : }
166 : }
167 :
168 1542 : return Substring(begin, end);
169 : }
170 :
171 : private:
172 : nsSubstring::const_char_iterator mIter, mEnd;
173 : bool mFirstTokenBeganWithWhitespace;
174 : bool mLastTokenEndedWithWhitespace;
175 : bool mLastTokenEndedWithSeparator;
176 : PRUnichar mSeparatorChar;
177 : PRUint32 mFlags;
178 : };
179 :
180 : class nsCharSeparatedTokenizer: public nsCharSeparatedTokenizerTemplate<>
181 : {
182 : public:
183 0 : nsCharSeparatedTokenizer(const nsSubstring& aSource,
184 : PRUnichar aSeparatorChar,
185 : PRUint32 aFlags = 0)
186 0 : : nsCharSeparatedTokenizerTemplate<>(aSource, aSeparatorChar, aFlags)
187 : {
188 0 : }
189 : };
190 :
191 : class nsCCharSeparatedTokenizer
192 : {
193 : public:
194 328 : nsCCharSeparatedTokenizer(const nsCSubstring& aSource,
195 : char aSeparatorChar)
196 328 : : mSeparatorChar(aSeparatorChar)
197 : {
198 328 : aSource.BeginReading(mIter);
199 328 : aSource.EndReading(mEnd);
200 :
201 656 : while (mIter != mEnd && isWhitespace(*mIter)) {
202 0 : ++mIter;
203 : }
204 328 : }
205 :
206 : /**
207 : * Checks if any more tokens are available.
208 : */
209 328 : bool hasMoreTokens()
210 : {
211 328 : return mIter != mEnd;
212 : }
213 :
214 : /**
215 : * Returns the next token.
216 : */
217 0 : const nsDependentCSubstring nextToken()
218 : {
219 0 : nsCSubstring::const_char_iterator end = mIter, begin = mIter;
220 :
221 : // Search until we hit separator or end.
222 0 : while (mIter != mEnd && *mIter != mSeparatorChar) {
223 0 : while (mIter != mEnd &&
224 0 : !isWhitespace(*mIter) && *mIter != mSeparatorChar) {
225 0 : ++mIter;
226 : }
227 0 : end = mIter;
228 :
229 0 : while (mIter != mEnd && isWhitespace(*mIter)) {
230 0 : ++mIter;
231 : }
232 : }
233 :
234 : // Skip separator (and any whitespace after it).
235 0 : if (mIter != mEnd) {
236 0 : NS_ASSERTION(*mIter == mSeparatorChar, "Ended loop too soon");
237 0 : ++mIter;
238 :
239 0 : while (mIter != mEnd && isWhitespace(*mIter)) {
240 0 : ++mIter;
241 : }
242 : }
243 :
244 0 : return Substring(begin, end);
245 : }
246 :
247 : private:
248 : nsCSubstring::const_char_iterator mIter, mEnd;
249 : char mSeparatorChar;
250 :
251 0 : bool isWhitespace(unsigned char aChar)
252 : {
253 : return aChar <= ' ' &&
254 : (aChar == ' ' || aChar == '\n' ||
255 0 : aChar == '\r'|| aChar == '\t');
256 : }
257 : };
258 :
259 : #endif /* __nsCharSeparatedTokenizer_h */
|