1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /* vim:set ts=4 sts=4 sw=4 cin et: */
3 : /* ***** BEGIN LICENSE BLOCK *****
4 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 : *
6 : * The contents of this file are subject to the Mozilla Public License Version
7 : * 1.1 (the "License"); you may not use this file except in compliance with
8 : * the License. You may obtain a copy of the License at
9 : * http://www.mozilla.org/MPL/
10 : *
11 : * Software distributed under the License is distributed on an "AS IS" basis,
12 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 : * for the specific language governing rights and limitations under the
14 : * License.
15 : *
16 : * The Original Code is mozilla.org code.
17 : *
18 : * The Initial Developer of the Original Code is
19 : * Netscape Communications Corporation.
20 : * Portions created by the Initial Developer are Copyright (C) 1998
21 : * the Initial Developer. All Rights Reserved.
22 : *
23 : * Contributor(s):
24 : *
25 : * Alternatively, the contents of this file may be used under the terms of
26 : * either the GNU General Public License Version 2 or later (the "GPL"), or
27 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 : * in which case the provisions of the GPL or the LGPL are applicable instead
29 : * of those above. If you wish to allow use of your version of this file only
30 : * under the terms of either the GPL or the LGPL, and not to allow others to
31 : * use your version of this file under the terms of the MPL, indicate your
32 : * decision by deleting the provisions above and replace them with the notice
33 : * and other provisions required by the GPL or the LGPL. If you do not delete
34 : * the provisions above, a recipient may use your version of this file under
35 : * the terms of any one of the MPL, the GPL or the LGPL.
36 : *
37 : * ***** END LICENSE BLOCK ***** */
38 :
39 : /* *
40 : *
41 : *
42 : * nsWildCard.cpp: shell-like wildcard match routines
43 : *
44 : * See nsIZipReader.findEntries documentation in nsIZipReader.idl for
45 : * a description of the syntax supported by the routines in this file.
46 : *
47 : * Rob McCool
48 : *
49 : */
50 :
51 : #include "nsWildCard.h"
52 : #include "nsXPCOM.h"
53 : #include "nsCRTGlue.h"
54 : #include "nsCharTraits.h"
55 :
56 : /* -------------------- ASCII-specific character methods ------------------- */
57 :
/* Compile-time check written without C++11 static_assert: the array bound
 * evaluates to -1 (ill-formed) unless lowercase letters have larger character
 * codes than uppercase letters, which the case-folding helpers below rely on. */
typedef int static_assert_character_code_arrangement[('A' < 'a') ? 1 : -1];
59 :
/* Returns 1 when c is an ASCII letter (A-Z or a-z), 0 otherwise. */
template<class T>
static int
alpha(T c)
{
    if ('A' <= c && c <= 'Z')
        return 1;
    if ('a' <= c && c <= 'z')
        return 1;
    return 0;
}
67 :
68 : template<class T>
69 : static int
70 400 : alphanumeric(T c)
71 : {
72 400 : return ('0' <= c && c <= '9') || ::alpha(c);
73 : }
74 :
/* Maps ASCII 'A'..'Z' to 'a'..'z'; every other value is returned unchanged. */
template<class T>
static int
lower(T c)
{
    if (c < 'A' || c > 'Z')
        return c;
    return c + ('a' - 'A');
}
81 :
/* Maps ASCII 'a'..'z' to 'A'..'Z'; every other value is returned unchanged. */
template<class T>
static int
upper(T c)
{
    if (c < 'a' || c > 'z')
        return c;
    return c - ('a' - 'A');
}
88 :
89 : /* ----------------------------- _valid_subexp ---------------------------- */
90 :
91 : template<class T>
92 : static int
93 13475 : _valid_subexp(const T *expr, T stop1, T stop2)
94 : {
95 : register int x;
96 13475 : int nsc = 0; /* Number of special characters */
97 : int np; /* Number of pipe characters in union */
98 13475 : int tld = 0; /* Number of tilde characters */
99 :
100 67616 : for (x = 0; expr[x] && (expr[x] != stop1) && (expr[x] != stop2); ++x) {
101 54141 : switch(expr[x]) {
102 : case '~':
103 5 : if(tld) /* at most one exclusion */
104 0 : return INVALID_SXP;
105 5 : if (stop1) /* no exclusions within unions */
106 0 : return INVALID_SXP;
107 5 : if (!expr[x+1]) /* exclusion cannot be last character */
108 0 : return INVALID_SXP;
109 5 : if (!x) /* exclusion cannot be first character */
110 0 : return INVALID_SXP;
111 5 : ++tld;
112 : /* fall through */
113 : case '*':
114 : case '?':
115 : case '$':
116 2234 : ++nsc;
117 2234 : break;
118 : case '[':
119 432 : ++nsc;
120 432 : if((!expr[++x]) || (expr[x] == ']'))
121 0 : return INVALID_SXP;
122 1296 : for(; expr[x] && (expr[x] != ']'); ++x) {
123 864 : if(expr[x] == '\\' && !expr[++x])
124 0 : return INVALID_SXP;
125 : }
126 432 : if(!expr[x])
127 0 : return INVALID_SXP;
128 432 : break;
129 : case '(':
130 5672 : ++nsc;
131 5672 : if (stop1) /* no nested unions */
132 0 : return INVALID_SXP;
133 5672 : np = -1;
134 11344 : do {
135 11344 : int t = ::_valid_subexp(&expr[++x], T(')'), T('|'));
136 11344 : if(t == 0 || t == INVALID_SXP)
137 0 : return INVALID_SXP;
138 11344 : x+=t;
139 11344 : if(!expr[x])
140 0 : return INVALID_SXP;
141 11344 : ++np;
142 : } while (expr[x] == '|' );
143 5672 : if(np < 1) /* must be at least one pipe */
144 0 : return INVALID_SXP;
145 5672 : break;
146 : case ')':
147 : case ']':
148 : case '|':
149 0 : return INVALID_SXP;
150 : case '\\':
151 0 : ++nsc;
152 0 : if(!expr[++x])
153 0 : return INVALID_SXP;
154 0 : break;
155 : default:
156 45803 : break;
157 : }
158 : }
159 13475 : if((!stop1) && (!nsc)) /* must be at least one special character */
160 0 : return NON_SXP;
161 13475 : return ((expr[x] == stop1 || expr[x] == stop2) ? x : INVALID_SXP);
162 : }
163 :
164 :
165 : template<class T>
166 : int
167 2131 : NS_WildCardValid_(const T *expr)
168 : {
169 2131 : int x = ::_valid_subexp(expr, T('\0'), T('\0'));
170 2131 : return (x < 0 ? x : VALID_SXP);
171 : }
172 :
173 : int
174 2131 : NS_WildCardValid(const char *expr)
175 : {
176 2131 : return NS_WildCardValid_(expr);
177 : }
178 :
179 : int
180 0 : NS_WildCardValid(const PRUnichar *expr)
181 : {
182 0 : return NS_WildCardValid_(expr);
183 : }
184 :
185 : /* ----------------------------- _shexp_match ----------------------------- */
186 :
187 :
/* Result codes used by the matching routines in this file. */
#define MATCH     0     /* the string matches the expression */
#define NOMATCH   1     /* the string does not match the expression */
#define ABORTED (-1)    /* matching was abandoned (malformed expression,
                           failed allocation, or recursion too deep) */

/* Forward declaration: _handle_union and _shexp_match call each other. */
template<class T>
static int
_shexp_match(const T *str, const T *expr, bool case_insensitive,
             unsigned int level);
195 :
196 : /**
197 : * Count characters until we reach a NUL character or either of the
198 : * two delimiter characters, stop1 or stop2. If we encounter a bracketed
199 : * expression, look only for NUL or ']' inside it. Do not look for stop1
200 : * or stop2 inside it. Return ABORTED if bracketed expression is unterminated.
201 : * Handle all escaping.
202 : * Return index in input string of first stop found, or ABORTED if not found.
203 : * If "dest" is non-NULL, copy counted characters to it and NUL terminate.
204 : */
205 : template<class T>
206 : static int
207 2782 : _scan_and_copy(const T *expr, T stop1, T stop2, T *dest)
208 : {
209 : register int sx; /* source index */
210 : register T cc;
211 :
212 11118 : for (sx = 0; (cc = expr[sx]) && cc != stop1 && cc != stop2; sx++) {
213 8336 : if (cc == '\\') {
214 0 : if (!expr[++sx])
215 0 : return ABORTED; /* should be impossible */
216 : }
217 8336 : else if (cc == '[') {
218 2784 : while ((cc = expr[++sx]) && cc != ']') {
219 1392 : if(cc == '\\' && !expr[++sx])
220 0 : return ABORTED;
221 : }
222 696 : if (!cc)
223 0 : return ABORTED; /* should be impossible */
224 : }
225 : }
226 2782 : if (dest && sx) {
227 : /* Copy all but the closing delimiter. */
228 1844 : memcpy(dest, expr, sx * sizeof(T));
229 1844 : dest[sx] = 0;
230 : }
231 2782 : return cc ? sx : ABORTED; /* index of closing delimiter */
232 : }
233 :
234 : /* On input, expr[0] is the opening parenthesis of a union.
235 : * See if any of the alternatives in the union matches as a pattern.
236 : * The strategy is to take each of the alternatives, in turn, and append
237 : * the rest of the expression (after the closing ')' that marks the end of
238 : * this union) to that alternative, and then see if the resultant expression
239 : * matches the input string. Repeat this until some alternative matches,
240 : * or we have an abort.
241 : */
242 : template<class T>
243 : static int
244 934 : _handle_union(const T *str, const T *expr, bool case_insensitive,
245 : unsigned int level)
246 : {
247 : register int sx; /* source index */
248 : int cp; /* source index of closing parenthesis */
249 : int count;
250 934 : int ret = NOMATCH;
251 : T *e2;
252 :
253 : /* Find the closing parenthesis that ends this union in the expression */
254 934 : cp = ::_scan_and_copy(expr, T(')'), T('\0'), static_cast<T*>(NULL));
255 934 : if (cp == ABORTED || cp < 4) /* must be at least "(a|b" before ')' */
256 0 : return ABORTED;
257 934 : ++cp; /* now index of char after closing parenthesis */
258 934 : e2 = (T *) NS_Alloc((1 + nsCharTraits<T>::length(expr)) * sizeof(T));
259 934 : if (!e2)
260 0 : return ABORTED;
261 1844 : for (sx = 1; ; ++sx) {
262 : /* Here, expr[sx] is one character past the preceding '(' or '|'. */
263 : /* Copy everything up to the next delimiter to e2 */
264 1844 : count = ::_scan_and_copy(expr + sx, T(')'), T('|'), e2);
265 1844 : if (count == ABORTED || !count) {
266 0 : ret = ABORTED;
267 0 : break;
268 : }
269 1844 : sx += count;
270 : /* Append everything after closing parenthesis to e2. This is safe. */
271 1844 : nsCharTraits<T>::copy(e2 + count, expr + cp, nsCharTraits<T>::length(expr + cp) + 1);
272 1844 : ret = ::_shexp_match(str, e2, case_insensitive, level + 1);
273 1844 : if (ret != NOMATCH || !expr[sx] || expr[sx] == ')')
274 934 : break;
275 : }
276 934 : NS_Free(e2);
277 934 : if (sx < 2)
278 0 : ret = ABORTED;
279 934 : return ret;
280 : }
281 :
/* Case-insensitive range test.
 *
 * Returns 1 if some character c with start <= c <= end equals val once
 * ASCII letter case is ignored, and 0 otherwise (including when
 * start > end).
 *
 * This is computed directly rather than by filling a 256-entry table on
 * every call.  The table-filling loop "while (start <= end) ... start++"
 * could also never finish when end == 255, because an unsigned char wraps
 * back to 0.
 */
static int
_is_char_in_range(unsigned char start, unsigned char end, unsigned char val)
{
    if (start > end)
        return 0;

    /* Fold val to lower case; this is the same mapping lower() applies. */
    unsigned char folded = val;
    if ('A' <= folded && folded <= 'Z')
        folded = static_cast<unsigned char>(folded + ('a' - 'A'));

    /* A range member that is not an uppercase letter folds to itself, and
     * "folded" is never an uppercase letter. */
    if (start <= folded && folded <= end)
        return 1;

    /* Otherwise only the uppercase form of a letter can fold onto it. */
    if ('a' <= folded && folded <= 'z') {
        const unsigned char upperForm =
            static_cast<unsigned char>(folded - ('a' - 'A'));
        if (start <= upperForm && upperForm <= end)
            return 1;
    }
    return 0;
}
292 :
293 : template<class T>
294 : static int
295 : _shexp_match(const T *str, const T *expr, bool case_insensitive,
296 8887 : unsigned int level)
297 : {
298 : register int x; /* input string index */
299 : register int y; /* expression index */
300 : int ret,neg;
301 :
302 8887 : if (level > 20) /* Don't let the stack get too deep. */
303 0 : return ABORTED;
304 9317 : for(x = 0, y = 0; expr[y]; ++y, ++x) {
305 9215 : if((!str[x]) && (expr[y] != '$') && (expr[y] != '*')) {
306 6 : return NOMATCH;
307 : }
308 9209 : switch(expr[y]) {
309 : case '$':
310 0 : if(str[x])
311 0 : return NOMATCH;
312 0 : --x; /* we don't want loop to increment x */
313 0 : break;
314 : case '*':
315 507 : while(expr[++y] == '*'){}
316 507 : if(!expr[y])
317 1 : return MATCH;
318 6641 : while(str[x]) {
319 5700 : ret = ::_shexp_match(&str[x++], &expr[y], case_insensitive,
320 : level + 1);
321 5700 : switch(ret) {
322 : case NOMATCH:
323 5629 : continue;
324 : case ABORTED:
325 0 : return ABORTED;
326 : default:
327 71 : return MATCH;
328 : }
329 : }
330 435 : if((expr[y] == '$') && (expr[y+1] == '\0') && (!str[x]))
331 0 : return MATCH;
332 : else
333 435 : return NOMATCH;
334 : case '[': {
335 200 : T start, end = 0;
336 : int i;
337 200 : neg = ((expr[++y] == '^') && (expr[y+1] != ']'));
338 200 : if (neg)
339 0 : ++y;
340 200 : i = y;
341 200 : start = expr[i++];
342 200 : if (start == '\\')
343 0 : start = expr[i++];
344 200 : if (::alphanumeric(start) && expr[i++] == '-') {
345 0 : end = expr[i++];
346 0 : if (end == '\\')
347 0 : end = expr[i++];
348 : }
349 200 : if (::alphanumeric(end) && expr[i] == ']') {
350 : /* This is a range form: a-b */
351 0 : T val = str[x];
352 0 : if (end < start) { /* swap them */
353 0 : T tmp = end;
354 0 : end = start;
355 0 : start = tmp;
356 : }
357 0 : if (case_insensitive && ::alpha(val)) {
358 0 : val = ::_is_char_in_range((unsigned char) start,
359 : (unsigned char) end,
360 : (unsigned char) val);
361 0 : if (neg == val)
362 0 : return NOMATCH;
363 : }
364 0 : else if (neg != ((val < start) || (val > end))) {
365 0 : return NOMATCH;
366 : }
367 0 : y = i;
368 : }
369 : else {
370 : /* Not range form */
371 200 : int matched = 0;
372 600 : for (; expr[y] != ']'; y++) {
373 400 : if (expr[y] == '\\')
374 0 : ++y;
375 400 : if(case_insensitive)
376 0 : matched |= (::upper(str[x]) == ::upper(expr[y]));
377 : else
378 400 : matched |= (str[x] == expr[y]);
379 : }
380 200 : if (neg == matched)
381 56 : return NOMATCH;
382 : }
383 : }
384 144 : break;
385 : case '(':
386 934 : if (!expr[y+1])
387 0 : return ABORTED;
388 934 : return ::_handle_union(&str[x], &expr[y], case_insensitive, level + 1);
389 : case '?':
390 2 : break;
391 : case ')':
392 : case ']':
393 : case '|':
394 0 : return ABORTED;
395 : case '\\':
396 0 : ++y;
397 : /* fall through */
398 : default:
399 7566 : if(case_insensitive) {
400 0 : if(::upper(str[x]) != ::upper(expr[y]))
401 0 : return NOMATCH;
402 : }
403 : else {
404 7566 : if(str[x] != expr[y])
405 7282 : return NOMATCH;
406 : }
407 284 : break;
408 : }
409 : }
410 102 : return (str[x] ? NOMATCH : MATCH);
411 : }
412 :
413 :
414 : template<class T>
415 : static int
416 1339 : ns_WildCardMatch(const T *str, const T *xp, bool case_insensitive)
417 : {
418 1339 : T *expr = NULL;
419 1339 : int x, ret = MATCH;
420 :
421 1339 : if (!nsCharTraits<T>::find(xp, nsCharTraits<T>::length(xp), T('~')))
422 1335 : return ::_shexp_match(str, xp, case_insensitive, 0);
423 :
424 4 : expr = (T *) NS_Alloc((nsCharTraits<T>::length(xp) + 1) * sizeof(T));
425 4 : if(!expr)
426 0 : return NOMATCH;
427 4 : memcpy(expr, xp, (nsCharTraits<T>::length(xp) + 1) * sizeof(T));
428 :
429 4 : x = ::_scan_and_copy(expr, T('~'), T('\0'), static_cast<T*>(NULL));
430 4 : if (x != ABORTED && expr[x] == '~') {
431 4 : expr[x++] = '\0';
432 4 : ret = ::_shexp_match(str, &expr[x], case_insensitive, 0);
433 4 : switch (ret) {
434 4 : case NOMATCH: ret = MATCH; break;
435 0 : case MATCH: ret = NOMATCH; break;
436 0 : default: break;
437 : }
438 : }
439 4 : if (ret == MATCH)
440 4 : ret = ::_shexp_match(str, expr, case_insensitive, 0);
441 :
442 4 : NS_Free(expr);
443 4 : return ret;
444 : }
445 :
446 : template<class T>
447 : int
448 1339 : NS_WildCardMatch_(const T *str, const T *expr, bool case_insensitive)
449 : {
450 1339 : int is_valid = NS_WildCardValid(expr);
451 1339 : switch(is_valid) {
452 : case INVALID_SXP:
453 0 : return -1;
454 : default:
455 1339 : return ::ns_WildCardMatch(str, expr, case_insensitive);
456 : }
457 : }
458 :
459 : int
460 1339 : NS_WildCardMatch(const char *str, const char *xp,
461 : bool case_insensitive)
462 : {
463 1339 : return NS_WildCardMatch_(str, xp, case_insensitive);
464 : }
465 :
466 : int
467 0 : NS_WildCardMatch(const PRUnichar *str, const PRUnichar *xp,
468 : bool case_insensitive)
469 : {
470 0 : return NS_WildCardMatch_(str, xp, case_insensitive);
471 : }
|