1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is mozilla.org code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 1998
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : *
24 : * Alternatively, the contents of this file may be used under the terms of
25 : * either of the GNU General Public License Version 2 or later (the "GPL"),
26 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 : * in which case the provisions of the GPL or the LGPL are applicable instead
28 : * of those above. If you wish to allow use of your version of this file only
29 : * under the terms of either the GPL or the LGPL, and not to allow others to
30 : * use your version of this file under the terms of the MPL, indicate your
31 : * decision by deleting the provisions above and replace them with the notice
32 : * and other provisions required by the GPL or the LGPL. If you do not delete
33 : * the provisions above, a recipient may use your version of this file under
34 : * the terms of any one of the MPL, the GPL or the LGPL.
35 : *
36 : * ***** END LICENSE BLOCK ***** */
37 :
38 : // First checked in on 98/12/03 by John R. McMullen, derived from net.h/mkparse.c.
39 :
40 : #include "nsEscape.h"
41 : #include "nsMemory.h"
42 : #include "nsCRT.h"
43 : #include "nsReadableUtils.h"
44 :
45 : const int netCharType[256] =
46 : /* Per-character bit mask, indexed by byte value. A set bit means the
** character may be copied through unescaped under that mask (see IS_OK).
**
** Bit 0 (value 1) xalpha -- the alphas
47 : ** Bit 1 (value 2) xpalpha -- as xalpha but
48 : ** converts spaces to plus and plus to %2B
49 : ** Bit 2 (value 4) path -- as xalphas but doesn't escape '/'
**
** Only indices 0x00-0x80 are listed; the remaining entries of the array are
** zero-initialized, so bytes 0x81-0xFF are always escaped.
50 : */
51 : /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
52 : { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x */
53 : 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 1x */
54 : 0,0,0,0,0,0,0,0,0,0,7,4,0,7,7,4, /* 2x !"#$%&'()*+,-./ */
55 : 7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
56 : 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 4x @ABCDEFGHIJKLMNO */
57 : /* bits for '@' changed from 7 to 0 so '@' can be escaped */
58 : /* in usernames and passwords in publishing. */
59 : 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7, /* 5X PQRSTUVWXYZ[\]^_ */
60 : 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 6x `abcdefghijklmno */
61 : 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0, /* 7X pqrstuvwxyz{|}~ DEL */
62 : 0, }; /* 8x: entry for 0x80; everything after it is implicitly 0 */
63 :
/* Convert one hexadecimal digit character into its numeric value (0-15).
** A character that is not a hex digit yields 0.
** The argument is parenthesized so that expression arguments expand
** correctly, but it is still evaluated more than once: do not pass an
** expression with side effects.
*/
#define UNHEX(C) \
    (((C) >= '0' && (C) <= '9') ? (C) - '0' : \
    (((C) >= 'A' && (C) <= 'F') ? (C) - 'A' + 10 : \
    (((C) >= 'a' && (C) <= 'f') ? (C) - 'a' + 10 : 0)))


/* Non-zero when character C may be emitted unescaped.  The mask is taken
** from a variable named `flags` that must be in scope where the macro is
** used.  C is evaluated exactly once.
*/
#define IS_OK(C) (netCharType[((unsigned int) (C))] & (flags))

/* Character that introduces a %XX escape sequence. */
#define HEX_ESCAPE '%'
74 :
75 : //----------------------------------------------------------------------------------------
76 5663 : static char* nsEscapeCount(
77 : const char * str,
78 : nsEscapeMask flags,
79 : size_t* out_len)
80 : //----------------------------------------------------------------------------------------
81 : {
82 5663 : if (!str)
83 0 : return 0;
84 :
85 5663 : size_t i, len = 0, charsToEscape = 0;
86 : static const char hexChars[] = "0123456789ABCDEF";
87 :
88 5663 : register const unsigned char* src = (const unsigned char *) str;
89 122311 : while (*src)
90 : {
91 110985 : len++;
92 110985 : if (!IS_OK(*src++))
93 46275 : charsToEscape++;
94 : }
95 :
96 : // calculate how much memory should be allocated
97 : // original length + 2 bytes for each escaped character + terminating '\0'
98 : // do the sum in steps to check for overflow
99 5663 : size_t dstSize = len + 1 + charsToEscape;
100 5663 : if (dstSize <= len)
101 0 : return 0;
102 5663 : dstSize += charsToEscape;
103 5663 : if (dstSize < len)
104 0 : return 0;
105 :
106 : // fail if we need more than 4GB
107 : // size_t is likely to be long unsigned int but nsMemory::Alloc(size_t)
108 : // calls NS_Alloc_P(size_t) which calls PR_Malloc(PRUint32), so there is
109 : // no chance to allocate more than 4GB using nsMemory::Alloc()
110 : if (dstSize > PR_UINT32_MAX)
111 : return 0;
112 :
113 5663 : char* result = (char *)nsMemory::Alloc(dstSize);
114 5663 : if (!result)
115 0 : return 0;
116 :
117 5663 : register unsigned char* dst = (unsigned char *) result;
118 5663 : src = (const unsigned char *) str;
119 5663 : if (flags == url_XPAlphas)
120 : {
121 1051 : for (i = 0; i < len; i++)
122 : {
123 975 : unsigned char c = *src++;
124 975 : if (IS_OK(c))
125 901 : *dst++ = c;
126 74 : else if (c == ' ')
127 74 : *dst++ = '+'; /* convert spaces to pluses */
128 : else
129 : {
130 0 : *dst++ = HEX_ESCAPE;
131 0 : *dst++ = hexChars[c >> 4]; /* high nibble */
132 0 : *dst++ = hexChars[c & 0x0f]; /* low nibble */
133 : }
134 : }
135 : }
136 : else
137 : {
138 115597 : for (i = 0; i < len; i++)
139 : {
140 110010 : unsigned char c = *src++;
141 110010 : if (IS_OK(c))
142 63809 : *dst++ = c;
143 : else
144 : {
145 46201 : *dst++ = HEX_ESCAPE;
146 46201 : *dst++ = hexChars[c >> 4]; /* high nibble */
147 46201 : *dst++ = hexChars[c & 0x0f]; /* low nibble */
148 : }
149 : }
150 : }
151 :
152 5663 : *dst = '\0'; /* tack on eos */
153 5663 : if(out_len)
154 0 : *out_len = dst - (unsigned char *) result;
155 5663 : return result;
156 : }
157 :
158 : //----------------------------------------------------------------------------------------
159 5663 : char* nsEscape(const char * str, nsEscapeMask flags)
160 : //----------------------------------------------------------------------------------------
161 : {
162 5663 : if(!str)
163 0 : return NULL;
164 5663 : return nsEscapeCount(str, flags, NULL);
165 : }
166 :
167 : //----------------------------------------------------------------------------------------
168 42 : char* nsUnescape(char * str)
169 : //----------------------------------------------------------------------------------------
170 : {
171 42 : nsUnescapeCount(str);
172 42 : return str;
173 : }
174 :
175 : //----------------------------------------------------------------------------------------
176 190867 : PRInt32 nsUnescapeCount(char * str)
177 : //----------------------------------------------------------------------------------------
178 : {
179 190867 : register char *src = str;
180 190867 : register char *dst = str;
181 : static const char hexChars[] = "0123456789ABCDEFabcdef";
182 :
183 190867 : char c1[] = " ";
184 190867 : char c2[] = " ";
185 190867 : char* const pc1 = c1;
186 190867 : char* const pc2 = c2;
187 :
188 8851246 : while (*src)
189 : {
190 8469512 : c1[0] = *(src+1);
191 8469512 : if (*(src+1) == '\0')
192 187191 : c2[0] = '\0';
193 : else
194 8282321 : c2[0] = *(src+2);
195 :
196 8527244 : if (*src != HEX_ESCAPE || PL_strpbrk(pc1, hexChars) == 0 ||
197 57732 : PL_strpbrk(pc2, hexChars) == 0 )
198 8411780 : *dst++ = *src++;
199 : else
200 : {
201 57732 : src++; /* walk over escape */
202 57732 : if (*src)
203 : {
204 57732 : *dst = UNHEX(*src) << 4;
205 57732 : src++;
206 : }
207 57732 : if (*src)
208 : {
209 57732 : *dst = (*dst + UNHEX(*src));
210 57732 : src++;
211 : }
212 57732 : dst++;
213 : }
214 : }
215 :
216 190867 : *dst = 0;
217 190867 : return (int)(dst - str);
218 :
219 : } /* NET_UnEscapeCnt */
220 :
221 :
222 : char *
223 1 : nsEscapeHTML(const char * string)
224 : {
225 1 : char *rv = nsnull;
226 : /* XXX Hardcoded max entity len. The +1 is for the trailing null. */
227 1 : PRUint32 len = PL_strlen(string);
228 1 : if (len >= (PR_UINT32_MAX / 6))
229 0 : return nsnull;
230 :
231 1 : rv = (char *)NS_Alloc( (6 * len) + 1 );
232 1 : char *ptr = rv;
233 :
234 1 : if(rv)
235 : {
236 88 : for(; *string != '\0'; string++)
237 : {
238 87 : if(*string == '<')
239 : {
240 0 : *ptr++ = '&';
241 0 : *ptr++ = 'l';
242 0 : *ptr++ = 't';
243 0 : *ptr++ = ';';
244 : }
245 87 : else if(*string == '>')
246 : {
247 0 : *ptr++ = '&';
248 0 : *ptr++ = 'g';
249 0 : *ptr++ = 't';
250 0 : *ptr++ = ';';
251 : }
252 87 : else if(*string == '&')
253 : {
254 1 : *ptr++ = '&';
255 1 : *ptr++ = 'a';
256 1 : *ptr++ = 'm';
257 1 : *ptr++ = 'p';
258 1 : *ptr++ = ';';
259 : }
260 86 : else if (*string == '"')
261 : {
262 0 : *ptr++ = '&';
263 0 : *ptr++ = 'q';
264 0 : *ptr++ = 'u';
265 0 : *ptr++ = 'o';
266 0 : *ptr++ = 't';
267 0 : *ptr++ = ';';
268 : }
269 86 : else if (*string == '\'')
270 : {
271 0 : *ptr++ = '&';
272 0 : *ptr++ = '#';
273 0 : *ptr++ = '3';
274 0 : *ptr++ = '9';
275 0 : *ptr++ = ';';
276 : }
277 : else
278 : {
279 86 : *ptr++ = *string;
280 : }
281 : }
282 1 : *ptr = '\0';
283 : }
284 :
285 1 : return(rv);
286 : }
287 :
288 : PRUnichar *
289 0 : nsEscapeHTML2(const PRUnichar *aSourceBuffer, PRInt32 aSourceBufferLen)
290 : {
291 : // if the caller didn't calculate the length
292 0 : if (aSourceBufferLen < 0) {
293 0 : aSourceBufferLen = nsCRT::strlen(aSourceBuffer); // ...then I will
294 : }
295 :
296 : /* XXX Hardcoded max entity len. */
297 0 : if (PRUint32(aSourceBufferLen) >=
298 : ((PR_UINT32_MAX - sizeof(PRUnichar)) / (6 * sizeof(PRUnichar))) )
299 0 : return nsnull;
300 :
301 : PRUnichar *resultBuffer = (PRUnichar *)nsMemory::Alloc(aSourceBufferLen *
302 0 : 6 * sizeof(PRUnichar) + sizeof(PRUnichar('\0')));
303 0 : PRUnichar *ptr = resultBuffer;
304 :
305 0 : if (resultBuffer) {
306 : PRInt32 i;
307 :
308 0 : for(i = 0; i < aSourceBufferLen; i++) {
309 0 : if(aSourceBuffer[i] == '<') {
310 0 : *ptr++ = '&';
311 0 : *ptr++ = 'l';
312 0 : *ptr++ = 't';
313 0 : *ptr++ = ';';
314 0 : } else if(aSourceBuffer[i] == '>') {
315 0 : *ptr++ = '&';
316 0 : *ptr++ = 'g';
317 0 : *ptr++ = 't';
318 0 : *ptr++ = ';';
319 0 : } else if(aSourceBuffer[i] == '&') {
320 0 : *ptr++ = '&';
321 0 : *ptr++ = 'a';
322 0 : *ptr++ = 'm';
323 0 : *ptr++ = 'p';
324 0 : *ptr++ = ';';
325 0 : } else if (aSourceBuffer[i] == '"') {
326 0 : *ptr++ = '&';
327 0 : *ptr++ = 'q';
328 0 : *ptr++ = 'u';
329 0 : *ptr++ = 'o';
330 0 : *ptr++ = 't';
331 0 : *ptr++ = ';';
332 0 : } else if (aSourceBuffer[i] == '\'') {
333 0 : *ptr++ = '&';
334 0 : *ptr++ = '#';
335 0 : *ptr++ = '3';
336 0 : *ptr++ = '9';
337 0 : *ptr++ = ';';
338 : } else {
339 0 : *ptr++ = aSourceBuffer[i];
340 : }
341 : }
342 0 : *ptr = 0;
343 : }
344 :
345 0 : return resultBuffer;
346 : }
347 :
348 : //----------------------------------------------------------------------------------------
349 :
/* Per-character bit mask used by NS_EscapeURL, indexed by byte value.
 * Each bit corresponds to one escape flag; a set bit means the character
 * does NOT need escaping when that flag is requested (see NO_NEED_ESC).
 * 1023 has all ten low bits set, i.e. the character is safe in every
 * URL part; 0 means it is escaped everywhere.
 * Only indices 0x00-0x80 are listed; the remaining entries are
 * zero-initialized.
 */
350 : const int EscapeChars[256] =
351 : /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
352 : {
353 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
354 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
355 : 0,1023, 0, 512,1023, 0,1023, 0,1023,1023,1023,1023,1023,1023, 953, 784, /* 2x !"#$%&'()*+,-./ */
356 : 1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1008,1008, 0,1008, 0, 768, /* 3x 0123456789:;<=>? */
357 : 1008,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, /* 4x @ABCDEFGHIJKLMNO */
358 : 1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896, 896, 896, 896,1023, /* 5x PQRSTUVWXYZ[\]^_ */
359 : 0,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, /* 6x `abcdefghijklmno */
360 : 1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896,1012, 896,1023, 0, /* 7x pqrstuvwxyz{|}~ DEL */
361 : 0 /* 8x: entry for 0x80 (DEL, 0x7F, is the last entry of the 7x row) */
362 : };
363 :
/* Non-zero when character C needs no escaping under the mask held in a
 * variable named `flags`, which must be in scope where the macro is used. */
364 : #define NO_NEED_ESC(C) (EscapeChars[((unsigned int) (C))] & (flags))
365 :
366 : //----------------------------------------------------------------------------------------
367 :
368 : /* returns an escaped string */
369 :
370 : /* use the following flags to specify which
371 : part of an URL you want to escape:
372 :
373 : esc_Scheme = 1
374 : esc_Username = 2
375 : esc_Password = 4
376 : esc_Host = 8
377 : esc_Directory = 16
378 : esc_FileBaseName = 32
379 : esc_FileExtension = 64
380 : esc_Param = 128
381 : esc_Query = 256
382 : esc_Ref = 512
383 : */
384 :
385 : /* by default this function will not escape parts of a string
386 : that already look escaped, which means it already includes
387 : a valid hexcode. This is done to avoid multiple escapes of
388 : a string. Use the following flags to force escaping of a
389 : string:
390 :
391 : esc_Forced = 1024
392 : */
393 :
394 1052137 : bool NS_EscapeURL(const char *part,
395 : PRInt32 partLen,
396 : PRUint32 flags,
397 : nsACString &result)
398 : {
399 1052137 : if (!part) {
400 0 : NS_NOTREACHED("null pointer");
401 0 : return false;
402 : }
403 :
404 1052137 : int i = 0;
405 : static const char hexChars[] = "0123456789ABCDEF";
406 1052137 : if (partLen < 0)
407 186794 : partLen = strlen(part);
408 1052137 : bool forced = !!(flags & esc_Forced);
409 1052137 : bool ignoreNonAscii = !!(flags & esc_OnlyASCII);
410 1052137 : bool ignoreAscii = !!(flags & esc_OnlyNonASCII);
411 1052137 : bool writing = !!(flags & esc_AlwaysCopy);
412 1052137 : bool colon = !!(flags & esc_Colon);
413 :
414 1052137 : register const unsigned char* src = (const unsigned char *) part;
415 :
416 : char tempBuffer[100];
417 1052137 : unsigned int tempBufferPos = 0;
418 :
419 1052137 : bool previousIsNonASCII = false;
420 39002510 : for (i = 0; i < partLen; i++)
421 : {
422 37950373 : unsigned char c = *src++;
423 :
424 : // if the char has not to be escaped or whatever follows % is
425 : // a valid escaped string, just copy the char.
426 : //
427 : // Also the % will not be escaped until forced
428 : // See bugzilla bug 61269 for details why we changed this
429 : //
430 : // And, we will not escape non-ascii characters if requested.
431 : // On special request we will also escape the colon even when
432 : // not covered by the matrix.
433 : // ignoreAscii is not honored for control characters (C0 and DEL)
434 : //
435 : // And, we should escape the '|' character when it occurs after any
436 : // non-ASCII character as it may be part of a multi-byte character.
437 : //
438 : // 0x20..0x7e are the valid ASCII characters. We also escape spaces
439 : // (0x20) since they are not legal in URLs.
440 75900358 : if ((NO_NEED_ESC(c) || (c == HEX_ESCAPE && !forced)
441 : || (c > 0x7f && ignoreNonAscii)
442 : || (c > 0x20 && c < 0x7f && ignoreAscii))
443 4610 : && !(c == ':' && colon)
444 37949989 : && !(previousIsNonASCII && c == '|' && !ignoreNonAscii))
445 : {
446 75899970 : if (writing)
447 5608659 : tempBuffer[tempBufferPos++] = c;
448 : }
449 : else /* do the escape magic */
450 : {
451 388 : if (!writing)
452 : {
453 85 : result.Append(part, i);
454 85 : writing = true;
455 : }
456 388 : tempBuffer[tempBufferPos++] = HEX_ESCAPE;
457 388 : tempBuffer[tempBufferPos++] = hexChars[c >> 4]; /* high nibble */
458 388 : tempBuffer[tempBufferPos++] = hexChars[c & 0x0f]; /* low nibble */
459 : }
460 :
461 37950373 : if (tempBufferPos >= sizeof(tempBuffer) - 4)
462 : {
463 3098 : NS_ASSERTION(writing, "should be writing");
464 3098 : tempBuffer[tempBufferPos] = '\0';
465 3098 : result += tempBuffer;
466 3098 : tempBufferPos = 0;
467 : }
468 :
469 37950373 : previousIsNonASCII = (c > 0x7f);
470 : }
471 1052137 : if (writing) {
472 189128 : tempBuffer[tempBufferPos] = '\0';
473 189128 : result += tempBuffer;
474 : }
475 1052137 : return writing;
476 : }
477 :
478 : #define ISHEX(c) memchr(hexChars, c, sizeof(hexChars)-1)
479 :
480 2576 : bool NS_UnescapeURL(const char *str, PRInt32 len, PRUint32 flags, nsACString &result)
481 : {
482 2576 : if (!str) {
483 0 : NS_NOTREACHED("null pointer");
484 0 : return false;
485 : }
486 :
487 2576 : if (len < 0)
488 0 : len = strlen(str);
489 :
490 2576 : bool ignoreNonAscii = !!(flags & esc_OnlyASCII);
491 2576 : bool ignoreAscii = !!(flags & esc_OnlyNonASCII);
492 2576 : bool writing = !!(flags & esc_AlwaysCopy);
493 2576 : bool skipControl = !!(flags & esc_SkipControl);
494 :
495 : static const char hexChars[] = "0123456789ABCDEFabcdef";
496 :
497 2576 : const char *last = str;
498 2576 : const char *p = str;
499 :
500 42431 : for (int i=0; i<len; ++i, ++p) {
501 : //printf("%c [i=%d of len=%d]\n", *p, i, len);
502 39855 : if (*p == HEX_ESCAPE && i < len-2) {
503 115 : unsigned char *p1 = ((unsigned char *) p) + 1;
504 115 : unsigned char *p2 = ((unsigned char *) p) + 2;
505 327 : if (ISHEX(*p1) && ISHEX(*p2) &&
506 111 : ((*p1 < '8' && !ignoreAscii) || (*p1 >= '8' && !ignoreNonAscii)) &&
507 : !(skipControl &&
508 195 : (*p1 < '2' || (*p1 == '7' && (*p2 == 'f' || *p2 == 'F'))))) {
509 : //printf("- p1=%c p2=%c\n", *p1, *p2);
510 98 : writing = true;
511 98 : if (p > last) {
512 : //printf("- p=%p, last=%p\n", p, last);
513 23 : result.Append(last, p - last);
514 23 : last = p;
515 : }
516 98 : char u = (UNHEX(*p1) << 4) + UNHEX(*p2);
517 : //printf("- u=%c\n", u);
518 98 : result.Append(u);
519 98 : i += 2;
520 98 : p += 2;
521 98 : last += 3;
522 : }
523 : }
524 : }
525 2576 : if (writing && last < str + len)
526 2204 : result.Append(last, str + len - last);
527 :
528 2576 : return writing;
529 : }
|