1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /* vim:set ts=4 sw=4 sts=4 et cindent: */
3 : /* ***** BEGIN LICENSE BLOCK *****
4 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 : *
6 : * The contents of this file are subject to the Mozilla Public License Version
7 : * 1.1 (the "License"); you may not use this file except in compliance with
8 : * the License. You may obtain a copy of the License at
9 : * http://www.mozilla.org/MPL/
10 : *
11 : * Software distributed under the License is distributed on an "AS IS" basis,
12 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 : * for the specific language governing rights and limitations under the
14 : * License.
15 : *
16 : * The Original Code is mozilla.org code.
17 : *
18 : * The Initial Developer of the Original Code is
19 : * Andreas Otte.
20 : * Portions created by the Initial Developer are Copyright (C) 2000
21 : * the Initial Developer. All Rights Reserved.
22 : *
23 : * Contributor(s):
24 : * Darin Fisher <darin@netscape.com>
25 : *
26 : * Alternatively, the contents of this file may be used under the terms of
27 : * either the GNU General Public License Version 2 or later (the "GPL"), or
28 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 : * in which case the provisions of the GPL or the LGPL are applicable instead
30 : * of those above. If you wish to allow use of your version of this file only
31 : * under the terms of either the GPL or the LGPL, and not to allow others to
32 : * use your version of this file under the terms of the MPL, indicate your
33 : * decision by deleting the provisions above and replace them with the notice
34 : * and other provisions required by the GPL or the LGPL. If you do not delete
35 : * the provisions above, a recipient may use your version of this file under
36 : * the terms of any one of the MPL, the GPL or the LGPL.
37 : *
38 : * ***** END LICENSE BLOCK ***** */
39 :
40 : #include "mozilla/RangedPtr.h"
41 :
42 : #include "nsURLHelper.h"
43 : #include "nsReadableUtils.h"
44 : #include "nsIServiceManager.h"
45 : #include "nsIIOService.h"
46 : #include "nsILocalFile.h"
47 : #include "nsIURLParser.h"
48 : #include "nsIURI.h"
49 : #include "nsMemory.h"
50 : #include "nsEscape.h"
51 : #include "nsCOMPtr.h"
52 : #include "nsCRT.h"
53 : #include "nsNetCID.h"
54 : #include "netCore.h"
55 : #include "prprf.h"
56 : #include "prnetdb.h"
57 :
58 : using namespace mozilla;
59 :
60 : //----------------------------------------------------------------------------
61 : // Init/Shutdown
62 : //----------------------------------------------------------------------------
63 :
// True once InitGlobals() has run; reset to false by net_ShutdownURLHelper().
64 : static bool gInitialized = false;
// Cached URL parser services. InitGlobals() stores an AddRef'd pointer in
// each of these and net_ShutdownURLHelper() releases them again.
65 : static nsIURLParser *gNoAuthURLParser = nsnull;
66 : static nsIURLParser *gAuthURLParser = nsnull;
67 : static nsIURLParser *gStdURLParser = nsnull;
68 :
69 : static void
70 1419 : InitGlobals()
71 : {
72 2838 : nsCOMPtr<nsIURLParser> parser;
73 :
74 1419 : parser = do_GetService(NS_NOAUTHURLPARSER_CONTRACTID);
75 1419 : NS_ASSERTION(parser, "failed getting 'noauth' url parser");
76 1419 : if (parser) {
77 1419 : gNoAuthURLParser = parser.get();
78 1419 : NS_ADDREF(gNoAuthURLParser);
79 : }
80 :
81 1419 : parser = do_GetService(NS_AUTHURLPARSER_CONTRACTID);
82 1419 : NS_ASSERTION(parser, "failed getting 'auth' url parser");
83 1419 : if (parser) {
84 1419 : gAuthURLParser = parser.get();
85 1419 : NS_ADDREF(gAuthURLParser);
86 : }
87 :
88 1419 : parser = do_GetService(NS_STDURLPARSER_CONTRACTID);
89 1419 : NS_ASSERTION(parser, "failed getting 'std' url parser");
90 1419 : if (parser) {
91 1419 : gStdURLParser = parser.get();
92 1419 : NS_ADDREF(gStdURLParser);
93 : }
94 :
95 1419 : gInitialized = true;
96 1419 : }
97 :
98 : void
99 1419 : net_ShutdownURLHelper()
100 : {
101 1419 : if (gInitialized) {
102 1419 : NS_IF_RELEASE(gNoAuthURLParser);
103 1419 : NS_IF_RELEASE(gAuthURLParser);
104 1419 : NS_IF_RELEASE(gStdURLParser);
105 1419 : gInitialized = false;
106 : }
107 1419 : }
108 :
109 : //----------------------------------------------------------------------------
110 : // nsIURLParser getters
111 : //----------------------------------------------------------------------------
112 :
113 : nsIURLParser *
114 35477 : net_GetAuthURLParser()
115 : {
116 35477 : if (!gInitialized)
117 0 : InitGlobals();
118 35477 : return gAuthURLParser;
119 : }
120 :
121 : nsIURLParser *
122 205425 : net_GetNoAuthURLParser()
123 : {
124 205425 : if (!gInitialized)
125 0 : InitGlobals();
126 205425 : return gNoAuthURLParser;
127 : }
128 :
129 : nsIURLParser *
130 392063 : net_GetStdURLParser()
131 : {
132 392063 : if (!gInitialized)
133 1419 : InitGlobals();
134 392063 : return gStdURLParser;
135 : }
136 :
137 : //---------------------------------------------------------------------------
138 : // GetURLSpecFromFile / GetURLSpecFromDir implementations
139 : //---------------------------------------------------------------------------
140 : nsresult
141 0 : net_GetURLSpecFromDir(nsIFile *aFile, nsACString &result)
142 : {
143 0 : nsCAutoString escPath;
144 0 : nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath);
145 0 : if (NS_FAILED(rv))
146 0 : return rv;
147 :
148 0 : if (escPath.Last() != '/') {
149 0 : escPath += '/';
150 : }
151 :
152 0 : result = escPath;
153 0 : return NS_OK;
154 : }
155 :
156 : nsresult
157 6330 : net_GetURLSpecFromFile(nsIFile *aFile, nsACString &result)
158 : {
159 12660 : nsCAutoString escPath;
160 6330 : nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath);
161 6330 : if (NS_FAILED(rv))
162 0 : return rv;
163 :
164 : // if this file references a directory, then we need to ensure that the
165 : // URL ends with a slash. this is important since it affects the rules
166 : // for relative URL resolution when this URL is used as a base URL.
167 : // if the file does not exist, then we make no assumption about its type,
168 : // and simply leave the URL unmodified.
169 6330 : if (escPath.Last() != '/') {
170 : bool dir;
171 6330 : rv = aFile->IsDirectory(&dir);
172 6330 : if (NS_SUCCEEDED(rv) && dir)
173 1639 : escPath += '/';
174 : }
175 :
176 6330 : result = escPath;
177 6330 : return NS_OK;
178 : }
179 :
180 : //----------------------------------------------------------------------------
181 : // file:// URL parsing
182 : //----------------------------------------------------------------------------
183 :
184 : nsresult
185 61809 : net_ParseFileURL(const nsACString &inURL,
186 : nsACString &outDirectory,
187 : nsACString &outFileBaseName,
188 : nsACString &outFileExtension)
189 : {
190 : nsresult rv;
191 :
192 61809 : outDirectory.Truncate();
193 61809 : outFileBaseName.Truncate();
194 61809 : outFileExtension.Truncate();
195 :
196 123618 : const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL);
197 61809 : const char *url = flatURL.get();
198 :
199 : PRUint32 schemeBeg, schemeEnd;
200 61809 : rv = net_ExtractURLScheme(flatURL, &schemeBeg, &schemeEnd, nsnull);
201 61809 : if (NS_FAILED(rv)) return rv;
202 :
203 61809 : if (strncmp(url + schemeBeg, "file", schemeEnd - schemeBeg) != 0) {
204 0 : NS_ERROR("must be a file:// url");
205 0 : return NS_ERROR_UNEXPECTED;
206 : }
207 :
208 61809 : nsIURLParser *parser = net_GetNoAuthURLParser();
209 61809 : NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED);
210 :
211 : PRUint32 pathPos, filepathPos, directoryPos, basenamePos, extensionPos;
212 : PRInt32 pathLen, filepathLen, directoryLen, basenameLen, extensionLen;
213 :
214 : // invoke the parser to extract the URL path
215 61809 : rv = parser->ParseURL(url, flatURL.Length(),
216 : nsnull, nsnull, // don't care about scheme
217 : nsnull, nsnull, // don't care about authority
218 123618 : &pathPos, &pathLen);
219 61809 : if (NS_FAILED(rv)) return rv;
220 :
221 : // invoke the parser to extract filepath from the path
222 : rv = parser->ParsePath(url + pathPos, pathLen,
223 : &filepathPos, &filepathLen,
224 : nsnull, nsnull, // don't care about query
225 61809 : nsnull, nsnull); // don't care about ref
226 61809 : if (NS_FAILED(rv)) return rv;
227 :
228 61809 : filepathPos += pathPos;
229 :
230 : // invoke the parser to extract the directory and filename from filepath
231 : rv = parser->ParseFilePath(url + filepathPos, filepathLen,
232 : &directoryPos, &directoryLen,
233 : &basenamePos, &basenameLen,
234 61809 : &extensionPos, &extensionLen);
235 61809 : if (NS_FAILED(rv)) return rv;
236 :
237 61809 : if (directoryLen > 0)
238 61809 : outDirectory = Substring(inURL, filepathPos + directoryPos, directoryLen);
239 61809 : if (basenameLen > 0)
240 61806 : outFileBaseName = Substring(inURL, filepathPos + basenamePos, basenameLen);
241 61809 : if (extensionLen > 0)
242 61790 : outFileExtension = Substring(inURL, filepathPos + extensionPos, extensionLen);
243 : // since we are using a no-auth url parser, there will never be a host
244 : // XXX not strictly true... file://localhost/foo/bar.html is a valid URL
245 :
246 61809 : return NS_OK;
247 : }
248 :
249 : //----------------------------------------------------------------------------
250 : // path manipulation functions
251 : //----------------------------------------------------------------------------
252 :
// Normalizes |path| in place. Work is limited to the part of the string that
// precedes the first '?' or '#'; everything from that character on is copied
// through unchanged.
//
//   pass 1: "%2E" / "%2e" is rewritten to "." in the part of the path that
//           precedes its last '/'.
//   pass 2: "/./" segments are dropped and "/.." segments remove the segment
//           written before them.
//   finally a trailing "/." is reduced to "/".
//
// flags: NET_COALESCE_DOUBLE_SLASH_IS_ROOT makes a leading "//" or "/%2F" be
//        remembered as a root marker; NET_COALESCE_ALLOW_RELATIVE_ROOT keeps
//        ".." segments that would climb above the root instead of dropping
//        them.
// path:  NUL-terminated, writable buffer holding the path to normalize.
253 : // Replace all /./ with a / while resolving URLs
254 : // But only up to the first '#' or '?'
255 : void
256 384092 : net_CoalesceDirs(netCoalesceFlags flags, char* path)
257 : {
258 : /* Stolen from the old netlib's mkparse.c.
259 : *
260 : * modifies a url of the form /foo/../foo1 -> /foo1
261 : * and /foo/./foo1 -> /foo/foo1
262 : * and /foo/foo1/.. -> /foo/
263 : */
// fwdPtr is the read cursor and urlPtr the write cursor; both walk |path|.
264 384092 : char *fwdPtr = path;
265 384092 : char *urlPtr = path;
266 384092 : char *lastslash = path;
// Number of '/'-introduced segments copied so far that a ".." may still pop.
267 384092 : PRUint32 traversal = 0;
// Length of a leading root marker ("//" -> 2, "/%2F" -> 4), or 0 if none.
268 384092 : PRUint32 special_ftp_len = 0;
269 :
270 : /* Remember if this url is a special ftp one: */
271 384092 : if (flags & NET_COALESCE_DOUBLE_SLASH_IS_ROOT)
272 : {
273 : /* some schemes (for example ftp) have the speciality that
274 : the path can begin // or /%2F to mark the root of the
275 : servers filesystem, a simple / only marks the root relative
276 : to the user logging in. We remember the length of the marker */
277 172 : if (nsCRT::strncasecmp(path,"/%2F",4) == 0)
278 0 : special_ftp_len = 4;
279 172 : else if (nsCRT::strncmp(path,"//",2) == 0 )
280 0 : special_ftp_len = 2;
281 : }
282 :
283 : /* find the last slash before # or ? */
// First advance to the end of the path part (NUL, '?' or '#') ...
284 384092 : for(; (*fwdPtr != '\0') &&
285 : (*fwdPtr != '?') &&
286 : (*fwdPtr != '#'); ++fwdPtr)
287 : {
288 : }
289 :
290 : /* found nothing, but go back one only */
291 : /* if there is something to go back to */
292 384092 : if (fwdPtr != path && *fwdPtr == '\0')
293 : {
294 382453 : --fwdPtr;
295 : }
296 :
297 : /* search the slash */
// ... then walk backwards to the nearest '/'. If there is none, the walk
// ends at the start of |path|.
298 384092 : for(; (fwdPtr != path) &&
299 : (*fwdPtr != '/'); --fwdPtr)
300 : {
301 : }
302 384092 : lastslash = fwdPtr;
303 384092 : fwdPtr = path;
304 :
305 : /* replace all %2E or %2e with . in the path */
306 : /* but stop at lastchar if non null */
// The loop ends when the read cursor reaches lastslash, so escapes after the
// last slash are left encoded.
307 24016150 : for(; (*fwdPtr != '\0') &&
308 : (*fwdPtr != '?') &&
309 : (*fwdPtr != '#') &&
310 : (*lastslash == '\0' || fwdPtr != lastslash); ++fwdPtr)
311 : {
312 23632070 : if (*fwdPtr == '%' && *(fwdPtr+1) == '2' &&
313 12 : (*(fwdPtr+2) == 'E' || *(fwdPtr+2) == 'e'))
314 : {
// Three input characters collapse into one output character.
315 0 : *urlPtr++ = '.';
316 0 : ++fwdPtr;
317 0 : ++fwdPtr;
318 : }
319 : else
320 : {
321 23632058 : *urlPtr++ = *fwdPtr;
322 : }
323 : }
324 : // Copy remaining stuff past the #?;
325 4556305 : for (; *fwdPtr != '\0'; ++fwdPtr)
326 : {
327 4172213 : *urlPtr++ = *fwdPtr;
328 : }
329 384092 : *urlPtr = '\0'; // terminate the url
330 :
331 : // start again, this time for real
332 384092 : fwdPtr = path;
333 384092 : urlPtr = path;
334 :
335 28165597 : for(; (*fwdPtr != '\0') &&
336 : (*fwdPtr != '?') &&
337 : (*fwdPtr != '#'); ++fwdPtr)
338 : {
339 27781505 : if (*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '/' )
340 : {
341 : // remove . followed by slash
// Skip the '/' and, via the loop increment, the '.'; the following '/' is
// then handled by the next iteration.
342 49 : ++fwdPtr;
343 : }
344 27781692 : else if(*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '.' &&
345 158 : (*(fwdPtr+3) == '/' ||
346 34 : *(fwdPtr+3) == '\0' || // This will take care of
347 22 : *(fwdPtr+3) == '?' || // something like foo/bar/..#sometag
348 22 : *(fwdPtr+3) == '#'))
349 : {
350 : // remove foo/..
351 : // reverse the urlPtr to the previous slash if possible
352 : // if url does not allow relative root then drop .. above root
353 : // otherwise retain them in the path
354 294 : if(traversal > 0 || !(flags &
355 22 : NET_COALESCE_ALLOW_RELATIVE_ROOT))
356 : {
357 136 : if (urlPtr != path)
358 102 : urlPtr--; // we must be going back at least by one
359 136 : for(;*urlPtr != '/' && urlPtr != path; urlPtr--)
360 : ; // null body
// NOTE(review): traversal is unsigned, so this decrement wraps when it is
// already 0. That can only happen when ALLOW_RELATIVE_ROOT is not set, in
// which case the flag test above selects this branch regardless of the
// counter -- looks harmless, but confirm.
361 136 : --traversal; // count back
362 : // forward the fwdPtr past the ../
363 136 : fwdPtr += 2;
364 : // if we have reached the beginning of the path
365 : // while searching for the previous / and we remember
366 : // that it is an url that begins with /%2F then
367 : // advance urlPtr again by 3 chars because /%2F already
368 : // marks the root of the path
369 136 : if (urlPtr == path && special_ftp_len > 3)
370 : {
371 0 : ++urlPtr;
372 0 : ++urlPtr;
373 0 : ++urlPtr;
374 : }
375 : // special case if we have reached the end
376 : // to preserve the last /
377 272 : if (*fwdPtr == '.' && *(fwdPtr+1) == '\0')
378 12 : ++urlPtr;
379 : }
380 : else
381 : {
382 : // there are too many /.. in this path, just copy them instead.
383 : // forward the urlPtr past the /.. and copying it
384 :
385 : // However if we remember it is an url that starts with
386 : // /%2F and urlPtr just points at the "F" of "/%2F" then do
387 : // not overwrite it with the /, just copy .. and move forward
388 : // urlPtr.
389 0 : if (special_ftp_len > 3 && urlPtr == path+special_ftp_len-1)
390 0 : ++urlPtr;
391 : else
392 0 : *urlPtr++ = *fwdPtr;
393 0 : ++fwdPtr;
394 0 : *urlPtr++ = *fwdPtr;
395 0 : ++fwdPtr;
396 0 : *urlPtr++ = *fwdPtr;
397 : }
398 : }
399 : else
400 : {
401 : // count the hierarchy, but only if we have not reached
402 : // the root of some special urls with a special root marker
403 27781320 : if (*fwdPtr == '/' && *(fwdPtr+1) != '.' &&
404 0 : (special_ftp_len != 2 || *(fwdPtr+1) != '/'))
405 3234000 : traversal++;
406 : // copy the url incrementally
407 27781320 : *urlPtr++ = *fwdPtr;
408 : }
409 : }
410 :
411 : /*
412 : * Now lets remove trailing . case
413 : * /foo/foo1/. -> /foo/foo1/
414 : */
415 :
416 384092 : if ((urlPtr > (path+1)) && (*(urlPtr-1) == '.') && (*(urlPtr-2) == '/'))
417 82 : urlPtr--;
418 :
419 : // Copy remaining stuff past the #?;
420 406537 : for (; *fwdPtr != '\0'; ++fwdPtr)
421 : {
422 22445 : *urlPtr++ = *fwdPtr;
423 : }
424 384092 : *urlPtr = '\0'; // terminate the url
425 384092 : }
426 :
427 : nsresult
428 0 : net_ResolveRelativePath(const nsACString &relativePath,
429 : const nsACString &basePath,
430 : nsACString &result)
431 : {
432 0 : nsCAutoString name;
433 0 : nsCAutoString path(basePath);
434 0 : bool needsDelim = false;
435 :
436 0 : if ( !path.IsEmpty() ) {
437 0 : PRUnichar last = path.Last();
438 0 : needsDelim = !(last == '/');
439 : }
440 :
441 0 : nsACString::const_iterator beg, end;
442 0 : relativePath.BeginReading(beg);
443 0 : relativePath.EndReading(end);
444 :
445 0 : bool stop = false;
446 : char c;
447 0 : for (; !stop; ++beg) {
448 0 : c = (beg == end) ? '\0' : *beg;
449 : //printf("%c [name=%s] [path=%s]\n", c, name.get(), path.get());
450 0 : switch (c) {
451 : case '\0':
452 : case '#':
453 : case '?':
454 0 : stop = true;
455 : // fall through...
456 : case '/':
457 : // delimiter found
458 0 : if (name.EqualsLiteral("..")) {
459 : // pop path
460 : // If we already have the delim at end, then
461 : // skip over that when searching for next one to the left
462 0 : PRInt32 offset = path.Length() - (needsDelim ? 1 : 2);
463 : // First check for errors
464 0 : if (offset < 0 )
465 0 : return NS_ERROR_MALFORMED_URI;
466 0 : PRInt32 pos = path.RFind("/", false, offset);
467 0 : if (pos >= 0)
468 0 : path.Truncate(pos + 1);
469 : else
470 0 : path.Truncate();
471 : }
472 0 : else if (name.IsEmpty() || name.EqualsLiteral(".")) {
473 : // do nothing
474 : }
475 : else {
476 : // append name to path
477 0 : if (needsDelim)
478 0 : path += '/';
479 0 : path += name;
480 0 : needsDelim = true;
481 : }
482 0 : name.Truncate();
483 0 : break;
484 :
485 : default:
486 : // append char to name
487 0 : name += c;
488 : }
489 : }
490 : // append anything left on relativePath (e.g. #..., ;..., ?...)
491 0 : if (c != '\0')
492 0 : path += Substring(--beg, end);
493 :
494 0 : result = path;
495 0 : return NS_OK;
496 : }
497 :
498 : //----------------------------------------------------------------------------
499 : // scheme fu
500 : //----------------------------------------------------------------------------
501 :
502 : /* Extract URI-Scheme if possible */
503 : nsresult
504 464679 : net_ExtractURLScheme(const nsACString &inURI,
505 : PRUint32 *startPos,
506 : PRUint32 *endPos,
507 : nsACString *scheme)
508 : {
509 : // search for something up to a colon, and call it the scheme
510 929358 : const nsPromiseFlatCString &flatURI = PromiseFlatCString(inURI);
511 464679 : const char* uri_start = flatURI.get();
512 464679 : const char* uri = uri_start;
513 :
514 464679 : if (!uri)
515 0 : return NS_ERROR_MALFORMED_URI;
516 :
517 : // skip leading white space
518 929358 : while (nsCRT::IsAsciiSpace(*uri))
519 0 : uri++;
520 :
521 464679 : PRUint32 start = uri - uri_start;
522 464679 : if (startPos) {
523 171820 : *startPos = start;
524 : }
525 :
526 464679 : PRUint32 length = 0;
527 : char c;
528 3512253 : while ((c = *uri++) != '\0') {
529 : // First char must be Alpha
530 3043460 : if (length == 0 && nsCRT::IsAsciiAlpha(c)) {
531 463610 : length++;
532 : }
533 : // Next chars can be alpha + digit + some special chars
534 3105056 : else if (length > 0 && (nsCRT::IsAsciiAlpha(c) ||
535 525206 : nsCRT::IsAsciiDigit(c) || c == '+' ||
536 : c == '.' || c == '-')) {
537 2119285 : length++;
538 : }
539 : // stop if colon reached but not as first char
540 460565 : else if (c == ':' && length > 0) {
541 338790 : if (endPos) {
542 109080 : *endPos = start + length;
543 : }
544 :
545 338790 : if (scheme)
546 229710 : scheme->Assign(Substring(inURI, start, length));
547 338790 : return NS_OK;
548 : }
549 : else
550 121775 : break;
551 : }
552 125889 : return NS_ERROR_MALFORMED_URI;
553 : }
554 :
555 : bool
556 356369 : net_IsValidScheme(const char *scheme, PRUint32 schemeLen)
557 : {
558 : // first char must be alpha
559 356369 : if (!nsCRT::IsAsciiAlpha(*scheme))
560 1 : return false;
561 :
562 : // nsCStrings may have embedded nulls -- reject those too
563 2085628 : for (; schemeLen; ++scheme, --schemeLen) {
564 1729260 : if (!(nsCRT::IsAsciiAlpha(*scheme) ||
565 1482 : nsCRT::IsAsciiDigit(*scheme) ||
566 : *scheme == '+' ||
567 : *scheme == '.' ||
568 1482 : *scheme == '-'))
569 0 : return false;
570 : }
571 :
572 356368 : return true;
573 : }
574 :
// Strips unwanted whitespace from a URI string.
//
// Leading ' ', '\t', '\r' and '\n' and trailing ' ' are dropped, and embedded
// '\t', '\r' and '\n' are removed -- except in what may still turn out to be
// the scheme (the text before the first ':'). That part is only cleaned once
// it is known not to be a scheme: a '/' or '@' was seen before any ':', or
// the string ended without a ':'.
//
// str:    NUL-terminated input; must not be null.
// result: truncated on entry; filtered text is appended to it only when
//         something had to be removed.
//
// Returns true if the input needed filtering (use |result|), false if |str|
// can be used unchanged.
575 : bool
576 427160 : net_FilterURIString(const char *str, nsACString& result)
577 : {
578 427160 : NS_PRECONDITION(str, "Must have a non-null string!");
// Set as soon as any character has been dropped.
579 427160 : bool writing = false;
580 427160 : result.Truncate();
// p is the scan cursor; str is moved forward to mark the start of the text
// that has not been appended to |result| yet.
581 427160 : const char *p = str;
582 :
583 : // Remove leading spaces, tabs, CR, LF if any.
584 854320 : while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
585 0 : writing = true;
586 0 : str = p + 1;
587 0 : p++;
588 : }
589 :
590 : // Don't strip from the scheme, because other code assumes everything
591 : // up to the ':' is the scheme, and it's bad not to have it match.
592 : // If there's no ':', strip.
593 427160 : bool found_colon = false;
// Position of the first '\t', '\r' or '\n' seen before stripping was allowed.
594 427160 : const char *first = nsnull;
595 23672336 : while (*p) {
596 22818016 : switch (*p) {
597 : case '\t':
598 : case '\r':
599 : case '\n':
600 151 : if (found_colon) {
601 151 : writing = true;
602 : // append chars up to but not including *p
603 151 : if (p > str)
604 79 : result.Append(str, p - str);
605 151 : str = p + 1;
606 : } else {
607 : // remember where the first \t\r\n was in case we find no scheme
608 0 : if (!first)
609 0 : first = p;
610 : }
611 151 : break;
612 :
613 : case ':':
614 301924 : found_colon = true;
615 301924 : break;
616 :
617 : case '/':
618 : case '@':
619 2709503 : if (!found_colon) {
620 : // colon also has to precede / or @ to be a scheme
621 119239 : found_colon = true; // not really, but means ok to strip
622 119239 : if (first) {
623 : // go back and replace
// Rewind to the first skipped control character; with found_colon now set,
// the rescan strips it and any that follow.
624 0 : p = first;
625 0 : continue; // process *p again
626 : }
627 : }
628 2709503 : break;
629 :
630 : default:
631 19806438 : break;
632 : }
633 22818016 : p++;
634 :
635 : // At end, if there was no scheme, and we hit a control char, fix
636 : // it up now.
637 22818016 : if (!*p && first != nsnull && !found_colon) {
638 : // TRICKY - to avoid duplicating code, we reset the loop back
639 : // to the point we found something to do
640 0 : p = first;
641 : // This also stops us from looping after we finish
642 0 : found_colon = true; // so we'll replace \t\r\n
643 : }
644 : }
645 :
646 : // Remove trailing spaces if any
647 854320 : while (((p-1) >= str) && (*(p-1) == ' ')) {
648 0 : writing = true;
649 0 : p--;
650 : }
651 :
// Flush the pending tail, but only if the input was modified at all.
652 427160 : if (writing && p > str)
653 79 : result.Append(str, p - str);
654 :
655 427160 : return writing;
656 : }
657 :
#if defined(XP_WIN) || defined(XP_OS2)
// Converts every backslash in |aURL| to a forward slash.
//
// The converted text is appended to |aResultBuf| (which is not cleared
// first), and only when |aURL| contains at least one backslash.
//
// Returns true if a backslash was found (use |aResultBuf|), false if |aURL|
// can be used unchanged.
bool
net_NormalizeFileURL(const nsACString &aURL, nsCString &aResultBuf)
{
    nsACString::const_iterator first, last;
    aURL.BeginReading(first);
    aURL.EndReading(last);

    const char *const urlEnd = last.get();
    // Start of the run of characters not yet copied to aResultBuf.
    const char *runStart = first.get();
    bool changed = false;

    for (const char *cur = runStart; cur != urlEnd; ++cur) {
        if (*cur != '\\')
            continue;

        changed = true;
        if (cur > runStart)
            aResultBuf.Append(runStart, cur - runStart);
        aResultBuf += '/';
        runStart = cur + 1;
    }

    // Flush whatever follows the last backslash.
    if (changed && urlEnd > runStart)
        aResultBuf.Append(runStart, urlEnd - runStart);

    return changed;
}
#endif
687 :
688 : //----------------------------------------------------------------------------
689 : // miscellaneous (i.e., stuff that should really be elsewhere)
690 : //----------------------------------------------------------------------------
691 :
// Converts |c| to lower case in place if it is an ASCII upper-case letter;
// every other value is left untouched.
static inline
void ToLower(char &c)
{
    const bool isUpper = (c >= 'A' && c <= 'Z');
    if (isUpper)
        c = char(c + ('a' - 'A'));
}
698 :
699 : void
700 422728 : net_ToLowerCase(char *str, PRUint32 length)
701 : {
702 2822384 : for (char *end = str + length; str < end; ++str)
703 2399656 : ToLower(*str);
704 422728 : }
705 :
706 : void
707 0 : net_ToLowerCase(char *str)
708 : {
709 0 : for (; *str; ++str)
710 0 : ToLower(*str);
711 0 : }
712 :
// Scans [iter, stop) for the first character that occurs in |set|.
//
// iter, stop: range to scan; the scan also ends at a NUL character.
// set:        NUL-terminated list of characters to look for.
//
// Returns a pointer to the first match, or to the position where the scan
// ended (|stop| or the NUL) when there is none.
char *
net_FindCharInSet(const char *iter, const char *stop, const char *set)
{
    while (iter != stop && *iter != '\0') {
        const char *candidate = set;
        while (*candidate != '\0' && *candidate != *iter)
            ++candidate;

        if (*candidate != '\0')
            break;  // *iter is one of the characters in set

        ++iter;
    }
    return (char *) iter;
}
724 :
// Scans [iter, stop) for the first character that does NOT occur in |set|.
//
// iter, stop: range to scan.
// set:        NUL-terminated list of characters to skip over.
//
// Returns a pointer to the first character not in |set|, or |stop| when
// every character of the range is in |set|. An empty range returns |stop|
// without examining any character.
char *
net_FindCharNotInSet(const char *iter, const char *stop, const char *set)
{
    // Testing iter against stop before every dereference keeps an empty
    // range from reading *stop and from running past the end of the buffer.
    for (; iter != stop; ++iter) {
        const char *s = set;
        while (*s && *s != *iter)
            ++s;

        if (!*s)
            break;  // reached the end of set: *iter is not in it
    }
    return (char *) iter;
}
738 :
// Scans [stop, iter) backwards for the last character that does NOT occur in
// |set|.
//
// stop: start of the range (lower bound of the backwards scan).
// iter: one past the end of the range; the scan starts at iter - 1.
// set:  NUL-terminated list of characters to skip over.
//
// Returns a pointer to the last character not in |set|, or stop - 1 when the
// range is empty or consists only of characters from |set|.
char *
net_RFindCharNotInSet(const char *stop, const char *iter, const char *set)
{
    const char *cur = iter - 1;
    const char *const limit = stop - 1;

    while (cur != limit) {
        const char *s = set;
        while (*s && *s != *cur)
            ++s;

        if (!*s)
            break;  // reached the end of set: *cur is not in it

        --cur;
    }
    return (char *) cur;
}
758 :
// Linear whitespace characters (space and horizontal tab) that the
// media-type parsing code below skips around tokens.
759 : #define HTTP_LWS " \t"
760 :
761 : // Return the index of the closing quote of the string, if any
762 : static PRUint32
763 73 : net_FindStringEnd(const nsCString& flatStr,
764 : PRUint32 stringStart,
765 : char stringDelim)
766 : {
767 73 : NS_ASSERTION(stringStart < flatStr.Length() &&
768 : flatStr.CharAt(stringStart) == stringDelim &&
769 : (stringDelim == '"' || stringDelim == '\''),
770 : "Invalid stringStart");
771 :
772 73 : const char set[] = { stringDelim, '\\', '\0' };
773 13 : do {
774 : // stringStart points to either the start quote or the last
775 : // escaped char (the char following a '\\')
776 :
777 : // Write to searchStart here, so that when we get back to the
778 : // top of the loop right outside this one we search from the
779 : // right place.
780 86 : PRUint32 stringEnd = flatStr.FindCharInSet(set, stringStart + 1);
781 86 : if (stringEnd == PRUint32(kNotFound))
782 0 : return flatStr.Length();
783 :
784 86 : if (flatStr.CharAt(stringEnd) == '\\') {
785 : // Hit a backslash-escaped char. Need to skip over it.
786 13 : stringStart = stringEnd + 1;
787 13 : if (stringStart == flatStr.Length())
788 0 : return stringStart;
789 :
790 : // Go back to looking for the next escape or the string end
791 13 : continue;
792 : }
793 :
794 73 : return stringEnd;
795 :
796 : } while (true);
797 :
798 : NS_NOTREACHED("How did we get here?");
799 : return flatStr.Length();
800 : }
801 :
802 :
803 : static PRUint32
804 11111 : net_FindMediaDelimiter(const nsCString& flatStr,
805 : PRUint32 searchStart,
806 : char delimiter)
807 : {
808 60 : do {
809 : // searchStart points to the spot from which we should start looking
810 : // for the delimiter.
811 11111 : const char delimStr[] = { delimiter, '"', '\0' };
812 11111 : PRUint32 curDelimPos = flatStr.FindCharInSet(delimStr, searchStart);
813 11111 : if (curDelimPos == PRUint32(kNotFound))
814 10993 : return flatStr.Length();
815 :
816 118 : char ch = flatStr.CharAt(curDelimPos);
817 118 : if (ch == delimiter) {
818 : // Found delimiter
819 58 : return curDelimPos;
820 : }
821 :
822 : // We hit the start of a quoted string. Look for its end.
823 60 : searchStart = net_FindStringEnd(flatStr, curDelimPos, ch);
824 60 : if (searchStart == flatStr.Length())
825 0 : return searchStart;
826 :
827 60 : ++searchStart;
828 :
829 : // searchStart now points to the first char after the end of the
830 : // string, so just go back to the top of the loop and look for
831 : // |delimiter| again.
832 : } while (true);
833 :
834 : NS_NOTREACHED("How did we get here?");
835 : return flatStr.Length();
836 : }
837 :
838 : // aOffset should be added to aCharsetStart and aCharsetEnd if this
839 : // function sets them.
840 : static void
841 10989 : net_ParseMediaType(const nsACString &aMediaTypeStr,
842 : nsACString &aContentType,
843 : nsACString &aContentCharset,
844 : PRInt32 aOffset,
845 : bool *aHadCharset,
846 : PRInt32 *aCharsetStart,
847 : PRInt32 *aCharsetEnd)
848 : {
849 21978 : const nsCString& flatStr = PromiseFlatCString(aMediaTypeStr);
850 10989 : const char* start = flatStr.get();
851 10989 : const char* end = start + flatStr.Length();
852 :
853 : // Trim LWS leading and trailing whitespace from type. We include '(' in
854 : // the trailing trim set to catch media-type comments, which are not at all
855 : // standard, but may occur in rare cases.
856 10989 : const char* type = net_FindCharNotInSet(start, end, HTTP_LWS);
857 10989 : const char* typeEnd = net_FindCharInSet(type, end, HTTP_LWS ";(");
858 :
859 10989 : const char* charset = "";
860 10989 : const char* charsetEnd = charset;
861 : PRInt32 charsetParamStart;
862 : PRInt32 charsetParamEnd;
863 :
864 : // Iterate over parameters
865 10989 : bool typeHasCharset = false;
866 10989 : PRUint32 paramStart = flatStr.FindChar(';', typeEnd - start);
867 10989 : if (paramStart != PRUint32(kNotFound)) {
868 : // We have parameters. Iterate over them.
869 48 : PRUint32 curParamStart = paramStart + 1;
870 62 : do {
871 : PRUint32 curParamEnd =
872 62 : net_FindMediaDelimiter(flatStr, curParamStart, ';');
873 :
874 : const char* paramName = net_FindCharNotInSet(start + curParamStart,
875 : start + curParamEnd,
876 62 : HTTP_LWS);
877 : static const char charsetStr[] = "charset=";
878 62 : if (PL_strncasecmp(paramName, charsetStr,
879 62 : sizeof(charsetStr) - 1) == 0) {
880 36 : charset = paramName + sizeof(charsetStr) - 1;
881 36 : charsetEnd = start + curParamEnd;
882 36 : typeHasCharset = true;
883 36 : charsetParamStart = curParamStart - 1;
884 36 : charsetParamEnd = curParamEnd;
885 : }
886 :
887 62 : curParamStart = curParamEnd + 1;
888 62 : } while (curParamStart < flatStr.Length());
889 : }
890 :
891 10989 : bool charsetNeedsQuotedStringUnescaping = false;
892 10989 : if (typeHasCharset) {
893 : // Trim LWS leading and trailing whitespace from charset. We include
894 : // '(' in the trailing trim set to catch media-type comments, which are
895 : // not at all standard, but may occur in rare cases.
896 36 : charset = net_FindCharNotInSet(charset, charsetEnd, HTTP_LWS);
897 36 : if (*charset == '"') {
898 13 : charsetNeedsQuotedStringUnescaping = true;
899 : charsetEnd =
900 13 : start + net_FindStringEnd(flatStr, charset - start, *charset);
901 13 : charset++;
902 13 : NS_ASSERTION(charsetEnd >= charset, "Bad charset parsing");
903 : } else {
904 23 : charsetEnd = net_FindCharInSet(charset, charsetEnd, HTTP_LWS ";(");
905 : }
906 : }
907 :
908 : // if the server sent "*/*", it is meaningless, so do not store it.
909 : // also, if type is the same as aContentType, then just update the
910 : // charset. however, if charset is empty and aContentType hasn't
911 : // changed, then don't wipe-out an existing aContentCharset. We
912 : // also want to reject a mime-type if it does not include a slash.
913 : // some servers give junk after the charset parameter, which may
914 : // include a comma, so this check makes us a bit more tolerant.
915 :
916 21972 : if (type != typeEnd && strncmp(type, "*/*", typeEnd - type) != 0 &&
917 10983 : memchr(type, '/', typeEnd - type) != NULL) {
918 : // Common case here is that aContentType is empty
919 10778 : bool eq = !aContentType.IsEmpty() &&
920 6423 : aContentType.Equals(Substring(type, typeEnd),
921 21556 : nsCaseInsensitiveCStringComparator());
922 10778 : if (!eq) {
923 10750 : aContentType.Assign(type, typeEnd - type);
924 10750 : ToLowerCase(aContentType);
925 : }
926 :
927 10778 : if ((!eq && *aHadCharset) || typeHasCharset) {
928 38 : *aHadCharset = true;
929 38 : if (charsetNeedsQuotedStringUnescaping) {
930 : // parameters using the "quoted-string" syntax need
931 : // backslash-escapes to be unescaped (see RFC 2616 Section 2.2)
932 13 : aContentCharset.Truncate();
933 133 : for (const char *c = charset; c != charsetEnd; c++) {
934 120 : if (*c == '\\' && c + 1 != charsetEnd) {
935 : // eat escape
936 1 : c++;
937 : }
938 120 : aContentCharset.Append(*c);
939 : }
940 : }
941 : else {
942 25 : aContentCharset.Assign(charset, charsetEnd - charset);
943 : }
944 38 : if (typeHasCharset) {
945 36 : *aCharsetStart = charsetParamStart + aOffset;
946 36 : *aCharsetEnd = charsetParamEnd + aOffset;
947 : }
948 : }
949 : // Only set a new charset position if this is a different type
950 : // from the last one we had and it doesn't already have a
951 : // charset param. If this is the same type, we probably want
952 : // to leave the charset position on its first occurrence.
953 10778 : if (!eq && !typeHasCharset) {
954 10716 : PRInt32 charsetStart = PRInt32(paramStart);
955 10716 : if (charsetStart == kNotFound)
956 10704 : charsetStart = flatStr.Length();
957 :
958 10716 : *aCharsetEnd = *aCharsetStart = charsetStart + aOffset;
959 : }
960 : }
961 10989 : }
962 :
963 : #undef HTTP_LWS
964 :
965 : void
966 10918 : net_ParseContentType(const nsACString &aHeaderStr,
967 : nsACString &aContentType,
968 : nsACString &aContentCharset,
969 : bool *aHadCharset)
970 : {
971 : PRInt32 dummy1, dummy2;
972 : net_ParseContentType(aHeaderStr, aContentType, aContentCharset,
973 10918 : aHadCharset, &dummy1, &dummy2);
974 10918 : }
975 :
976 : void
977 10945 : net_ParseContentType(const nsACString &aHeaderStr,
978 : nsACString &aContentType,
979 : nsACString &aContentCharset,
980 : bool *aHadCharset,
981 : PRInt32 *aCharsetStart,
982 : PRInt32 *aCharsetEnd)
983 : {
984 : //
985 : // Augmented BNF (from RFC 2616 section 3.7):
986 : //
987 : // header-value = media-type *( LWS "," LWS media-type )
988 : // media-type = type "/" subtype *( LWS ";" LWS parameter )
989 : // type = token
990 : // subtype = token
991 : // parameter = attribute "=" value
992 : // attribute = token
993 : // value = token | quoted-string
994 : //
995 : //
996 : // Examples:
997 : //
998 : // text/html
999 : // text/html, text/html
1000 : // text/html,text/html; charset=ISO-8859-1
1001 : // text/html,text/html; charset="ISO-8859-1"
1002 : // text/html;charset=ISO-8859-1, text/html
1003 : // text/html;charset='ISO-8859-1', text/html
1004 : // application/octet-stream
1005 : //
1006 :
1007 10945 : *aHadCharset = false;
1008 21890 : const nsCString& flatStr = PromiseFlatCString(aHeaderStr);
1009 :
1010 : // iterate over media-types. Note that ',' characters can happen
1011 : // inside quoted strings, so we need to watch out for that.
1012 10945 : PRUint32 curTypeStart = 0;
1013 10989 : do {
1014 : // curTypeStart points to the start of the current media-type. We want
1015 : // to look for its end.
1016 : PRUint32 curTypeEnd =
1017 10989 : net_FindMediaDelimiter(flatStr, curTypeStart, ',');
1018 :
1019 : // At this point curTypeEnd points to the spot where the media-type
1020 : // starting at curTypeEnd ends. Time to parse that!
1021 : net_ParseMediaType(Substring(flatStr, curTypeStart,
1022 10989 : curTypeEnd - curTypeStart),
1023 : aContentType, aContentCharset, curTypeStart,
1024 10989 : aHadCharset, aCharsetStart, aCharsetEnd);
1025 :
1026 : // And let's move on to the next media-type
1027 10989 : curTypeStart = curTypeEnd + 1;
1028 10989 : } while (curTypeStart < flatStr.Length());
1029 10945 : }
1030 :
1031 : bool
1032 11575 : net_IsValidHostName(const nsCSubstring &host)
1033 : {
1034 11575 : const char *end = host.EndReading();
1035 : // Use explicit whitelists to select which characters we are
1036 : // willing to send to lower-level DNS logic. This is more
1037 : // self-documenting, and can also be slightly faster than the
1038 : // blacklist approach, since DNS names are the common case, and
1039 : // the commonest characters will tend to be near the start of
1040 : // the list.
1041 :
1042 : // Whitelist for DNS names (RFC 1035) with extra characters added
1043 : // for pragmatic reasons "$+_"
1044 : // see https://bugzilla.mozilla.org/show_bug.cgi?id=355181#c2
1045 11575 : if (net_FindCharNotInSet(host.BeginReading(), end,
1046 : "abcdefghijklmnopqrstuvwxyz"
1047 : ".-0123456789"
1048 11575 : "ABCDEFGHIJKLMNOPQRSTUVWXYZ$+_") == end)
1049 11575 : return true;
1050 :
1051 : // Might be a valid IPv6 link-local address containing a percent sign
1052 0 : nsCAutoString strhost(host);
1053 : PRNetAddr addr;
1054 0 : return PR_StringToNetAddr(strhost.get(), &addr) == PR_SUCCESS;
1055 : }
1056 :
1057 : bool
1058 63 : net_IsValidIPv4Addr(const char *addr, PRInt32 addrLen)
1059 : {
1060 63 : RangedPtr<const char> p(addr, addrLen);
1061 :
1062 63 : PRInt32 octet = -1; // means no digit yet
1063 63 : PRInt32 dotCount = 0; // number of dots in the address
1064 :
1065 643 : for (; addrLen; ++p, --addrLen) {
1066 585 : if (*p == '.') {
1067 177 : dotCount++;
1068 177 : if (octet == -1) {
1069 : // invalid octet
1070 2 : return false;
1071 : }
1072 175 : octet = -1;
1073 408 : } else if (*p >= '0' && *p <='9') {
1074 408 : if (octet == 0) {
1075 : // leading 0 is not allowed
1076 1 : return false;
1077 407 : } else if (octet == -1) {
1078 235 : octet = *p - '0';
1079 : } else {
1080 172 : octet *= 10;
1081 172 : octet += *p - '0';
1082 172 : if (octet > 255)
1083 2 : return false;
1084 : }
1085 : } else {
1086 : // invalid character
1087 0 : return false;
1088 : }
1089 : }
1090 :
1091 58 : return (dotCount == 3 && octet != -1);
1092 : }
1093 :
1094 : bool
1095 138 : net_IsValidIPv6Addr(const char *addr, PRInt32 addrLen)
1096 : {
1097 138 : RangedPtr<const char> p(addr, addrLen);
1098 :
1099 138 : PRInt32 digits = 0; // number of digits in current block
1100 138 : PRInt32 colons = 0; // number of colons in a row during parsing
1101 138 : PRInt32 blocks = 0; // number of hexadecimal blocks
1102 138 : bool haveZeros = false; // true if double colon is present in the address
1103 :
1104 2557 : for (; addrLen; ++p, --addrLen) {
1105 2492 : if (*p == ':') {
1106 594 : if (colons == 0) {
1107 513 : if (digits != 0) {
1108 447 : digits = 0;
1109 447 : blocks++;
1110 : }
1111 81 : } else if (colons == 1) {
1112 78 : if (haveZeros)
1113 1 : return false; // only one occurrence is allowed
1114 77 : haveZeros = true;
1115 : } else {
1116 : // too many colons in a row
1117 3 : return false;
1118 : }
1119 590 : colons++;
1120 2853 : } else if ((*p >= '0' && *p <= '9') || (*p >= 'a' && *p <= 'f') ||
1121 955 : (*p >= 'A' && *p <= 'F')) {
1122 1833 : if (colons == 1 && blocks == 0) // starts with a single colon
1123 2 : return false;
1124 1831 : if (digits == 4) // too many digits
1125 2 : return false;
1126 1829 : colons = 0;
1127 1829 : digits++;
1128 65 : } else if (*p == '.') {
1129 : // check valid IPv4 from the beginning of the last block
1130 63 : if (!net_IsValidIPv4Addr(p.get() - digits, addrLen + digits))
1131 7 : return false;
1132 56 : return (haveZeros && blocks < 6) || (!haveZeros && blocks == 6);
1133 : } else {
1134 : // invalid character
1135 2 : return false;
1136 : }
1137 : }
1138 :
1139 65 : if (colons == 1) // ends with a single colon
1140 2 : return false;
1141 :
1142 63 : if (digits) // there is a block at the end
1143 57 : blocks++;
1144 :
1145 63 : return (haveZeros && blocks < 8) || (!haveZeros && blocks == 8);
1146 : }
|