1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is the Mozilla Communicator client code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 1998-2001
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : * Chris Waterson <waterson@netscape.com>
24 : * Robert John Churchill <rjc@netscape.com>
25 : * Pierre Phaneuf <pp@ludusdesign.com>
26 : * Bradley Baetz <bbaetz@cs.mcgill.ca>
27 : *
28 : * Alternatively, the contents of this file may be used under the terms of
29 : * either the GNU General Public License Version 2 or later (the "GPL"), or
30 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
31 : * in which case the provisions of the GPL or the LGPL are applicable instead
32 : * of those above. If you wish to allow use of your version of this file only
33 : * under the terms of either the GPL or the LGPL, and not to allow others to
34 : * use your version of this file under the terms of the MPL, indicate your
35 : * decision by deleting the provisions above and replace them with the notice
36 : * and other provisions required by the GPL or the LGPL. If you do not delete
37 : * the provisions above, a recipient may use your version of this file under
38 : * the terms of any one of the MPL, the GPL or the LGPL.
39 : *
40 : * ***** END LICENSE BLOCK ***** */
41 :
42 : /* This parsing code originally lived in xpfe/components/directory/ - bbaetz */
43 :
44 : #include "mozilla/Util.h"
45 :
46 : #include "prprf.h"
47 :
48 : #include "nsDirIndexParser.h"
49 : #include "nsReadableUtils.h"
50 : #include "nsDirIndex.h"
51 : #include "nsEscape.h"
52 : #include "nsIServiceManager.h"
53 : #include "nsIInputStream.h"
54 : #include "nsIChannel.h"
55 : #include "nsIURI.h"
56 : #include "nsCRT.h"
57 : #include "nsIPrefService.h"
58 : #include "nsIPrefBranch.h"
59 : #include "nsIPrefLocalizedString.h"
60 :
61 : using namespace mozilla;
62 :
63 0 : NS_IMPL_ISUPPORTS3(nsDirIndexParser,
64 : nsIRequestObserver,
65 : nsIStreamListener,
66 : nsIDirIndexParser)
67 :
68 0 : nsDirIndexParser::nsDirIndexParser() {
69 0 : }
70 :
71 : nsresult
72 0 : nsDirIndexParser::Init() {
73 0 : mLineStart = 0;
74 0 : mHasDescription = false;
75 0 : mFormat = nsnull;
76 :
77 : // get default charset to be used for directory listings (fallback to
78 : // ISO-8859-1 if pref is unavailable).
79 0 : NS_NAMED_LITERAL_CSTRING(kFallbackEncoding, "ISO-8859-1");
80 0 : nsXPIDLString defCharset;
81 0 : nsCOMPtr<nsIPrefBranch> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID));
82 0 : if (prefs) {
83 0 : nsCOMPtr<nsIPrefLocalizedString> prefVal;
84 0 : prefs->GetComplexValue("intl.charset.default",
85 : NS_GET_IID(nsIPrefLocalizedString),
86 0 : getter_AddRefs(prefVal));
87 0 : if (prefVal)
88 0 : prefVal->ToString(getter_Copies(defCharset));
89 : }
90 0 : if (!defCharset.IsEmpty())
91 0 : LossyCopyUTF16toASCII(defCharset, mEncoding); // charset labels are always ASCII
92 : else
93 0 : mEncoding.Assign(kFallbackEncoding);
94 :
95 : nsresult rv;
96 : // XXX not threadsafe
97 0 : if (gRefCntParser++ == 0)
98 0 : rv = CallGetService(NS_ITEXTTOSUBURI_CONTRACTID, &gTextToSubURI);
99 : else
100 0 : rv = NS_OK;
101 :
102 0 : return rv;
103 : }
104 :
105 0 : nsDirIndexParser::~nsDirIndexParser() {
106 0 : delete[] mFormat;
107 : // XXX not threadsafe
108 0 : if (--gRefCntParser == 0) {
109 0 : NS_IF_RELEASE(gTextToSubURI);
110 : }
111 0 : }
112 :
113 : NS_IMETHODIMP
114 0 : nsDirIndexParser::SetListener(nsIDirIndexListener* aListener) {
115 0 : mListener = aListener;
116 0 : return NS_OK;
117 : }
118 :
119 : NS_IMETHODIMP
120 0 : nsDirIndexParser::GetListener(nsIDirIndexListener** aListener) {
121 0 : NS_IF_ADDREF(*aListener = mListener.get());
122 0 : return NS_OK;
123 : }
124 :
125 : NS_IMETHODIMP
126 0 : nsDirIndexParser::GetComment(char** aComment) {
127 0 : *aComment = ToNewCString(mComment);
128 :
129 0 : if (!*aComment)
130 0 : return NS_ERROR_OUT_OF_MEMORY;
131 :
132 0 : return NS_OK;
133 : }
134 :
135 : NS_IMETHODIMP
136 0 : nsDirIndexParser::SetEncoding(const char* aEncoding) {
137 0 : mEncoding.Assign(aEncoding);
138 0 : return NS_OK;
139 : }
140 :
141 : NS_IMETHODIMP
142 0 : nsDirIndexParser::GetEncoding(char** aEncoding) {
143 0 : *aEncoding = ToNewCString(mEncoding);
144 :
145 0 : if (!*aEncoding)
146 0 : return NS_ERROR_OUT_OF_MEMORY;
147 :
148 0 : return NS_OK;
149 : }
150 :
151 : NS_IMETHODIMP
152 0 : nsDirIndexParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aCtxt) {
153 0 : return NS_OK;
154 : }
155 :
156 : NS_IMETHODIMP
157 0 : nsDirIndexParser::OnStopRequest(nsIRequest *aRequest, nsISupports *aCtxt,
158 : nsresult aStatusCode) {
159 : // Finish up
160 0 : if (mBuf.Length() > (PRUint32) mLineStart) {
161 0 : ProcessData(aRequest, aCtxt);
162 : }
163 :
164 0 : return NS_OK;
165 : }
166 :
167 : nsDirIndexParser::Field
168 : nsDirIndexParser::gFieldTable[] = {
169 : { "Filename", FIELD_FILENAME },
170 : { "Description", FIELD_DESCRIPTION },
171 : { "Content-Length", FIELD_CONTENTLENGTH },
172 : { "Last-Modified", FIELD_LASTMODIFIED },
173 : { "Content-Type", FIELD_CONTENTTYPE },
174 : { "File-Type", FIELD_FILETYPE },
175 : { nsnull, FIELD_UNKNOWN }
176 : };
177 :
178 : nsrefcnt nsDirIndexParser::gRefCntParser = 0;
179 : nsITextToSubURI *nsDirIndexParser::gTextToSubURI;
180 :
181 : nsresult
182 0 : nsDirIndexParser::ParseFormat(const char* aFormatStr) {
183 : // Parse a "200" format line, and remember the fields and their
184 : // ordering in mFormat. Multiple 200 lines stomp on each other.
185 :
186 : // Lets find out how many elements we have.
187 : // easier to do this then realloc
188 0 : const char* pos = aFormatStr;
189 0 : unsigned int num = 0;
190 0 : do {
191 0 : while (*pos && nsCRT::IsAsciiSpace(PRUnichar(*pos)))
192 0 : ++pos;
193 :
194 0 : ++num;
195 : // There are a maximum of six allowed header fields (doubled plus
196 : // terminator, just in case) -- Bug 443299
197 0 : if (num > (2 * ArrayLength(gFieldTable)))
198 0 : return NS_ERROR_UNEXPECTED;
199 :
200 0 : if (! *pos)
201 0 : break;
202 :
203 0 : while (*pos && !nsCRT::IsAsciiSpace(PRUnichar(*pos)))
204 0 : ++pos;
205 :
206 : } while (*pos);
207 :
208 0 : delete[] mFormat;
209 0 : mFormat = new int[num+1];
210 : // Prevent NULL Deref - Bug 443299
211 0 : if (mFormat == nsnull)
212 0 : return NS_ERROR_OUT_OF_MEMORY;
213 0 : mFormat[num] = -1;
214 :
215 0 : int formatNum=0;
216 0 : do {
217 0 : while (*aFormatStr && nsCRT::IsAsciiSpace(PRUnichar(*aFormatStr)))
218 0 : ++aFormatStr;
219 :
220 0 : if (! *aFormatStr)
221 0 : break;
222 :
223 0 : nsCAutoString name;
224 0 : PRInt32 len = 0;
225 0 : while (aFormatStr[len] && !nsCRT::IsAsciiSpace(PRUnichar(aFormatStr[len])))
226 0 : ++len;
227 0 : name.SetCapacity(len + 1);
228 0 : name.Append(aFormatStr, len);
229 0 : aFormatStr += len;
230 :
231 : // Okay, we're gonna monkey with the nsStr. Bold!
232 0 : name.SetLength(nsUnescapeCount(name.BeginWriting()));
233 :
234 : // All tokens are case-insensitive - http://www.mozilla.org/projects/netlib/dirindexformat.html
235 0 : if (name.LowerCaseEqualsLiteral("description"))
236 0 : mHasDescription = true;
237 :
238 0 : for (Field* i = gFieldTable; i->mName; ++i) {
239 0 : if (name.EqualsIgnoreCase(i->mName)) {
240 0 : mFormat[formatNum] = i->mType;
241 0 : ++formatNum;
242 0 : break;
243 : }
244 : }
245 :
246 : } while (*aFormatStr);
247 :
248 0 : return NS_OK;
249 : }
250 :
251 : nsresult
252 0 : nsDirIndexParser::ParseData(nsIDirIndex *aIdx, char* aDataStr) {
253 : // Parse a "201" data line, using the field ordering specified in
254 : // mFormat.
255 :
256 0 : if (!mFormat) {
257 : // Ignore if we haven't seen a format yet.
258 0 : return NS_OK;
259 : }
260 :
261 0 : nsresult rv = NS_OK;
262 :
263 0 : nsCAutoString filename;
264 :
265 0 : for (PRInt32 i = 0; mFormat[i] != -1; ++i) {
266 : // If we've exhausted the data before we run out of fields, just
267 : // bail.
268 0 : if (! *aDataStr)
269 0 : break;
270 :
271 0 : while (*aDataStr && nsCRT::IsAsciiSpace(*aDataStr))
272 0 : ++aDataStr;
273 :
274 0 : char *value = aDataStr;
275 :
276 0 : if (*aDataStr == '"' || *aDataStr == '\'') {
277 : // it's a quoted string. snarf everything up to the next quote character
278 0 : const char quotechar = *(aDataStr++);
279 0 : ++value;
280 0 : while (*aDataStr && *aDataStr != quotechar)
281 0 : ++aDataStr;
282 0 : *aDataStr++ = '\0';
283 :
284 0 : if (! aDataStr) {
285 0 : NS_WARNING("quoted value not terminated");
286 0 : }
287 : } else {
288 : // it's unquoted. snarf until we see whitespace.
289 0 : value = aDataStr;
290 0 : while (*aDataStr && (!nsCRT::IsAsciiSpace(*aDataStr)))
291 0 : ++aDataStr;
292 0 : *aDataStr++ = '\0';
293 : }
294 :
295 0 : fieldType t = fieldType(mFormat[i]);
296 0 : switch (t) {
297 : case FIELD_FILENAME: {
298 : // don't unescape at this point, so that UnEscapeAndConvert() can
299 0 : filename = value;
300 :
301 0 : bool success = false;
302 :
303 0 : nsAutoString entryuri;
304 :
305 0 : if (gTextToSubURI) {
306 0 : PRUnichar *result = nsnull;
307 0 : if (NS_SUCCEEDED(rv = gTextToSubURI->UnEscapeAndConvert(mEncoding.get(), filename.get(),
308 : &result)) && (result)) {
309 0 : if (*result) {
310 0 : aIdx->SetLocation(filename.get());
311 0 : if (!mHasDescription)
312 0 : aIdx->SetDescription(result);
313 0 : success = true;
314 : }
315 0 : NS_Free(result);
316 : } else {
317 0 : NS_WARNING("UnEscapeAndConvert error");
318 : }
319 : }
320 :
321 0 : if (!success) {
322 : // if unsuccessfully at charset conversion, then
323 : // just fallback to unescape'ing in-place
324 : // XXX - this shouldn't be using UTF8, should it?
325 : // when can we fail to get the service, anyway? - bbaetz
326 0 : aIdx->SetLocation(filename.get());
327 0 : if (!mHasDescription) {
328 0 : aIdx->SetDescription(NS_ConvertUTF8toUTF16(value).get());
329 : }
330 : }
331 : }
332 0 : break;
333 : case FIELD_DESCRIPTION:
334 0 : nsUnescape(value);
335 0 : aIdx->SetDescription(NS_ConvertUTF8toUTF16(value).get());
336 0 : break;
337 : case FIELD_CONTENTLENGTH:
338 : {
339 : PRInt64 len;
340 0 : PRInt32 status = PR_sscanf(value, "%lld", &len);
341 0 : if (status == 1)
342 0 : aIdx->SetSize(len);
343 : else
344 0 : aIdx->SetSize(LL_MAXUINT); // LL_MAXUINT means unknown
345 : }
346 0 : break;
347 : case FIELD_LASTMODIFIED:
348 : {
349 : PRTime tm;
350 0 : nsUnescape(value);
351 0 : if (PR_ParseTimeString(value, false, &tm) == PR_SUCCESS) {
352 0 : aIdx->SetLastModified(tm);
353 : }
354 : }
355 0 : break;
356 : case FIELD_CONTENTTYPE:
357 0 : aIdx->SetContentType(value);
358 0 : break;
359 : case FIELD_FILETYPE:
360 : // unescape in-place
361 0 : nsUnescape(value);
362 0 : if (!nsCRT::strcasecmp(value, "directory")) {
363 0 : aIdx->SetType(nsIDirIndex::TYPE_DIRECTORY);
364 0 : } else if (!nsCRT::strcasecmp(value, "file")) {
365 0 : aIdx->SetType(nsIDirIndex::TYPE_FILE);
366 0 : } else if (!nsCRT::strcasecmp(value, "symbolic-link")) {
367 0 : aIdx->SetType(nsIDirIndex::TYPE_SYMLINK);
368 : } else {
369 0 : aIdx->SetType(nsIDirIndex::TYPE_UNKNOWN);
370 : }
371 0 : break;
372 : case FIELD_UNKNOWN:
373 : // ignore
374 0 : break;
375 : }
376 : }
377 :
378 0 : return NS_OK;
379 : }
380 :
381 : NS_IMETHODIMP
382 0 : nsDirIndexParser::OnDataAvailable(nsIRequest *aRequest, nsISupports *aCtxt,
383 : nsIInputStream *aStream,
384 : PRUint32 aSourceOffset,
385 : PRUint32 aCount) {
386 0 : if (aCount < 1)
387 0 : return NS_OK;
388 :
389 0 : PRInt32 len = mBuf.Length();
390 :
391 : // Ensure that our mBuf has capacity to hold the data we're about to
392 : // read.
393 0 : if (!EnsureStringLength(mBuf, len + aCount))
394 0 : return NS_ERROR_OUT_OF_MEMORY;
395 :
396 : // Now read the data into our buffer.
397 : nsresult rv;
398 : PRUint32 count;
399 0 : rv = aStream->Read(mBuf.BeginWriting() + len, aCount, &count);
400 0 : if (NS_FAILED(rv)) return rv;
401 :
402 : // Set the string's length according to the amount of data we've read.
403 : // Note: we know this to work on nsCString. This isn't guaranteed to
404 : // work on other strings.
405 0 : mBuf.SetLength(len + count);
406 :
407 0 : return ProcessData(aRequest, aCtxt);
408 : }
409 :
410 : nsresult
411 0 : nsDirIndexParser::ProcessData(nsIRequest *aRequest, nsISupports *aCtxt) {
412 0 : if (!mListener)
413 0 : return NS_ERROR_FAILURE;
414 :
415 0 : PRInt32 numItems = 0;
416 :
417 0 : while(true) {
418 0 : ++numItems;
419 :
420 0 : PRInt32 eol = mBuf.FindCharInSet("\n\r", mLineStart);
421 0 : if (eol < 0) break;
422 0 : mBuf.SetCharAt(PRUnichar('\0'), eol);
423 :
424 0 : const char *line = mBuf.get() + mLineStart;
425 :
426 0 : PRInt32 lineLen = eol - mLineStart;
427 0 : mLineStart = eol + 1;
428 :
429 0 : if (lineLen >= 4) {
430 : nsresult rv;
431 0 : const char *buf = line;
432 :
433 0 : if (buf[0] == '1') {
434 0 : if (buf[1] == '0') {
435 0 : if (buf[2] == '0' && buf[3] == ':') {
436 : // 100. Human-readable comment line. Ignore
437 0 : } else if (buf[2] == '1' && buf[3] == ':') {
438 : // 101. Human-readable information line.
439 0 : mComment.Append(buf + 4);
440 :
441 0 : char *value = ((char *)buf) + 4;
442 0 : nsUnescape(value);
443 0 : mListener->OnInformationAvailable(aRequest, aCtxt, NS_ConvertUTF8toUTF16(value));
444 :
445 0 : } else if (buf[2] == '2' && buf[3] == ':') {
446 : // 102. Human-readable information line, HTML.
447 0 : mComment.Append(buf + 4);
448 : }
449 : }
450 0 : } else if (buf[0] == '2') {
451 0 : if (buf[1] == '0') {
452 0 : if (buf[2] == '0' && buf[3] == ':') {
453 : // 200. Define field names
454 0 : rv = ParseFormat(buf + 4);
455 0 : if (NS_FAILED(rv)) {
456 0 : return rv;
457 : }
458 0 : } else if (buf[2] == '1' && buf[3] == ':') {
459 : // 201. Field data
460 0 : nsCOMPtr<nsIDirIndex> idx = do_CreateInstance("@mozilla.org/dirIndex;1",&rv);
461 0 : if (NS_FAILED(rv))
462 0 : return rv;
463 :
464 0 : rv = ParseData(idx, ((char *)buf) + 4);
465 0 : if (NS_FAILED(rv)) {
466 0 : return rv;
467 : }
468 :
469 0 : mListener->OnIndexAvailable(aRequest, aCtxt, idx);
470 : }
471 : }
472 0 : } else if (buf[0] == '3') {
473 0 : if (buf[1] == '0') {
474 0 : if (buf[2] == '0' && buf[3] == ':') {
475 : // 300. Self-referring URL
476 0 : } else if (buf[2] == '1' && buf[3] == ':') {
477 : // 301. OUR EXTENSION - encoding
478 0 : int i = 4;
479 0 : while (buf[i] && nsCRT::IsAsciiSpace(buf[i]))
480 0 : ++i;
481 :
482 0 : if (buf[i])
483 0 : SetEncoding(buf+i);
484 : }
485 : }
486 : }
487 : }
488 : }
489 :
490 0 : return NS_OK;
491 : }
|