1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is mozilla.org code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 1999
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : *
24 : * Alternatively, the contents of this file may be used under the terms of
25 : * either the GNU General Public License Version 2 or later (the "GPL"), or
26 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 : * in which case the provisions of the GPL or the LGPL are applicable instead
28 : * of those above. If you wish to allow use of your version of this file only
29 : * under the terms of either the GPL or the LGPL, and not to allow others to
30 : * use your version of this file under the terms of the MPL, indicate your
31 : * decision by deleting the provisions above and replace them with the notice
32 : * and other provisions required by the GPL or the LGPL. If you do not delete
33 : * the provisions above, a recipient may use your version of this file under
34 : * the terms of any one of the MPL, the GPL or the LGPL.
35 : *
36 : * ***** END LICENSE BLOCK ***** */
37 :
38 : #include "nsUnknownDecoder.h"
39 : #include "nsIServiceManager.h"
40 : #include "nsIStreamConverterService.h"
41 :
42 : #include "nsIPipe.h"
43 : #include "nsIInputStream.h"
44 : #include "nsIOutputStream.h"
45 : #include "nsMimeTypes.h"
46 : #include "netCore.h"
47 : #include "nsXPIDLString.h"
48 : #include "nsIPrefService.h"
49 : #include "nsIPrefBranch.h"
50 : #include "nsICategoryManager.h"
51 : #include "nsISupportsPrimitives.h"
52 : #include "nsIContentSniffer.h"
53 :
54 : #include "nsCRT.h"
55 :
56 : #include "nsIMIMEService.h"
57 :
58 : #include "nsIViewSourceChannel.h"
59 : #include "nsIHttpChannel.h"
60 : #include "nsNetCID.h"
61 :
62 :
// Size, in bytes, of the sniffer buffer allocated in OnStartRequest(). The
// same value is used as the segment size and maximum size of the pipe that
// FireListenerNotifications() uses to replay the buffered data.
#define MAX_BUFFER_SIZE 512
64 :
65 903 : nsUnknownDecoder::nsUnknownDecoder()
66 : : mBuffer(nsnull)
67 : , mBufferLen(0)
68 903 : , mRequireHTMLsuffix(false)
69 : {
70 1806 : nsCOMPtr<nsIPrefBranch> prefs = do_GetService(NS_PREFSERVICE_CONTRACTID);
71 903 : if (prefs) {
72 : bool val;
73 903 : if (NS_SUCCEEDED(prefs->GetBoolPref("security.requireHTMLsuffix", &val)))
74 0 : mRequireHTMLsuffix = val;
75 : }
76 903 : }
77 :
78 2708 : nsUnknownDecoder::~nsUnknownDecoder()
79 : {
80 903 : if (mBuffer) {
81 0 : delete [] mBuffer;
82 0 : mBuffer = nsnull;
83 : }
84 3610 : }
85 :
86 : // ----
87 : //
88 : // nsISupports implementation...
89 : //
90 : // ----
91 :
// Reference counting for nsUnknownDecoder.
NS_IMPL_ADDREF(nsUnknownDecoder)
NS_IMPL_RELEASE(nsUnknownDecoder)

// QueryInterface map: the decoder can be obtained as a stream converter, a
// stream listener, a request observer or a content sniffer. nsISupports is
// reached through nsIStreamListener because it is inherited along more than
// one path.
NS_INTERFACE_MAP_BEGIN(nsUnknownDecoder)
  NS_INTERFACE_MAP_ENTRY(nsIStreamConverter)
  NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
  NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
  NS_INTERFACE_MAP_ENTRY(nsIContentSniffer)
  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIStreamListener)
NS_INTERFACE_MAP_END
102 :
103 :
104 : // ----
105 : //
106 : // nsIStreamConverter methods...
107 : //
108 : // ----
109 :
/**
 * Synchronous conversion is not supported by this converter; only the
 * asynchronous path (AsyncConvertData plus the listener callbacks) is
 * implemented.
 *
 * @return always NS_ERROR_NOT_IMPLEMENTED; no argument is examined.
 */
NS_IMETHODIMP
nsUnknownDecoder::Convert(nsIInputStream *aFromStream,
                          const char *aFromType,
                          const char *aToType,
                          nsISupports *aCtxt,
                          nsIInputStream **aResultStream)
{
  return NS_ERROR_NOT_IMPLEMENTED;
}
119 :
120 : NS_IMETHODIMP
121 894 : nsUnknownDecoder::AsyncConvertData(const char *aFromType,
122 : const char *aToType,
123 : nsIStreamListener *aListener,
124 : nsISupports *aCtxt)
125 : {
126 894 : NS_ASSERTION(aListener && aFromType && aToType,
127 : "null pointer passed into multi mixed converter");
128 : // hook up our final listener. this guy gets the various On*() calls we want to throw
129 : // at him.
130 : //
131 894 : mNextListener = aListener;
132 894 : return (aListener) ? NS_OK : NS_ERROR_FAILURE;
133 : }
134 :
135 : // ----
136 : //
137 : // nsIStreamListener methods...
138 : //
139 : // ----
140 :
141 : NS_IMETHODIMP
142 894 : nsUnknownDecoder::OnDataAvailable(nsIRequest* request,
143 : nsISupports *aCtxt,
144 : nsIInputStream *aStream,
145 : PRUint32 aSourceOffset,
146 : PRUint32 aCount)
147 : {
148 894 : nsresult rv = NS_OK;
149 :
150 894 : if (!mNextListener) return NS_ERROR_FAILURE;
151 :
152 894 : if (mContentType.IsEmpty()) {
153 : PRUint32 count, len;
154 :
155 : // If the buffer has not been allocated by now, just fail...
156 733 : if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY;
157 :
158 : //
159 : // Determine how much of the stream should be read to fill up the
160 : // sniffer buffer...
161 : //
162 733 : if (mBufferLen + aCount >= MAX_BUFFER_SIZE) {
163 76 : count = MAX_BUFFER_SIZE-mBufferLen;
164 : } else {
165 657 : count = aCount;
166 : }
167 :
168 : // Read the data into the buffer...
169 733 : rv = aStream->Read((mBuffer+mBufferLen), count, &len);
170 733 : if (NS_FAILED(rv)) return rv;
171 :
172 733 : mBufferLen += len;
173 733 : aCount -= len;
174 :
175 733 : if (aCount) {
176 : //
177 : // Adjust the source offset... The call to FireListenerNotifications(...)
178 : // will make the first OnDataAvailable(...) call with an offset of 0.
179 : // So, this offset needs to be adjusted to reflect that...
180 : //
181 76 : aSourceOffset += mBufferLen;
182 :
183 76 : DetermineContentType(request);
184 :
185 76 : rv = FireListenerNotifications(request, aCtxt);
186 : }
187 : }
188 :
189 : // Must not fire ODA again if it failed once
190 894 : if (aCount && NS_SUCCEEDED(rv)) {
191 237 : NS_ASSERTION(!mContentType.IsEmpty(),
192 : "Content type should be known by now.");
193 :
194 237 : rv = mNextListener->OnDataAvailable(request, aCtxt, aStream,
195 237 : aSourceOffset, aCount);
196 : }
197 :
198 894 : return rv;
199 : }
200 :
201 : // ----
202 : //
203 : // nsIRequestObserver methods...
204 : //
205 : // ----
206 :
207 : NS_IMETHODIMP
208 894 : nsUnknownDecoder::OnStartRequest(nsIRequest* request, nsISupports *aCtxt)
209 : {
210 894 : nsresult rv = NS_OK;
211 :
212 894 : if (!mNextListener) return NS_ERROR_FAILURE;
213 :
214 : // Allocate the sniffer buffer...
215 894 : if (NS_SUCCEEDED(rv) && !mBuffer) {
216 894 : mBuffer = new char[MAX_BUFFER_SIZE];
217 :
218 894 : if (!mBuffer) {
219 0 : rv = NS_ERROR_OUT_OF_MEMORY;
220 : }
221 : }
222 :
223 : // Do not pass the OnStartRequest on to the next listener (yet)...
224 894 : return rv;
225 : }
226 :
227 : NS_IMETHODIMP
228 894 : nsUnknownDecoder::OnStopRequest(nsIRequest* request, nsISupports *aCtxt,
229 : nsresult aStatus)
230 : {
231 894 : nsresult rv = NS_OK;
232 :
233 894 : if (!mNextListener) return NS_ERROR_FAILURE;
234 :
235 : //
236 : // The total amount of data is less than the size of the sniffer buffer.
237 : // Analyze the buffer now...
238 : //
239 894 : if (mContentType.IsEmpty()) {
240 818 : DetermineContentType(request);
241 :
242 818 : rv = FireListenerNotifications(request, aCtxt);
243 :
244 818 : if (NS_FAILED(rv)) {
245 28 : aStatus = rv;
246 : }
247 : }
248 :
249 894 : rv = mNextListener->OnStopRequest(request, aCtxt, aStatus);
250 894 : mNextListener = 0;
251 :
252 894 : return rv;
253 : }
254 :
255 : // ----
256 : //
257 : // nsIContentSniffer methods...
258 : //
259 : // ----
260 : NS_IMETHODIMP
261 12 : nsUnknownDecoder::GetMIMETypeFromContent(nsIRequest* aRequest,
262 : const PRUint8* aData,
263 : PRUint32 aLength,
264 : nsACString& type)
265 : {
266 12 : mBuffer = const_cast<char*>(reinterpret_cast<const char*>(aData));
267 12 : mBufferLen = aLength;
268 12 : DetermineContentType(aRequest);
269 12 : mBuffer = nsnull;
270 12 : mBufferLen = 0;
271 12 : type.Assign(mContentType);
272 12 : mContentType.Truncate();
273 12 : return type.IsEmpty() ? NS_ERROR_NOT_AVAILABLE : NS_OK;
274 : }
275 :
276 :
277 : // Actual sniffing code
278 :
279 901 : bool nsUnknownDecoder::AllowSniffing(nsIRequest* aRequest)
280 : {
281 901 : if (!mRequireHTMLsuffix) {
282 901 : return true;
283 : }
284 :
285 0 : nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest);
286 0 : if (!channel) {
287 0 : NS_ERROR("QI failed");
288 0 : return false;
289 : }
290 :
291 0 : nsCOMPtr<nsIURI> uri;
292 0 : if (NS_FAILED(channel->GetURI(getter_AddRefs(uri))) || !uri) {
293 0 : return false;
294 : }
295 :
296 0 : bool isLocalFile = false;
297 0 : if (NS_FAILED(uri->SchemeIs("file", &isLocalFile)) || isLocalFile) {
298 0 : return false;
299 : }
300 :
301 0 : return true;
302 : }
303 :
304 : /**
305 : * This is the array of sniffer entries that depend on "magic numbers"
306 : * in the file. Each entry has either a type associated with it (set
307 : * these with the SNIFFER_ENTRY macro) or a function to be executed
308 : * (set these with the SNIFFER_ENTRY_WITH_FUNC macro). The function
309 : * should take a single nsIRequest* and returns bool -- true if
310 : * it sets mContentType, false otherwise
311 : */
312 : nsUnknownDecoder::nsSnifferEntry nsUnknownDecoder::sSnifferEntries[] = {
313 : SNIFFER_ENTRY("%PDF-", APPLICATION_PDF),
314 :
315 : SNIFFER_ENTRY("%!PS-Adobe-", APPLICATION_POSTSCRIPT),
316 :
317 : // Files that start with mailbox delimiters let's provisionally call
318 : // text/plain
319 : SNIFFER_ENTRY("From", TEXT_PLAIN),
320 : SNIFFER_ENTRY(">From", TEXT_PLAIN),
321 :
322 : // If the buffer begins with "#!" or "%!" then it is a script of
323 : // some sort... "Scripts" can include arbitrary data to be passed
324 : // to an interpreter, so we need to decide whether we can call this
325 : // text or whether it's data.
326 : SNIFFER_ENTRY_WITH_FUNC("#!", &nsUnknownDecoder::LastDitchSniff),
327 :
328 : // XXXbz should (and can) we also include the various ways that <?xml can
329 : // appear as UTF-16 and such? See http://www.w3.org/TR/REC-xml#sec-guessing
330 : SNIFFER_ENTRY_WITH_FUNC("<?xml", &nsUnknownDecoder::SniffForXML)
331 : };
332 :
333 : PRUint32 nsUnknownDecoder::sSnifferEntryNum =
334 : sizeof(nsUnknownDecoder::sSnifferEntries) /
335 : sizeof(nsUnknownDecoder::nsSnifferEntry);
336 :
337 902 : void nsUnknownDecoder::DetermineContentType(nsIRequest* aRequest)
338 : {
339 902 : NS_ASSERTION(mContentType.IsEmpty(), "Content type is already known.");
340 902 : if (!mContentType.IsEmpty()) return;
341 :
342 : // First, run through all the types we can detect reliably based on
343 : // magic numbers
344 : PRUint32 i;
345 6311 : for (i = 0; i < sSnifferEntryNum; ++i) {
346 9277 : if (mBufferLen >= sSnifferEntries[i].mByteLen && // enough data
347 3865 : memcmp(mBuffer, sSnifferEntries[i].mBytes, sSnifferEntries[i].mByteLen) == 0) { // and type matches
348 3 : NS_ASSERTION(sSnifferEntries[i].mMimeType ||
349 : sSnifferEntries[i].mContentTypeSniffer,
350 : "Must have either a type string or a function to set the type");
351 3 : NS_ASSERTION(sSnifferEntries[i].mMimeType == nsnull ||
352 : sSnifferEntries[i].mContentTypeSniffer == nsnull,
353 : "Both a type string and a type sniffing function set;"
354 : " using type string");
355 3 : if (sSnifferEntries[i].mMimeType) {
356 0 : mContentType = sSnifferEntries[i].mMimeType;
357 0 : NS_ASSERTION(!mContentType.IsEmpty(),
358 : "Content type should be known by now.");
359 0 : return;
360 : }
361 3 : if ((this->*(sSnifferEntries[i].mContentTypeSniffer))(aRequest)) {
362 3 : NS_ASSERTION(!mContentType.IsEmpty(),
363 : "Content type should be known by now.");
364 3 : return;
365 : }
366 : }
367 : }
368 :
369 899 : if (TryContentSniffers(aRequest)) {
370 1 : NS_ASSERTION(!mContentType.IsEmpty(),
371 : "Content type should be known by now.");
372 1 : return;
373 : }
374 :
375 898 : if (SniffForHTML(aRequest)) {
376 0 : NS_ASSERTION(!mContentType.IsEmpty(),
377 : "Content type should be known by now.");
378 0 : return;
379 : }
380 :
381 : // We don't know what this is yet. Before we just give up, try
382 : // the URI from the request.
383 898 : if (SniffURI(aRequest)) {
384 4 : NS_ASSERTION(!mContentType.IsEmpty(),
385 : "Content type should be known by now.");
386 4 : return;
387 : }
388 :
389 894 : LastDitchSniff(aRequest);
390 894 : NS_ASSERTION(!mContentType.IsEmpty(),
391 : "Content type should be known by now.");
392 : }
393 :
394 899 : bool nsUnknownDecoder::TryContentSniffers(nsIRequest* aRequest)
395 : {
396 : // Enumerate content sniffers
397 1798 : nsCOMPtr<nsICategoryManager> catMan(do_GetService("@mozilla.org/categorymanager;1"));
398 899 : if (!catMan) {
399 0 : return false;
400 : }
401 :
402 1798 : nsCOMPtr<nsISimpleEnumerator> sniffers;
403 899 : catMan->EnumerateCategory("content-sniffing-services", getter_AddRefs(sniffers));
404 899 : if (!sniffers) {
405 0 : return false;
406 : }
407 :
408 : bool hasMore;
409 2696 : while (NS_SUCCEEDED(sniffers->HasMoreElements(&hasMore)) && hasMore) {
410 1798 : nsCOMPtr<nsISupports> elem;
411 899 : sniffers->GetNext(getter_AddRefs(elem));
412 899 : NS_ASSERTION(elem, "No element even though hasMore returned true!?");
413 :
414 1798 : nsCOMPtr<nsISupportsCString> sniffer_id(do_QueryInterface(elem));
415 899 : NS_ASSERTION(sniffer_id, "element is no nsISupportsCString!?");
416 1798 : nsCAutoString contractid;
417 899 : nsresult rv = sniffer_id->GetData(contractid);
418 899 : if (NS_FAILED(rv)) {
419 0 : continue;
420 : }
421 :
422 1798 : nsCOMPtr<nsIContentSniffer> sniffer(do_GetService(contractid.get()));
423 899 : if (!sniffer) {
424 0 : continue;
425 : }
426 :
427 899 : rv = sniffer->GetMIMETypeFromContent(aRequest, (const PRUint8*)mBuffer,
428 899 : mBufferLen, mContentType);
429 899 : if (NS_SUCCEEDED(rv)) {
430 1 : return true;
431 : }
432 : }
433 :
434 898 : return false;
435 : }
436 :
437 898 : bool nsUnknownDecoder::SniffForHTML(nsIRequest* aRequest)
438 : {
439 : /*
440 : * To prevent a possible attack, we will not consider this to be
441 : * html content if it comes from the local file system and our prefs
442 : * are set right
443 : */
444 898 : if (!AllowSniffing(aRequest)) {
445 0 : return false;
446 : }
447 :
448 : // Now look for HTML.
449 898 : const char* str = mBuffer;
450 898 : const char* end = mBuffer + mBufferLen;
451 :
452 : // skip leading whitespace
453 1835 : while (str != end && nsCRT::IsAsciiSpace(*str)) {
454 39 : ++str;
455 : }
456 :
457 : // did we find something like a start tag?
458 898 : if (str == end || *str != '<' || ++str == end) {
459 898 : return false;
460 : }
461 :
462 : // If we seem to be SGML or XML and we got down here, just pretend we're HTML
463 0 : if (*str == '!' || *str == '?') {
464 0 : mContentType = TEXT_HTML;
465 0 : return true;
466 : }
467 :
468 0 : PRUint32 bufSize = end - str;
469 : // We use sizeof(_tagstr) below because that's the length of _tagstr
470 : // with the one char " " or ">" appended.
471 : #define MATCHES_TAG(_tagstr) \
472 : (bufSize >= sizeof(_tagstr) && \
473 : (PL_strncasecmp(str, _tagstr " ", sizeof(_tagstr)) == 0 || \
474 : PL_strncasecmp(str, _tagstr ">", sizeof(_tagstr)) == 0))
475 :
476 0 : if (MATCHES_TAG("html") ||
477 0 : MATCHES_TAG("frameset") ||
478 0 : MATCHES_TAG("body") ||
479 0 : MATCHES_TAG("head") ||
480 0 : MATCHES_TAG("script") ||
481 0 : MATCHES_TAG("iframe") ||
482 0 : MATCHES_TAG("a") ||
483 0 : MATCHES_TAG("img") ||
484 0 : MATCHES_TAG("table") ||
485 0 : MATCHES_TAG("title") ||
486 0 : MATCHES_TAG("link") ||
487 0 : MATCHES_TAG("base") ||
488 0 : MATCHES_TAG("style") ||
489 0 : MATCHES_TAG("div") ||
490 0 : MATCHES_TAG("p") ||
491 0 : MATCHES_TAG("font") ||
492 0 : MATCHES_TAG("applet") ||
493 0 : MATCHES_TAG("meta") ||
494 0 : MATCHES_TAG("center") ||
495 0 : MATCHES_TAG("form") ||
496 0 : MATCHES_TAG("isindex") ||
497 0 : MATCHES_TAG("h1") ||
498 0 : MATCHES_TAG("h2") ||
499 0 : MATCHES_TAG("h3") ||
500 0 : MATCHES_TAG("h4") ||
501 0 : MATCHES_TAG("h5") ||
502 0 : MATCHES_TAG("h6") ||
503 0 : MATCHES_TAG("b") ||
504 0 : MATCHES_TAG("pre")) {
505 :
506 0 : mContentType = TEXT_HTML;
507 0 : return true;
508 : }
509 :
510 : #undef MATCHES_TAG
511 :
512 0 : return false;
513 : }
514 :
515 3 : bool nsUnknownDecoder::SniffForXML(nsIRequest* aRequest)
516 : {
517 : // Just like HTML, this should be able to be shut off.
518 3 : if (!AllowSniffing(aRequest)) {
519 0 : return false;
520 : }
521 :
522 : // First see whether we can glean anything from the uri...
523 3 : if (!SniffURI(aRequest)) {
524 : // Oh well; just generic XML will have to do
525 3 : mContentType = TEXT_XML;
526 : }
527 :
528 3 : return true;
529 : }
530 :
531 901 : bool nsUnknownDecoder::SniffURI(nsIRequest* aRequest)
532 : {
533 1802 : nsCOMPtr<nsIMIMEService> mimeService(do_GetService("@mozilla.org/mime;1"));
534 901 : if (mimeService) {
535 1802 : nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest);
536 901 : if (channel) {
537 1802 : nsCOMPtr<nsIURI> uri;
538 901 : nsresult result = channel->GetURI(getter_AddRefs(uri));
539 901 : if (NS_SUCCEEDED(result) && uri) {
540 1802 : nsCAutoString type;
541 901 : result = mimeService->GetTypeFromURI(uri, type);
542 901 : if (NS_SUCCEEDED(result)) {
543 4 : mContentType = type;
544 4 : return true;
545 : }
546 : }
547 : }
548 : }
549 :
550 897 : return false;
551 : }
552 :
553 : // This macro is based on RFC 2046 Section 4.1.2. Treat any char 0-31
554 : // except the 9-13 range (\t, \n, \v, \f, \r) and char 27 (used by
555 : // encodings like Shift_JIS) as non-text
556 : #define IS_TEXT_CHAR(ch) \
557 : (((unsigned char)(ch)) > 31 || (9 <= (ch) && (ch) <= 13) || (ch) == 27)
558 :
559 894 : bool nsUnknownDecoder::LastDitchSniff(nsIRequest* aRequest)
560 : {
561 : // All we can do now is try to guess whether this is text/plain or
562 : // application/octet-stream
563 :
564 : // First, check for a BOM. If we see one, assume this is text/plain
565 : // in whatever encoding. If there is a BOM _and_ text we will
566 : // always have at least 4 bytes in the buffer (since the 2-byte BOMs
567 : // are for 2-byte encodings and the UTF-8 BOM is 3 bytes).
568 894 : if (mBufferLen >= 4) {
569 647 : const unsigned char* buf = (const unsigned char*)mBuffer;
570 2590 : if ((buf[0] == 0xFE && buf[1] == 0xFF) || // UTF-16, Big Endian
571 647 : (buf[0] == 0xFF && buf[1] == 0xFE) || // UTF-16 or UCS-4, Little Endian
572 647 : (buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF) || // UTF-8
573 649 : (buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFE && buf[3] == 0xFF)) { // UCS-4, Big Endian
574 :
575 0 : mContentType = TEXT_PLAIN;
576 0 : return true;
577 : }
578 : }
579 :
580 : // Now see whether the buffer has any non-text chars. If not, then let's
581 : // just call it text/plain...
582 : //
583 : PRUint32 i;
584 894 : for (i=0; i<mBufferLen && IS_TEXT_CHAR(mBuffer[i]); i++);
585 :
586 894 : if (i == mBufferLen) {
587 888 : mContentType = TEXT_PLAIN;
588 : }
589 : else {
590 6 : mContentType = APPLICATION_OCTET_STREAM;
591 : }
592 :
593 894 : return true;
594 : }
595 :
596 :
597 894 : nsresult nsUnknownDecoder::FireListenerNotifications(nsIRequest* request,
598 : nsISupports *aCtxt)
599 : {
600 894 : nsresult rv = NS_OK;
601 :
602 894 : if (!mNextListener) return NS_ERROR_FAILURE;
603 :
604 894 : if (!mContentType.IsEmpty()) {
605 : nsCOMPtr<nsIViewSourceChannel> viewSourceChannel =
606 1788 : do_QueryInterface(request);
607 894 : if (viewSourceChannel) {
608 0 : rv = viewSourceChannel->SetOriginalContentType(mContentType);
609 : } else {
610 1788 : nsCOMPtr<nsIChannel> channel = do_QueryInterface(request, &rv);
611 894 : if (NS_SUCCEEDED(rv)) {
612 : // Set the new content type on the channel...
613 894 : rv = channel->SetContentType(mContentType);
614 : }
615 : }
616 :
617 894 : NS_ASSERTION(NS_SUCCEEDED(rv), "Unable to set content type on channel!");
618 :
619 894 : if (NS_FAILED(rv)) {
620 : // Cancel the request to make sure it has the correct status if
621 : // mNextListener looks at it.
622 0 : request->Cancel(rv);
623 0 : mNextListener->OnStartRequest(request, aCtxt);
624 0 : return rv;
625 : }
626 : }
627 :
628 : // Fire the OnStartRequest(...)
629 894 : rv = mNextListener->OnStartRequest(request, aCtxt);
630 :
631 894 : if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY;
632 :
633 : // If the request was canceled, then we need to treat that equivalently
634 : // to an error returned by OnStartRequest.
635 894 : if (NS_SUCCEEDED(rv))
636 874 : request->GetStatus(&rv);
637 :
638 : // Fire the first OnDataAvailable for the data that was read from the
639 : // stream into the sniffer buffer...
640 894 : if (NS_SUCCEEDED(rv) && (mBufferLen > 0)) {
641 712 : PRUint32 len = 0;
642 1424 : nsCOMPtr<nsIInputStream> in;
643 1424 : nsCOMPtr<nsIOutputStream> out;
644 :
645 : // Create a pipe and fill it with the data from the sniffer buffer.
646 712 : rv = NS_NewPipe(getter_AddRefs(in), getter_AddRefs(out),
647 712 : MAX_BUFFER_SIZE, MAX_BUFFER_SIZE);
648 :
649 712 : if (NS_SUCCEEDED(rv)) {
650 712 : rv = out->Write(mBuffer, mBufferLen, &len);
651 712 : if (NS_SUCCEEDED(rv)) {
652 712 : if (len == mBufferLen) {
653 712 : rv = mNextListener->OnDataAvailable(request, aCtxt, in, 0, len);
654 : } else {
655 0 : NS_ERROR("Unable to write all the data into the pipe.");
656 0 : rv = NS_ERROR_FAILURE;
657 : }
658 : }
659 : }
660 : }
661 :
662 894 : delete [] mBuffer;
663 894 : mBuffer = nsnull;
664 894 : mBufferLen = 0;
665 :
666 894 : return rv;
667 : }
668 :
669 : void
670 4 : nsBinaryDetector::DetermineContentType(nsIRequest* aRequest)
671 : {
672 8 : nsCOMPtr<nsIHttpChannel> httpChannel = do_QueryInterface(aRequest);
673 4 : if (!httpChannel) {
674 : return;
675 : }
676 :
677 : // It's an HTTP channel. Check for the text/plain mess
678 2 : nsCAutoString contentTypeHdr;
679 2 : httpChannel->GetResponseHeader(NS_LITERAL_CSTRING("Content-Type"),
680 1 : contentTypeHdr);
681 2 : nsCAutoString contentType;
682 1 : httpChannel->GetContentType(contentType);
683 :
684 : // Make sure to do a case-sensitive exact match comparison here. Apache
685 : // 1.x just sends text/plain for "unknown", while Apache 2.x sends
686 : // text/plain with a ISO-8859-1 charset. Debian's Apache version, just to
687 : // be different, sends text/plain with iso-8859-1 charset. For extra fun,
688 : // FC7, RHEL4, and Ubuntu Feisty send charset=UTF-8. Don't do general
689 : // case-insensitive comparison, since we really want to apply this crap as
690 : // rarely as we can.
691 1 : if (!contentType.EqualsLiteral("text/plain") ||
692 0 : (!contentTypeHdr.EqualsLiteral("text/plain") &&
693 0 : !contentTypeHdr.EqualsLiteral("text/plain; charset=ISO-8859-1") &&
694 0 : !contentTypeHdr.EqualsLiteral("text/plain; charset=iso-8859-1") &&
695 0 : !contentTypeHdr.EqualsLiteral("text/plain; charset=UTF-8"))) {
696 : return;
697 : }
698 :
699 : // Check whether we have content-encoding. If we do, don't try to
700 : // detect the type.
701 : // XXXbz we could improve this by doing a local decompress if we
702 : // wanted, I'm sure.
703 0 : nsCAutoString contentEncoding;
704 0 : httpChannel->GetResponseHeader(NS_LITERAL_CSTRING("Content-Encoding"),
705 0 : contentEncoding);
706 0 : if (!contentEncoding.IsEmpty()) {
707 : return;
708 : }
709 :
710 0 : LastDitchSniff(aRequest);
711 0 : if (mContentType.Equals(APPLICATION_OCTET_STREAM)) {
712 : // We want to guess at it instead
713 0 : mContentType = APPLICATION_GUESS_FROM_EXT;
714 : } else {
715 : // Let the text/plain type we already have be, so that other content
716 : // sniffers can also get a shot at this data.
717 0 : mContentType.Truncate();
718 : }
719 : }
|