LCOV - code coverage report
Current view: directory - browser/components/feeds/src - nsFeedSniffer.cpp (source / functions) Found Hit Coverage
Test: app.info Lines: 126 53 42.1 %
Date: 2012-06-02 Functions: 13 6 46.2 %

       1                 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2                 : /* ***** BEGIN LICENSE BLOCK *****
       3                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       4                 :  *
       5                 :  * The contents of this file are subject to the Mozilla Public License Version
       6                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       7                 :  * the License. You may obtain a copy of the License at
       8                 :  * http://www.mozilla.org/MPL/
       9                 :  *
      10                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      11                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      12                 :  * for the specific language governing rights and limitations under the
      13                 :  * License.
      14                 :  *
      15                 :  * The Original Code is the Feed Content Sniffer.
      16                 :  *
      17                 :  * The Initial Developer of the Original Code is Google Inc.
      18                 :  * Portions created by the Initial Developer are Copyright (C) 2006
      19                 :  * the Initial Developer. All Rights Reserved.
      20                 :  *
      21                 :  * Contributor(s):
      22                 :  *   Ben Goodger <beng@google.com>
      23                 :  *   Robert Sayre <sayrer@gmail.com>
      24                 :  *
      25                 :  * Alternatively, the contents of this file may be used under the terms of
      26                 :  * either the GNU General Public License Version 2 or later (the "GPL"), or
      27                 :  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      28                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      29                 :  * of those above. If you wish to allow use of your version of this file only
      30                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      31                 :  * use your version of this file under the terms of the MPL, indicate your
      32                 :  * decision by deleting the provisions above and replace them with the notice
      33                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      34                 :  * the provisions above, a recipient may use your version of this file under
      35                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      36                 :  *
      37                 :  * ***** END LICENSE BLOCK ***** */
      38                 : 
      39                 : #include "nsFeedSniffer.h"
      40                 : 
      41                 : #include "prmem.h"
      42                 : 
      43                 : #include "nsNetCID.h"
      44                 : #include "nsXPCOM.h"
      45                 : #include "nsCOMPtr.h"
      46                 : #include "nsStringStream.h"
      47                 : 
      48                 : #include "nsBrowserCompsCID.h"
      49                 : 
      50                 : #include "nsICategoryManager.h"
      51                 : #include "nsIServiceManager.h"
      52                 : #include "nsComponentManagerUtils.h"
      53                 : #include "nsServiceManagerUtils.h"
      54                 : 
      55                 : #include "nsIStreamConverterService.h"
      56                 : #include "nsIStreamConverter.h"
      57                 : 
      58                 : #include "nsIStreamListener.h"
      59                 : 
      60                 : #include "nsIHttpChannel.h"
      61                 : #include "nsIMIMEHeaderParam.h"
      62                 : 
      63                 : #include "nsMimeTypes.h"
      64                 : 
      65                 : #define TYPE_ATOM "application/atom+xml"  // Content-Type treated as a reliable feed indicator (content is not sniffed)
      66                 : #define TYPE_RSS "application/rss+xml"  // Content-Type treated as a reliable feed indicator (content is not sniffed)
      67                 : #define TYPE_MAYBE_FEED "application/vnd.mozilla.maybe.feed"  // type written to sniffedType when a feed is detected
      68                 : // Namespace URIs that must both occur in the sniffed data for the RSS 1.0 (<rdf:RDF) check.
      69                 : #define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
      70                 : #define NS_RSS "http://purl.org/rss/1.0/"
      71                 : // Upper bound on the number of bytes GetMIMETypeFromContent scans.
      72                 : #define MAX_BYTES 512
      73                 : 
      74              12 : NS_IMPL_ISUPPORTS3(nsFeedSniffer,  // NOTE(review): macro is defined outside this file; presumably supplies the nsISupports plumbing for the three interfaces listed
      75                 :                    nsIContentSniffer,  // presumably the interface behind GetMIMETypeFromContent -- confirm
      76                 :                    nsIStreamListener,  // presumably the interface behind OnDataAvailable -- confirm
      77                 :                    nsIRequestObserver)  // presumably the interface behind OnStartRequest/OnStopRequest -- confirm
      78                 : 
      79                 : nsresult  // Runs |data| through a stream converter chosen by the channel's Content-Encoding response header.
      80               1 : nsFeedSniffer::ConvertEncodedData(nsIRequest* request,  // must QI to nsIHttpChannel, else NS_ERROR_NO_INTERFACE is returned
      81                 :                                   const PRUint8* data,  // bytes to convert
      82                 :                                   PRUint32 length)  // number of bytes in |data|
      83                 : {  // Returns NS_OK when nothing was converted (no Content-Encoding value, or no converter service) or no step reported failure.
      84               1 :   nsresult rv = NS_OK;
      85                 : // Clear mDecodedData first; GetMIMETypeFromContent uses IsEmpty() on it to decide which buffer to scan.
      86               1 :  mDecodedData = "";
      87               2 :  nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(request));
      88               1 :   if (!httpChannel)
      89               0 :     return NS_ERROR_NO_INTERFACE;
      90                 : // The result of GetResponseHeader is not checked; an empty contentEncoding means "skip conversion".
      91               2 :   nsCAutoString contentEncoding;
      92               2 :   httpChannel->GetResponseHeader(NS_LITERAL_CSTRING("Content-Encoding"), 
      93               1 :                                  contentEncoding);
      94               1 :   if (!contentEncoding.IsEmpty()) {
      95               0 :     nsCOMPtr<nsIStreamConverterService> converterService(do_GetService(NS_STREAMCONVERTERSERVICE_CONTRACTID));
      96               0 :     if (converterService) {
      97               0 :       ToLowerCase(contentEncoding);
      98                 : // |this| is passed as the listener argument; presumably the converter delivers its output to our OnDataAvailable -- confirm.
      99               0 :       nsCOMPtr<nsIStreamListener> converter;
     100               0 :       rv = converterService->AsyncConvertData(contentEncoding.get(), 
     101                 :                                               "uncompressed", this, nsnull, 
     102               0 :                                               getter_AddRefs(converter));
     103               0 :       NS_ENSURE_SUCCESS(rv, rv);
     104                 : // The results of OnStartRequest/OnStopRequest on the converter are ignored; only OnDataAvailable's is propagated.
     105               0 :       converter->OnStartRequest(request, nsnull);
     106                 : // Wrap the caller's buffer in a string input stream so it can be handed to the converter.
     107                 :       nsCOMPtr<nsIStringInputStream> rawStream =
     108               0 :         do_CreateInstance(NS_STRINGINPUTSTREAM_CONTRACTID);
     109               0 :       if (!rawStream)
     110               0 :         return NS_ERROR_FAILURE;
     111                 : 
     112               0 :       rv = rawStream->SetData((const char*)data, length);
     113               0 :       NS_ENSURE_SUCCESS(rv, rv);
     114                 : 
     115               0 :       rv = converter->OnDataAvailable(request, nsnull, rawStream, 0, length);
     116               0 :       NS_ENSURE_SUCCESS(rv, rv);
     117                 : 
     118               0 :       converter->OnStopRequest(request, nsnull, NS_OK);
     119                 :     }
     120                 :   }
     121               1 :   return rv;
     122                 : }
     123                 : 
     124                 : template<int N>  // N is deduced from the array extent of aSubstring
     125                 : static bool  // Tests the head of aString against aSubstring using LowerCaseEqualsLiteral; not referenced elsewhere in this listing.
     126                 : StringBeginsWithLowercaseLiteral(nsAString& aString,
     127                 :                                  const char (&aSubstring)[N])
     128                 : {  // NOTE(review): for a string-literal argument N counts the terminating NUL, so StringHead is asked for one more character than the literal's text -- confirm intended.
     129                 :   return StringHead(aString, N).LowerCaseEqualsLiteral(aSubstring);
     130                 : }
     131                 : 
     132                 : bool  // True only when GetContentDisposition succeeds and reports nsIChannel::DISPOSITION_ATTACHMENT.
     133               0 : HasAttachmentDisposition(nsIHttpChannel* httpChannel)  // a null channel yields false
     134                 : {
     135               0 :   if (!httpChannel)
     136               0 :     return false;
     137                 : // |disp| starts uninitialized; it is only read below after NS_SUCCEEDED(rv) because && short-circuits.
     138                 :   PRUint32 disp;
     139               0 :   nsresult rv = httpChannel->GetContentDisposition(&disp);
     140                 : 
     141               0 :   if (NS_SUCCEEDED(rv) && disp == nsIChannel::DISPOSITION_ATTACHMENT)
     142               0 :     return true;
     143                 : // A failed lookup and any other disposition value are both treated as "not an attachment".
     144               0 :   return false;
     145                 : }
     146                 : 
     147                 : /**
     148                 :  * Linear scan of the half-open range [begin, end) for the character |c|.
     149                 :  * @return pointer to the first occurrence of |c|, or nsnull if not found
     150                 :  */
     151                 : static const char*
     152               0 : FindChar(char c, const char *begin, const char *end)
     153                 : {  // The loop is bounded only by |end|; it does not stop at a NUL byte.
     154               0 :   for (; begin < end; ++begin) {
     155               0 :     if (*begin == c)
     156               0 :       return begin;
     157                 :   }
     158               0 :   return nsnull;  // also reached immediately when begin >= end
     159                 : }
     160                 : 
     161                 : /**
     162                 :  *
     163                 :  * Determine if a substring is the "documentElement" in the document.
     164                 :  *
     165                 :  * All of our sniffed substrings: <rss, <feed, <rdf:RDF must be the "document"
     166                 :  * element within the XML DOM, i.e. the root container element. Otherwise,
     167                 :  * it's possible that someone embedded one of these tags inside a document of
     168                 :  * another type, e.g. an HTML document, and we don't want to show the preview
     169                 :  * page if the document isn't actually a feed.
     170                 :  * 
     171                 :  * @param   start
     172                 :  *          The beginning of the data being sniffed
     173                 :  * @param   end
     174                 :  *          The end of the data being sniffed, right before the substring that
     175                 :  *          was found. Only the range [start, end) is examined.
     176                 :  * @returns true if every '<' in the range opens a "<?" or "<!" construct that
     177                 :  *          is closed by a '>' inside the range; false otherwise.
     178                 :  */
     179                 : static bool
     180               0 : IsDocumentElement(const char *start, const char* end)
     181                 : {
     182                 :   // For every tag in the buffer, check to see if it's a PI, Doctype or 
     183                 :   // comment, our desired substring or something invalid.
     184               0 :   while ( (start = FindChar('<', start, end)) ) {
     185               0 :     ++start;
     186               0 :     if (start >= end)  // '<' was the last byte of the range: nothing follows it to classify
     187               0 :       return false;
     188                 : 
     189                 :     // Check to see if the character following the '<' is either '?' or '!'
     190                 :     // (processing instruction or doctype or comment)... these are valid nodes
     191                 :     // to have in the prologue. 
     192               0 :     if (*start != '?' && *start != '!')
     193               0 :       return false;
     194                 :     
     195                 :     // Now advance the iterator until the '>' (We do this because we don't want
     196                 :     // to sniff indicator substrings that are embedded within other nodes, e.g.
     197                 :     // comments: <!-- <rdf:RDF .. > -->
     198                 :     // NOTE(review): the skip ends at the first '>' found, which may lie inside a comment or PI body -- confirm acceptable.
     199               0 :     start = FindChar('>', start, end);
     200               0 :     if (!start)  // construct is not closed before |end|
     201               0 :       return false;
     202               0 :     ++start;
     203                 :   }
     204               0 :   return true;  // no further '<' in the range (also the result when the range contains none at all)
     205                 : }
     206                 : 
     207                 : /**
     208                 :  * Determines whether or not a string exists as the root element in an XML data
     209                 :  * string buffer.
     210                 :  * @param   dataString
     211                 :  *          The data being sniffed
     212                 :  * @param   substring
     213                 :  *          The substring being tested for existence and root-ness.
     214                 :  * @returns true if the substring exists and is the documentElement, false
     215                 :  *          otherwise.
     216                 :  */
     217                 : static bool
     218               3 : ContainsTopLevelSubstring(nsACString& dataString, const char *substring) 
     219                 : {
     220               3 :   PRInt32 offset = dataString.Find(substring);  // offset of the match; -1 is handled as "not found"
     221               3 :   if (offset == -1)
     222               3 :     return false;
     223                 : // Only the bytes before the match are validated; what follows the matched text is not inspected here.
     224               0 :   const char *begin = dataString.BeginReading();
     225                 : 
     226                 :   // Only do the validation when we find the substring.
     227               0 :   return IsDocumentElement(begin, begin + offset);
     228                 : }
     229                 : 
     230                 : NS_IMETHODIMP  // Decides whether a response looks like an RSS/Atom feed and reports the result through sniffedType.
     231               4 : nsFeedSniffer::GetMIMETypeFromContent(nsIRequest* request,  // must QI to nsIHttpChannel, else NS_ERROR_NO_INTERFACE
     232                 :                                       const PRUint8* data,  // leading bytes of the response body
     233                 :                                       PRUint32 length,  // number of bytes in |data|
     234                 :                                       nsACString& sniffedType)  // out: TYPE_MAYBE_FEED for a feed, truncated (empty) otherwise
     235                 : {  // Returns NS_OK on every path except the QI failure above and a failed ConvertEncodedData, whose code is propagated.
     236               8 :   nsCOMPtr<nsIHttpChannel> channel(do_QueryInterface(request));
     237               4 :   if (!channel)
     238               3 :     return NS_ERROR_NO_INTERFACE;
     239                 : 
     240                 :   // Check that this is a GET request, since you can't subscribe to a POST...
     241               2 :   nsCAutoString method;
     242               1 :   channel->GetRequestMethod(method);
     243               1 :   if (!method.Equals("GET")) {
     244               0 :     sniffedType.Truncate();
     245               0 :     return NS_OK;
     246                 :   }
     247                 : 
     248                 :   // We need to find out if this is a load of a view-source document. In this
     249                 :   // case we do not want to override the content type, since the source display
     250                 :   // does not need to be converted from feed format to XUL. More importantly, 
     251                 :   // we don't want to change the content type from something 
     252                 :   // nsContentDLF::CreateInstance knows about (e.g. application/xml, text/html 
     253                 :   // etc) to something that only the application fe knows about (maybe.feed) 
     254                 :   // thus deactivating syntax highlighting.
     255               2 :   nsCOMPtr<nsIURI> originalURI;
     256               1 :   channel->GetOriginalURI(getter_AddRefs(originalURI));
     257                 : // NOTE(review): originalURI is dereferenced below without a null check and the result of GetOriginalURI is ignored -- confirm it cannot be null here.
     258               2 :   nsCAutoString scheme;
     259               1 :   originalURI->GetScheme(scheme);
     260               1 :   if (scheme.EqualsLiteral("view-source")) {
     261               0 :     sniffedType.Truncate();
     262               0 :     return NS_OK;
     263                 :   }
     264                 : 
     265                 :   // Check the Content-Type to see if it is set correctly. If it is set to 
     266                 :   // something specific that we think is a reliable indication of a feed, don't
     267                 :   // bother sniffing since we assume the site maintainer knows what they're 
     268                 :   // doing. 
     269               2 :   nsCAutoString contentType;
     270               1 :   channel->GetContentType(contentType);
     271               1 :   bool noSniff = contentType.EqualsLiteral(TYPE_RSS) ||
     272               1 :                    contentType.EqualsLiteral(TYPE_ATOM);
     273                 : 
     274                 :   // Check to see if this was a feed request from the location bar or from
     275                 :   // the feed: protocol. This is also a reliable indication.
     276                 :   // The value of the header doesn't matter.  
     277               1 :   if (!noSniff) {
     278               2 :     nsCAutoString sniffHeader;
     279                 :     nsresult foundHeader =
     280               2 :       channel->GetRequestHeader(NS_LITERAL_CSTRING("X-Moz-Is-Feed"),
     281               1 :                                 sniffHeader);
     282               1 :     noSniff = NS_SUCCEEDED(foundHeader);
     283                 :   }
     284                 : 
     285               1 :   if (noSniff) {
     286                 :     // check for an attachment after we have a likely feed.
     287               0 :     if(HasAttachmentDisposition(channel)) {
     288               0 :       sniffedType.Truncate();
     289               0 :       return NS_OK;
     290                 :     }
     291                 : 
     292                 :     // set the feed header as a response header, since we have good metadata
     293                 :     // telling us that the feed is supposed to be RSS or Atom
     294               0 :     channel->SetResponseHeader(NS_LITERAL_CSTRING("X-Moz-Is-Feed"),
     295               0 :                                NS_LITERAL_CSTRING("1"), false);
     296               0 :     sniffedType.AssignLiteral(TYPE_MAYBE_FEED);
     297               0 :     return NS_OK;
     298                 :   }
     299                 : 
     300                 :   // Don't sniff arbitrary types.  Limit sniffing to situations that
     301                 :   // we think can reasonably arise.
     302               1 :   if (!contentType.EqualsLiteral(TEXT_HTML) &&
     303               0 :       !contentType.EqualsLiteral(APPLICATION_OCTET_STREAM) &&
     304                 :       // Same criterion as XMLHttpRequest.  Should we be checking for "+xml"
     305                 :       // and check for text/xml and application/xml by hand instead?
     306               0 :       contentType.Find("xml") == -1) {
     307               0 :     sniffedType.Truncate();
     308               0 :     return NS_OK;
     309                 :   }
     310                 : 
     311                 :   // Now we need to potentially decompress data served with 
     312                 :   // Content-Encoding: gzip
     313               1 :   nsresult rv = ConvertEncodedData(request, data, length);
     314               1 :   if (NS_FAILED(rv))
     315               0 :     return rv;
     316                 :   // Scan mDecodedData if ConvertEncodedData left anything in it, otherwise the raw buffer.
     317                 :   const char* testData = 
     318               1 :     mDecodedData.IsEmpty() ? (const char*)data : mDecodedData.get();
     319                 : 
     320                 :   // The strategy here is based on that described in:
     321                 :   // http://blogs.msdn.com/rssteam/articles/PublishersGuide.aspx
     322                 :   // for interoperability purposes.
     323                 : 
     324                 :   // We cap the number of bytes to scan at MAX_BYTES to prevent picking up 
     325                 :   // false positives by accidentally reading document content, e.g. a "how to
     326                 :   // make a feed" page.
     327               1 :   if (length > MAX_BYTES)
     328               0 :     length = MAX_BYTES;
     329                 : // NOTE(review): |length| is still the size of the raw |data| buffer (capped), even when testData points at mDecodedData -- confirm the decoded buffer is at least that long.
     330                 :   // Thus begins the actual sniffing.
     331               2 :   nsDependentCSubstring dataString((const char*)testData, length);
     332                 : 
     333               1 :   bool isFeed = false;
     334                 : 
     335                 :   // RSS 0.91/0.92/2.0
     336               1 :   isFeed = ContainsTopLevelSubstring(dataString, "<rss");
     337                 : 
     338                 :   // Atom 1.0
     339               1 :   if (!isFeed)
     340               1 :     isFeed = ContainsTopLevelSubstring(dataString, "<feed");
     341                 : 
     342                 :   // RSS 1.0
     343               1 :   if (!isFeed) {
     344               1 :     isFeed = ContainsTopLevelSubstring(dataString, "<rdf:RDF") &&
     345               0 :       dataString.Find(NS_RDF) != -1 &&
     346               1 :       dataString.Find(NS_RSS) != -1;
     347                 :   }
     348                 : 
     349                 :   // If we sniffed a feed, coerce our internal type
     350               1 :   if (isFeed && !HasAttachmentDisposition(channel))
     351               0 :     sniffedType.AssignLiteral(TYPE_MAYBE_FEED);
     352                 :   else
     353               1 :     sniffedType.Truncate();
     354               1 :   return NS_OK;
     355                 : }
     356                 : 
     357                 : NS_IMETHODIMP  // No-op: both arguments are ignored and NS_OK is always returned.
     358               0 : nsFeedSniffer::OnStartRequest(nsIRequest* request, nsISupports* context)
     359                 : {
     360               0 :   return NS_OK;
     361                 : }
     362                 : 
     363                 : NS_METHOD  // Segment callback handed to ReadSegments by OnDataAvailable; appends one segment to a string.
     364               0 : nsFeedSniffer::AppendSegmentToString(nsIInputStream* inputStream,  // unused
     365                 :                                      void* closure,  // cast to nsCString*; OnDataAvailable passes &mDecodedData
     366                 :                                      const char* rawSegment,  // bytes of this segment
     367                 :                                      PRUint32 toOffset,  // unused
     368                 :                                      PRUint32 count,  // number of bytes in rawSegment
     369                 :                                      PRUint32* writeCount)  // out: set to |count|, i.e. the whole segment was taken
     370                 : {
     371               0 :   nsCString* decodedData = static_cast<nsCString*>(closure);
     372               0 :   decodedData->Append(rawSegment, count);
     373               0 :   *writeCount = count;
     374               0 :   return NS_OK;
     375                 : }
     376                 : 
     377                 : NS_IMETHODIMP  // Reads up to |count| bytes from |stream| and appends them to mDecodedData via AppendSegmentToString.
     378               0 : nsFeedSniffer::OnDataAvailable(nsIRequest* request, nsISupports* context,  // both unused
     379                 :                                nsIInputStream* stream, PRUint32 offset,  // |offset| is unused
     380                 :                                PRUint32 count)
     381                 : {
     382                 :   PRUint32 read;  // receives ReadSegments' byte count; not inspected afterwards
     383                 :   return stream->ReadSegments(AppendSegmentToString, &mDecodedData, count, 
     384               0 :                               &read);
     385                 : }
     386                 : 
     387                 : NS_IMETHODIMP  // No-op: all arguments, including |status|, are ignored and NS_OK is always returned.
     388               0 : nsFeedSniffer::OnStopRequest(nsIRequest* request, nsISupports* context, 
     389                 :                              nsresult status)
     390                 : {
     391               0 :   return NS_OK; 
     392                 : }

Generated by: LCOV version 1.7