LCOV - code coverage report
Current view: directory - content/base/src - mozSanitizingSerializer.cpp (source / functions) Found Hit Coverage
Test: app.info Lines: 229 0 0.0 %
Date: 2012-06-02 Functions: 34 0 0.0 %

       1                 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2                 : /* vim: set ts=2 sw=2 et tw=80: */
       3                 : /* ***** BEGIN LICENSE BLOCK *****
       4                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       5                 :  *
       6                 :  * The contents of this file are subject to the Mozilla Public License Version
       7                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       8                 :  * the License. You may obtain a copy of the License at
       9                 :  * http://www.mozilla.org/MPL/
      10                 :  *
      11                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      12                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      13                 :  * for the specific language governing rights and limitations under the
      14                 :  * License.
      15                 :  *
      16                 :  * The Original Code is mozilla.org HTML Sanitizer code.
      17                 :  *
      18                 :  * The Initial Developer of the Original Code is
      19                 :  * Ben Bucksch <mozilla@bucksch.org>.
      20                 :  * Portions created by the Initial Developer are Copyright (C) 2002
      21                 :  * the Initial Developer. All Rights Reserved.
      22                 :  *
      23                 :  * Contributor(s):
      24                 :  *   Netscape
      25                 :  *
      26                 :  * Alternatively, the contents of this file may be used under the terms of
      27                 :  * either of the GNU General Public License Version 2 or later (the "GPL"),
      28                 :  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      29                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      30                 :  * of those above. If you wish to allow use of your version of this file only
      31                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      32                 :  * use your version of this file under the terms of the MPL, indicate your
      33                 :  * decision by deleting the provisions above and replace them with the notice
      34                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      35                 :  * the provisions above, a recipient may use your version of this file under
      36                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      37                 :  *
      38                 :  * ***** END LICENSE BLOCK ***** */
      39                 : 
      40                 : /*
      41                 :  * A serializer and content sink that removes potentially insecure or
      42                 :  * otherwise dangerous or offending HTML (eg for display of HTML
      43                 :  * e-mail attachments or something).
      44                 :  */
      45                 : 
      46                 : /* I used nsPlaintextSerializer as base for this class. I don't understand
      47                 :    all of the functions in the beginning. Possible that I fail to do
      48                 :    something or do something useless.
      49                 :    I am not proud about the implementation here at all.
      50                 :    Feel free to fix it :-).
      51                 : */
      52                 : 
      53                 : #include "mozSanitizingSerializer.h"
      54                 : #include "nsIServiceManager.h"
      55                 : #include "nsIDOMElement.h"
      56                 : #include "nsTextFragment.h"
      57                 : #include "nsContentUtils.h"
      58                 : #include "nsReadableUtils.h"
      59                 : #include "plstr.h"
      60                 : #include "nsIProperties.h"
      61                 : #include "nsUnicharUtils.h"
      62                 : #include "nsIURI.h"
      63                 : #include "nsNetUtil.h"
      64                 : #include "nsEscape.h"
      65                 : #include "mozilla/dom/Element.h"
      66                 : 
      67                 : using namespace mozilla::dom;
      68                 : 
      69               0 : static inline PRUnichar* escape(const nsString& source)
      70                 : {
      71               0 :   return nsEscapeHTML2(source.get(), source.Length()); 
      72                 : }
      73                 : 
      74                 : /* XXX: |printf|s in some error conditions. They are intended as information
      75                 :    for the user, because they complain about malformed pref values.
      76                 :    Not sure, if popping up dialog boxes is the right thing for such code
      77                 :    (and if so, how to do it).
      78                 :  */
      79                 : 
      80                 : #define TEXT_REMOVED "&lt;Text removed&gt;"
      81                 : #define TEXT_BREAKER "|"
      82                 : 
      83               0 : nsresult NS_NewSanitizingHTMLSerializer(nsIContentSerializer** aSerializer)
      84                 : {
      85               0 :   mozSanitizingHTMLSerializer* it = new mozSanitizingHTMLSerializer();
      86               0 :   NS_ADDREF(it);
      87               0 :   *aSerializer = it;
      88               0 :   return NS_OK;
      89                 : }
      90                 : 
      91               0 : mozSanitizingHTMLSerializer::mozSanitizingHTMLSerializer()
      92                 :   : mSkipLevel(0),
      93               0 :     mAllowedTags(30) // Just some initial buffer size
      94                 : {
      95               0 :   mOutputString = nsnull;
      96               0 : }
      97                 : 
      98               0 : mozSanitizingHTMLSerializer::~mozSanitizingHTMLSerializer()
      99                 : {
     100                 : #ifdef DEBUG_BenB
     101                 :   printf("Output:\n%s\n", NS_LossyConvertUTF16toASCII(*mOutputString).get());
     102                 : #endif
     103               0 :   mAllowedTags.Enumerate(ReleaseProperties);
     104               0 : }
     105                 : 
     106                 : //<copy from="xpcom/ds/nsProperties.cpp">
     107                 : bool
     108               0 : mozSanitizingHTMLSerializer::ReleaseProperties(nsHashKey* key, void* data,
     109                 :                                                void* closure)
     110                 : {
     111               0 :   nsIProperties* prop = (nsIProperties*)data;
     112               0 :   NS_IF_RELEASE(prop);
     113               0 :   return true;
     114                 : }
     115                 : //</copy>
     116                 : 
     117               0 : NS_IMPL_ISUPPORTS4(mozSanitizingHTMLSerializer,
     118                 :                    nsIContentSerializer,
     119                 :                    nsIContentSink,
     120                 :                    nsIHTMLContentSink,
     121                 :                    mozISanitizingHTMLSerializer)
     122                 : 
     123                 : 
     124                 : NS_IMETHODIMP 
     125               0 : mozSanitizingHTMLSerializer::Init(PRUint32 aFlags, PRUint32 dummy,
     126                 :                                   const char* aCharSet, bool aIsCopying,
     127                 :                                   bool aIsWholeDocument)
     128                 : {
     129               0 :   NS_ENSURE_TRUE(nsContentUtils::GetParserService(), NS_ERROR_UNEXPECTED);
     130                 : 
     131               0 :   return NS_OK;
     132                 : }
     133                 : 
     134                 : NS_IMETHODIMP
     135               0 : mozSanitizingHTMLSerializer::Initialize(nsAString* aOutString,
     136                 :                                         PRUint32 aFlags,
     137                 :                                         const nsAString& allowedTags)
     138                 : {
     139               0 :   nsresult rv = Init(aFlags, 0, nsnull, false, false);
     140               0 :   NS_ENSURE_SUCCESS(rv, rv);
     141                 : 
     142                 :   // XXX This is wrong. It violates XPCOM string ownership rules.
     143                 :   // We're only getting away with this because instances of this
     144                 :   // class are restricted to single function scope.
     145                 :   // (Comment copied from nsPlaintextSerializer)
     146               0 :   mOutputString = aOutString;
     147                 : 
     148               0 :   ParsePrefs(allowedTags);
     149                 : 
     150               0 :   return NS_OK;
     151                 : }
     152                 : 
     153                 : // This is not used within the class, but maybe called from somewhere else?
     154                 : NS_IMETHODIMP
     155               0 : mozSanitizingHTMLSerializer::Flush(nsAString& aStr)
     156                 : {
     157                 : #ifdef DEBUG_BenB
     158                 :   printf("Flush: -%s-", NS_LossyConvertUTF16toASCII(aStr).get());
     159                 : #endif
     160               0 :   Write(aStr);
     161               0 :   return NS_OK;
     162                 : }
     163                 : 
     164                 : NS_IMETHODIMP
     165               0 : mozSanitizingHTMLSerializer::AppendDocumentStart(nsIDocument *aDocument,
     166                 :                                                  nsAString& aStr)
     167                 : {
     168               0 :   return NS_OK;
     169                 : }
     170                 : 
     171                 : void
     172               0 : mozSanitizingHTMLSerializer::Write(const nsAString& aString)
     173                 : {
     174               0 :   mOutputString->Append(aString);
     175               0 : }
     176                 : 
     177                 : 
     178                 : NS_IMETHODIMP
     179               0 : mozSanitizingHTMLSerializer::IsEnabled(PRInt32 aTag, bool* aReturn)
     180                 : {
     181               0 :   *aReturn = false;
     182               0 :   return NS_OK;
     183                 : }
     184                 : 
     185                 : 
     186                 : /**
     187                 :  * Returns true, if the id represents a container
     188                 :  */
     189                 : bool
     190               0 : mozSanitizingHTMLSerializer::IsContainer(PRInt32 aId)
     191                 : {
     192               0 :   bool isContainer = false;
     193                 : 
     194               0 :   nsIParserService* parserService = nsContentUtils::GetParserService();
     195               0 :   if (parserService) {
     196               0 :     parserService->IsContainer(aId, isContainer);
     197                 :   }
     198                 : 
     199               0 :   return isContainer;
     200                 : }
     201                 : 
     202                 : 
     203                 : /* XXX I don't really know, what these functions do, but they seem to be
     204                 :    needed ;-). Mostly copied from nsPlaintextSerializer. */
     205                 : /* akk says:
     206                 :    "I wonder if the sanitizing class could inherit from nsHTMLSerializer,
     207                 :    so that at least these methods that none of us understand only have to be
     208                 :    written once?" */
     209                 : 
     210                 : // static
     211                 : PRInt32
     212               0 : mozSanitizingHTMLSerializer::GetIdForContent(nsIContent* aContent)
     213                 : {
     214               0 :   if (!aContent->IsHTML()) {
     215               0 :     return eHTMLTag_unknown;
     216                 :   }
     217                 : 
     218               0 :   nsIParserService* parserService = nsContentUtils::GetParserService();
     219                 : 
     220               0 :   return parserService ? parserService->HTMLAtomTagToId(aContent->Tag()) :
     221               0 :                          eHTMLTag_unknown;
     222                 : }
     223                 : 
     224                 : NS_IMETHODIMP 
     225               0 : mozSanitizingHTMLSerializer::AppendText(nsIContent* aText,
     226                 :                                         PRInt32 aStartOffset,
     227                 :                                         PRInt32 aEndOffset, 
     228                 :                                         nsAString& aStr)
     229                 : {
     230               0 :   nsresult rv = NS_OK;
     231                 : 
     232               0 :   mOutputString = &aStr;
     233                 : 
     234               0 :   nsAutoString linebuffer;
     235               0 :   rv = DoAddLeaf(eHTMLTag_text, linebuffer);
     236                 : 
     237               0 :   return rv;
     238                 : }
     239                 : 
     240                 : NS_IMETHODIMP 
     241               0 : mozSanitizingHTMLSerializer::AppendElementStart(Element* aElement,
     242                 :                                                 Element* aOriginalElement,
     243                 :                                                 nsAString& aStr)
     244                 : {
     245               0 :   NS_ENSURE_ARG(aElement);
     246                 : 
     247               0 :   mElement = aElement;
     248                 : 
     249               0 :   mOutputString = &aStr;
     250                 : 
     251               0 :   PRInt32 id = GetIdForContent(mElement);
     252                 : 
     253               0 :   bool isContainer = IsContainer(id);
     254                 : 
     255                 :   nsresult rv;
     256               0 :   if (isContainer) {
     257               0 :     rv = DoOpenContainer(id);
     258                 :   }
     259                 :   else {
     260               0 :     rv = DoAddLeaf(id, EmptyString());
     261                 :   }
     262                 : 
     263               0 :   mElement = nsnull;
     264               0 :   mOutputString = nsnull;
     265                 : 
     266               0 :   return rv;
     267                 : } 
     268                 :  
     269                 : NS_IMETHODIMP 
     270               0 : mozSanitizingHTMLSerializer::AppendElementEnd(Element* aElement,
     271                 :                                               nsAString& aStr)
     272                 : {
     273               0 :   NS_ENSURE_ARG(aElement);
     274                 : 
     275               0 :   mElement = aElement;
     276                 : 
     277               0 :   mOutputString = &aStr;
     278                 : 
     279               0 :   PRInt32 id = GetIdForContent(mElement);
     280                 : 
     281               0 :   bool isContainer = IsContainer(id);
     282                 : 
     283               0 :   nsresult rv = NS_OK;
     284               0 :   if (isContainer) {
     285               0 :     rv = DoCloseContainer(id);
     286                 :   }
     287                 : 
     288               0 :   mElement = nsnull;
     289               0 :   mOutputString = nsnull;
     290                 : 
     291               0 :   return rv;
     292                 : }
     293                 : 
     294                 : NS_IMETHODIMP
     295               0 : mozSanitizingHTMLSerializer::OpenContainer(const nsIParserNode& aNode)
     296                 : {
     297               0 :   PRInt32 type = aNode.GetNodeType();
     298                 : 
     299               0 :   mParserNode = const_cast<nsIParserNode *>(&aNode);
     300               0 :   return DoOpenContainer(type);
     301                 : }
     302                 : 
     303                 : NS_IMETHODIMP 
     304               0 : mozSanitizingHTMLSerializer::CloseContainer(const nsHTMLTag aTag)
     305                 : {
     306               0 :   return DoCloseContainer(aTag);
     307                 : }
     308                 : 
     309                 : NS_IMETHODIMP 
     310               0 : mozSanitizingHTMLSerializer::AddLeaf(const nsIParserNode& aNode)
     311                 : {
     312               0 :   eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
     313               0 :   const nsAString& text = aNode.GetText();
     314                 : 
     315               0 :   mParserNode = const_cast<nsIParserNode*>(&aNode);
     316               0 :   return DoAddLeaf(type, text);
     317                 : }
     318                 : 
     319                 : NS_IMETHODIMP 
     320               0 : mozSanitizingHTMLSerializer::SetDocumentCharset(nsACString& aCharset)
     321                 : {
     322                 :   // No idea, if this works - it isn't invoked by |TestOutput|.
     323               0 :   Write(NS_LITERAL_STRING("\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=")
     324                 :         /* Danger: breaking the line within the string literal, like
     325                 :            "foo"\n"bar", breaks win32! */
     326               0 :         + nsAdoptingString(escape(NS_ConvertASCIItoUTF16(aCharset)))
     327               0 :         + NS_LITERAL_STRING("\">\n"));
     328               0 :   return NS_OK;
     329                 : }
     330                 : 
     331                 : NS_IMETHODIMP 
     332               0 : mozSanitizingHTMLSerializer::OpenHead()
     333                 : {
     334                 :   // XXX We don't have a parser node here, is it okay to ignore this?
     335                 :   // return OpenContainer(aNode);
     336               0 :   return NS_OK;
     337                 : }
     338                 : 
     339                 : // Here comes the actual code...
     340                 : 
     341                 : nsresult
     342               0 : mozSanitizingHTMLSerializer::DoOpenContainer(PRInt32 aTag)
     343                 : {
     344               0 :   eHTMLTags type = (eHTMLTags)aTag;
     345                 : 
     346               0 :   if (mSkipLevel == 0 && IsAllowedTag(type))
     347                 :   {
     348               0 :     nsIParserService* parserService = nsContentUtils::GetParserService();
     349               0 :     if (!parserService)
     350               0 :       return NS_ERROR_OUT_OF_MEMORY;
     351               0 :     const PRUnichar* tag_name = parserService->HTMLIdToStringTag(aTag);
     352               0 :     NS_ENSURE_TRUE(tag_name, NS_ERROR_INVALID_POINTER);
     353                 : 
     354               0 :     Write(NS_LITERAL_STRING("<") + nsDependentString(tag_name));
     355                 : 
     356                 :     // Attributes
     357               0 :     if (mParserNode)
     358                 :     {
     359               0 :       PRInt32 count = mParserNode->GetAttributeCount();
     360               0 :       for (PRInt32 i = 0; i < count; i++)
     361                 :       {
     362               0 :         const nsAString& key = mParserNode->GetKeyAt(i);
     363               0 :         if(IsAllowedAttribute(type, key))
     364                 :         {
     365                 :           // Ensure basic sanity of value
     366               0 :           nsAutoString value(mParserNode->GetValueAt(i));
     367                 :                     // SanitizeAttrValue() modifies |value|
     368               0 :           if (NS_SUCCEEDED(SanitizeAttrValue(type, key, value)))
     369                 :           {
     370                 :             // Write out
     371               0 :             Write(NS_LITERAL_STRING(" "));
     372               0 :             Write(key); // I get an infinive loop with | + key + | !!!
     373               0 :             Write(NS_LITERAL_STRING("=\"") + value + NS_LITERAL_STRING("\""));
     374                 :           }
     375                 :         }
     376                 :       }
     377                 :     }
     378                 : 
     379               0 :     Write(NS_LITERAL_STRING(">"));
     380                 :   }
     381               0 :   else if (mSkipLevel != 0 || type == eHTMLTag_script || type == eHTMLTag_style)
     382               0 :     ++mSkipLevel;
     383                 :   else
     384               0 :     Write(NS_LITERAL_STRING(" "));
     385                 : 
     386               0 :   return NS_OK;
     387                 : 
     388                 : }
     389                 : 
     390                 : nsresult
     391               0 : mozSanitizingHTMLSerializer::DoCloseContainer(PRInt32 aTag)
     392                 : {
     393               0 :   eHTMLTags type = (eHTMLTags)aTag;
     394                 : 
     395               0 :   if (mSkipLevel == 0 && IsAllowedTag(type)) {
     396               0 :     nsIParserService* parserService = nsContentUtils::GetParserService();
     397               0 :     if (!parserService)
     398               0 :       return NS_ERROR_OUT_OF_MEMORY;
     399               0 :     const PRUnichar* tag_name = parserService->HTMLIdToStringTag(aTag);
     400               0 :     NS_ENSURE_TRUE(tag_name, NS_ERROR_INVALID_POINTER);
     401                 : 
     402               0 :     Write(NS_LITERAL_STRING("</") + nsDependentString(tag_name)
     403               0 :           + NS_LITERAL_STRING(">"));
     404                 :   }
     405               0 :   else if (mSkipLevel == 0)
     406               0 :     Write(NS_LITERAL_STRING(" "));
     407                 :   else
     408               0 :     --mSkipLevel;
     409                 : 
     410               0 :   return NS_OK;
     411                 : }
     412                 : 
     413                 : nsresult
     414               0 : mozSanitizingHTMLSerializer::DoAddLeaf(PRInt32 aTag,
     415                 :                                        const nsAString& aText)
     416                 : {
     417               0 :   if (mSkipLevel != 0)
     418               0 :     return NS_OK;
     419                 : 
     420               0 :   eHTMLTags type = (eHTMLTags)aTag;
     421                 : 
     422               0 :   nsresult rv = NS_OK;
     423                 : 
     424               0 :   if (type == eHTMLTag_whitespace ||
     425                 :       type == eHTMLTag_newline)
     426                 :   {
     427               0 :     Write(aText); // sure to be safe?
     428                 :   }
     429               0 :   else if (type == eHTMLTag_text)
     430                 :   {
     431               0 :     nsAutoString text(aText);
     432               0 :     if(NS_SUCCEEDED(SanitizeTextNode(text)))
     433               0 :       Write(text);
     434                 :     else
     435               0 :       Write(NS_LITERAL_STRING(TEXT_REMOVED)); // Does not happen (yet)
     436               0 :     NS_ENSURE_SUCCESS(rv, rv);
     437                 :   }
     438               0 :   else if (type == eHTMLTag_entity)
     439                 :   {
     440               0 :     Write(NS_LITERAL_STRING("&"));
     441               0 :     Write(aText); // sure to be safe?
     442                 :     // using + operator here might give an infinitive loop, see above.
     443                 :     // not adding ";", because Gecko delivers that as part of |aText| (freaky)
     444                 :   }
     445                 :   else
     446                 :   {
     447               0 :     DoOpenContainer(type);
     448                 :   }
     449                 : 
     450               0 :   return rv;
     451                 : }
     452                 : 
     453                 : 
     454                 : /**
     455                 :    Similar to SanitizeAttrValue.
     456                 :  */
     457                 : nsresult
     458               0 : mozSanitizingHTMLSerializer::SanitizeTextNode(nsString& aText /*inout*/)
     459                 : {
     460               0 :   aText.Adopt(escape(aText));
     461               0 :   return NS_OK;
     462                 : }
     463                 : 
     464                 : /**
     465                 :    Ensures basic sanity of attribute value.
     466                 :    This function also (tries to :-( ) makes sure, that no
     467                 :    unwanted / dangerous URLs appear in the document
     468                 :    (like javascript: and data:).
     469                 : 
     470                 :    Pass the value as |aValue| arg. It will be modified in-place.
     471                 : 
     472                 :    If the value is not allowed at all, we return with NS_ERROR_ILLEGAL_VALUE.
     473                 :    In that case, do not use the |aValue|, but output nothing.
     474                 :  */
     475                 : nsresult
     476               0 : mozSanitizingHTMLSerializer::SanitizeAttrValue(nsHTMLTag aTag,
     477                 :                                                const nsAString& anAttrName,
     478                 :                                                nsString& aValue /*inout*/)
     479                 : {
     480                 :   /* First, cut the attribute to 1000 chars.
     481                 :      Attributes with values longer than 1000 chars seem bogus,
     482                 :      considering that we don't support any JS. The longest attributes
     483                 :      I can think of are URLs, and URLs with 1000 chars are likely to be
     484                 :      bogus, too. */
     485               0 :   aValue = Substring(aValue, 0, 1000);
     486                 :   //aValue.Truncate(1000); //-- this cuts half of the document !!?!!
     487                 : 
     488               0 :   aValue.Adopt(escape(aValue));
     489                 : 
     490                 :   /* Check some known bad stuff. Add more!
     491                 :      I don't care too much, if it happens to trigger in some innocent cases
     492                 :      (like <img alt="Statistical data: Mortage rates and newspapers">) -
     493                 :      security first. */
     494               0 :   if (aValue.Find("javascript:") != kNotFound ||
     495               0 :       aValue.Find("data:") != kNotFound ||
     496               0 :       aValue.Find("base64") != kNotFound)
     497               0 :     return NS_ERROR_ILLEGAL_VALUE;
     498                 : 
     499                 :   // Check img src scheme
     500               0 :   if (aTag == eHTMLTag_img && 
     501               0 :       anAttrName.LowerCaseEqualsLiteral("src"))
     502                 :   {
     503                 :     nsresult rv;
     504               0 :     nsCOMPtr<nsIIOService> ioService = do_GetIOService(&rv);
     505               0 :     NS_ENSURE_SUCCESS(rv, rv);
     506               0 :     nsCAutoString scheme;
     507               0 :     rv = ioService->ExtractScheme(NS_LossyConvertUTF16toASCII(aValue), scheme);
     508               0 :     NS_ENSURE_SUCCESS(rv, rv);
     509                 : 
     510               0 :     if (!scheme.Equals("cid", nsCaseInsensitiveCStringComparator()))
     511               0 :       return NS_ERROR_ILLEGAL_VALUE;
     512                 :   }
     513                 : 
     514                 : #ifdef DEBUG_BenB
     515                 :   printf("attribute value for %s: -%s-\n",
     516                 :          NS_LossyConvertUTF16toASCII(anAttrName).get(),
     517                 :          NS_LossyConvertUTF16toASCII(aValue).get());
     518                 : #endif
     519                 : 
     520               0 :   return NS_OK;
     521                 : }
     522                 : 
     523                 : /**
     524                 :  */
     525                 : bool
     526               0 : mozSanitizingHTMLSerializer::IsAllowedTag(nsHTMLTag aTag)
     527                 : {
     528                 : 
     529               0 :   nsPRUint32Key tag_key(aTag);
     530                 : #ifdef DEBUG_BenB
     531                 :   printf("IsAllowedTag %d: %s\n",
     532                 :          aTag,
     533                 :          mAllowedTags.Exists(&tag_key)?"yes":"no");
     534                 : #endif
     535               0 :   return mAllowedTags.Exists(&tag_key);
     536                 : }
     537                 : 
     538                 : 
     539                 : /**
     540                 :  */
     541                 : bool
     542               0 : mozSanitizingHTMLSerializer::IsAllowedAttribute(nsHTMLTag aTag,
     543                 :                                              const nsAString& anAttributeName)
     544                 : {
     545                 : #ifdef DEBUG_BenB
     546                 :   printf("IsAllowedAttribute %d, -%s-\n",
     547                 :          aTag,
     548                 :          NS_LossyConvertUTF16toASCII(anAttributeName).get());
     549                 : #endif
     550                 :   nsresult rv;
     551                 : 
     552               0 :   nsPRUint32Key tag_key(aTag);
     553               0 :   nsIProperties* attr_bag = (nsIProperties*)mAllowedTags.Get(&tag_key);
     554               0 :   NS_ENSURE_TRUE(attr_bag, false);
     555                 : 
     556                 :   bool allowed;
     557               0 :   nsCAutoString attr;
     558               0 :   ToLowerCase(NS_ConvertUTF16toUTF8(anAttributeName), attr);
     559               0 :   rv = attr_bag->Has(attr.get(), &allowed);
     560               0 :   if (NS_FAILED(rv))
     561               0 :     return false;
     562                 : 
     563                 : #ifdef DEBUG_BenB
     564                 :   printf(" Allowed: %s\n", allowed?"yes":"no");
     565                 : #endif
     566               0 :   return allowed;
     567                 : }
     568                 : 
     569                 : 
     570                 : /**
     571                 :    aPref is a long string, which holds an exhaustive list of allowed tags
     572                 :    and attributes. All other tags and attributes will be removed.
     573                 : 
     574                 :    aPref has the format
     575                 :    "html head body ul ol li a(href,name,title) img(src,alt,title) #text"
     576                 :    i.e.
     577                 :    - tags are separated by whitespace
     578                 :    - the attribute list follows the tag directly in brackets
     579                 :    - the attributes are separated by commas.
     580                 : 
     581                 :    There is no way to express further restrictions, like "no text inside the
     582                 :    <head> element". This is so to considerably reduce the complexity of the
     583                 :    pref and this implementation.
     584                 : 
     585                 :    Update: Akk told me that I might be able to use DTD classes. Later(TM)...
     586                 :  */
     587                 : nsresult
     588               0 : mozSanitizingHTMLSerializer::ParsePrefs(const nsAString& aPref)
     589                 : {
     590               0 :   char* pref = ToNewCString(aPref);
     591                 :   char* tags_lasts;
     592               0 :   for (char* iTag = PL_strtok_r(pref, " ", &tags_lasts);
     593                 :        iTag;
     594                 :        iTag = PL_strtok_r(NULL, " ", &tags_lasts))
     595                 :   {
     596               0 :     ParseTagPref(nsCAutoString(iTag));
     597                 :   }
     598               0 :   delete[] pref;
     599                 : 
     600               0 :   return NS_OK;
     601                 : }
     602                 : 
     603                 : 
     604                 : /**
     605                 :    Parses e.g. "a(href,title)" (but not several tags at once).
     606                 :  */
     607                 : nsresult
     608               0 : mozSanitizingHTMLSerializer::ParseTagPref(const nsCAutoString& tagpref)
     609                 : {
     610               0 :   nsIParserService* parserService = nsContentUtils::GetParserService();
     611               0 :   if (!parserService)
     612               0 :     return NS_ERROR_OUT_OF_MEMORY;
     613                 : 
     614                 :   // Parsing tag
     615               0 :   PRInt32 bracket = tagpref.FindChar('(');
     616               0 :   if (bracket == 0)
     617                 :   {
     618               0 :     printf(" malformed pref: %s\n", tagpref.get());
     619               0 :     return NS_ERROR_CANNOT_CONVERT_DATA;
     620                 :   }
     621                 : 
     622               0 :   nsAutoString tag;
     623               0 :   CopyUTF8toUTF16(StringHead(tagpref, bracket), tag);
     624                 : 
     625                 :   // Create key
     626               0 :   PRInt32 tag_id = parserService->HTMLStringTagToId(tag);
     627               0 :   if (tag_id == eHTMLTag_userdefined)
     628                 :   {
     629                 :     printf(" unknown tag <%s>, won't add.\n",
     630               0 :            NS_ConvertUTF16toUTF8(tag).get());
     631               0 :     return NS_ERROR_CANNOT_CONVERT_DATA;
     632                 :   }
     633               0 :   nsPRUint32Key tag_key(tag_id);
     634                 : 
     635               0 :   if (mAllowedTags.Exists(&tag_key))
     636                 :   {
     637               0 :     printf(" duplicate tag: %s\n", NS_ConvertUTF16toUTF8(tag).get());
     638               0 :     return NS_ERROR_CANNOT_CONVERT_DATA;
     639                 :   }
     640               0 :   if (bracket == kNotFound)
     641                 :     /* There are no attributes in the pref. So, allow none; only the tag
     642                 :        itself */
     643                 :   {
     644               0 :     mAllowedTags.Put(&tag_key, 0);
     645                 :   }
     646                 :   else
     647                 :   {
     648                 :     // Attributes
     649                 : 
     650                 :     // where is the macro for non-fatal errors in opt builds?
     651               0 :     if(tagpref[tagpref.Length() - 1] != ')' ||
     652               0 :        tagpref.Length() < PRUint32(bracket) + 3)
     653                 :     {
     654               0 :       printf(" malformed pref: %s\n", tagpref.get());
     655               0 :       return NS_ERROR_CANNOT_CONVERT_DATA;
     656                 :     }
     657                 :     nsCOMPtr<nsIProperties> attr_bag =
     658               0 :                                  do_CreateInstance(NS_PROPERTIES_CONTRACTID);
     659               0 :     NS_ENSURE_TRUE(attr_bag, NS_ERROR_INVALID_POINTER);
     660               0 :     nsCAutoString attrList;
     661                 :     attrList.Append(Substring(tagpref,
     662                 :                               bracket + 1,
     663               0 :                               tagpref.Length() - 2 - bracket));
     664                 :     char* attrs_lasts;
     665               0 :     for (char* iAttr = PL_strtok_r(attrList.BeginWriting(),
     666               0 :                                    ",", &attrs_lasts);
     667                 :          iAttr;
     668                 :          iAttr = PL_strtok_r(NULL, ",", &attrs_lasts))
     669                 :     {
     670               0 :       attr_bag->Set(iAttr, 0);
     671                 :     }
     672                 : 
     673               0 :     nsIProperties* attr_bag_raw = attr_bag;
     674               0 :     NS_ADDREF(attr_bag_raw);
     675               0 :     mAllowedTags.Put(&tag_key, attr_bag_raw);
     676                 :   }
     677                 : 
     678               0 :   return NS_OK;
     679                 : }
     680                 : 
     681                 : /*
     682                 :   might be useful:
     683                 :   htmlparser/public/nsHTMLTokens.h for tag categories
     684                 : */

Generated by: LCOV version 1.7