LCOV - code coverage report
Current view: directory - netwerk/streamconv/converters - mozTXTToHTMLConv.cpp (source / functions) Found Hit Coverage
Test: app.info Lines: 588 0 0.0 %
Date: 2012-06-02 Functions: 35 0 0.0 %

       1                 : /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2                 : /* ***** BEGIN LICENSE BLOCK *****
       3                 :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       4                 :  *
       5                 :  * The contents of this file are subject to the Mozilla Public License Version
       6                 :  * 1.1 (the "License"); you may not use this file except in compliance with
       7                 :  * the License. You may obtain a copy of the License at
       8                 :  * http://www.mozilla.org/MPL/
       9                 :  *
      10                 :  * Software distributed under the License is distributed on an "AS IS" basis,
      11                 :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      12                 :  * for the specific language governing rights and limitations under the
      13                 :  * License.
      14                 :  *
      15                 :  * The Original Code is the Mozilla Text to HTML converter code.
      16                 :  *
      17                 :  * The Initial Developer of the Original Code is
      18                 :  * Ben Bucksch <http://www.bucksch.org>.
      19                 :  * Portions created by the Initial Developer are Copyright (C) 1999, 2000
      20                 :  * the Initial Developer. All Rights Reserved.
      21                 :  *
      22                 :  * Contributor(s):
      23                 :  *
      24                 :  * Alternatively, the contents of this file may be used under the terms of
      25                 :  * either the GNU General Public License Version 2 or later (the "GPL"), or
      26                 :  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      27                 :  * in which case the provisions of the GPL or the LGPL are applicable instead
      28                 :  * of those above. If you wish to allow use of your version of this file only
      29                 :  * under the terms of either the GPL or the LGPL, and not to allow others to
      30                 :  * use your version of this file under the terms of the MPL, indicate your
      31                 :  * decision by deleting the provisions above and replace them with the notice
      32                 :  * and other provisions required by the GPL or the LGPL. If you do not delete
      33                 :  * the provisions above, a recipient may use your version of this file under
      34                 :  * the terms of any one of the MPL, the GPL or the LGPL.
      35                 :  *
      36                 :  * ***** END LICENSE BLOCK ***** */
      37                 : 
      38                 : #include "mozTXTToHTMLConv.h"
      39                 : #include "nsIServiceManager.h"
      40                 : #include "nsNetCID.h"
      41                 : #include "nsNetUtil.h"
      42                 : #include "nsReadableUtils.h"
      43                 : #include "nsUnicharUtils.h"
      44                 : #include "nsCRT.h"
      45                 : #include "nsIExternalProtocolHandler.h"
      46                 : 
      47                 : static NS_DEFINE_CID(kIOServiceCID, NS_IOSERVICE_CID);
      48                 : 
      49                 : #ifdef DEBUG_BenB_Perf
      50                 : #include "prtime.h"
      51                 : #include "prinrval.h"
      52                 : #endif
      53                 : 
                           // Floating-point constant 1.2. Nothing in the visible part of this file
                           // reads it; presumably a buffer growth factor -- TODO confirm at its users.
      54                 : const PRFloat64 growthRate = 1.2;
      55                 : 
      56                 : // Bug 183111, editor now replaces multiple spaces with leading
      57                 : // 0xA0's and a single ending space, so need to treat 0xA0's as spaces.
      58                 : // 0xA0 is the Latin1/Unicode character for "non-breaking space (nbsp)"
      59                 : // Also recognize the Japanese ideographic space 0x3000 as a space.
                           //
                           // Returns true when aChar is ASCII whitespace (as decided by
                           // nsCRT::IsAsciiSpace), U+00A0 or U+3000, and false for anything else.
      60               0 : static inline bool IsSpace(const PRUnichar aChar)
      61                 : {
      62               0 :   return (nsCRT::IsAsciiSpace(aChar) || aChar == 0xA0 || aChar == 0x3000);
      63                 : }
      64                 : 
      65                 : // Escape Char will take ch, escape it and append the result to 
      66                 : // aStringToAppendTo
                           //
                           //   ch                 the character to escape
                           //   aStringToAppendTo  receives the result (appended, never assigned)
                           //   inAttribute        when true, a double quote is emitted as &quot;
                           //
                           // <, > and & are always written as &lt;, &gt; and &amp;. Every other
                           // character, including a double quote while inAttribute is false, is
                           // appended unchanged.
      67                 : void
      68               0 : mozTXTToHTMLConv::EscapeChar(const PRUnichar ch, nsString& aStringToAppendTo,
      69                 :                              bool inAttribute)
      70                 : {
      71               0 :     switch (ch)
      72                 :     {
      73                 :     case '<':
      74               0 :       aStringToAppendTo.AppendLiteral("&lt;");
      75               0 :       break;
      76                 :     case '>':
      77               0 :       aStringToAppendTo.AppendLiteral("&gt;");
      78               0 :       break;
      79                 :     case '&':
      80               0 :       aStringToAppendTo.AppendLiteral("&amp;");
      81               0 :       break;
      82                 :     case '"':
                                 // Only escaped inside attribute values; in element content the quote
                                 // is handled by the default branch below.
      83               0 :       if (inAttribute)
      84                 :       {
      85               0 :         aStringToAppendTo.AppendLiteral("&quot;");
      86               0 :         break;
      87                 :       }
      88                 :       // else fall through
      89                 :     default:
      90               0 :       aStringToAppendTo += ch;
      91                 :     }
      92                 : 
      93                 :     return;
      94                 : }
      95                 : 
      96                 : // EscapeStr takes the passed in string and
      97                 : // escapes it IN PLACE.
                           //
                           //   aInString    the string to escape; it is modified in place
                           //   inAttribute  when true, double quotes are also replaced (by &quot;)
                           //
                           // Each <, > or & is cut out and the matching entity is inserted at the
                           // same index. The index is then advanced by the length of the entity, so
                           // the & that starts the inserted entity is never escaped a second time.
      98                 : void
      99               0 : mozTXTToHTMLConv::EscapeStr(nsString& aInString, bool inAttribute)
     100                 : {
     101                 :   // the replace substring routines
     102                 :   // don't seem to work if you have a character
     103                 :   // in the in string that is also in the replacement
     104                 :   // string! =(
     105                 :   //aInString.ReplaceSubstring("&", "&amp;");
     106                 :   //aInString.ReplaceSubstring("<", "&lt;");
     107                 :   //aInString.ReplaceSubstring(">", "&gt;");
                           // No increment in the for header: every branch advances i itself.
     108               0 :   for (PRUint32 i = 0; i < aInString.Length();)
     109                 :   {
     110               0 :     switch (aInString[i])
     111                 :     {
     112                 :     case '<':
     113               0 :       aInString.Cut(i, 1);
     114               0 :       aInString.Insert(NS_LITERAL_STRING("&lt;"), i);
     115               0 :       i += 4; // skip past the 4 characters of the entity we just inserted
     116               0 :       break;
     117                 :     case '>':
     118               0 :       aInString.Cut(i, 1);
     119               0 :       aInString.Insert(NS_LITERAL_STRING("&gt;"), i);
     120               0 :       i += 4; // skip past the 4 characters of the entity we just inserted
     121               0 :       break;
     122                 :     case '&':
     123               0 :       aInString.Cut(i, 1);
     124               0 :       aInString.Insert(NS_LITERAL_STRING("&amp;"), i);
     125               0 :       i += 5; // skip past the 5 characters of the entity we just inserted
     126               0 :       break;
     127                 :     case '"':
     128               0 :       if (inAttribute)
     129                 :       {
     130               0 :         aInString.Cut(i, 1);
     131               0 :         aInString.Insert(NS_LITERAL_STRING("&quot;"), i);
     132               0 :         i += 6; // skip past the 6 characters of the entity we just inserted
     133               0 :         break;
     134                 :       }
     135                 :       // else fall through
     136                 :     default:
     137               0 :       i++;
     138                 :     }
     139                 :   }
     140               0 : }
     141                 : 
                           // UnescapeStr appends to aOutString the characters
                           // aInString[aStartPos] .. aInString[aStartPos + aLength - 1], replacing
                           // the entities &lt; &gt; &amp; and &quot; by the character they stand for.
                           //
                           //   aInString   buffer to read from
                           //   aStartPos   index of the first character to process
                           //   aLength     number of characters to process
                           //   aOutString  receives the unescaped text (appended, never assigned)
                           //
                           // An entity is only recognised when all of its characters lie inside the
                           // requested range. A truncated entity at the end of the range (for
                           // example a lone & as the last character) is copied through literally
                           // rather than being accepted on the strength of its prefix alone.
     142                 : void 
     143               0 : mozTXTToHTMLConv::UnescapeStr(const PRUnichar * aInString, PRInt32 aStartPos, PRInt32 aLength, nsString& aOutString)
     144                 : {
     145               0 :   const PRUnichar * subString = nsnull;
                           // No increment in the for header: every branch advances i itself.
     146               0 :   for (PRUint32 i = aStartPos; PRInt32(i) - aStartPos < aLength;)
     147                 :   {
                             // Characters left in the range, counting the one at i (always >= 1 here).
     148               0 :     PRInt32 remainingChars = aLength - (PRInt32(i) - aStartPos);
     149               0 :     if (aInString[i] == '&')
     150                 :     {
     151               0 :       subString = &aInString[i];
                               // Each test first checks that the whole entity fits into the range,
                               // then compares exactly that many characters.
     152               0 :       if (remainingChars >= 4 && !nsCRT::strncmp(subString, NS_LITERAL_STRING("&lt;").get(), 4))
     153                 :       {
     154               0 :         aOutString.Append(PRUnichar('<'));
     155               0 :         i += 4;
     156                 :       }
     157               0 :       else if (remainingChars >= 4 && !nsCRT::strncmp(subString, NS_LITERAL_STRING("&gt;").get(), 4))
     158                 :       {
     159               0 :         aOutString.Append(PRUnichar('>'));
     160               0 :         i += 4;
     161                 :       }
     162               0 :       else if (remainingChars >= 5 && !nsCRT::strncmp(subString, NS_LITERAL_STRING("&amp;").get(), 5))
     163                 :       {
     164               0 :         aOutString.Append(PRUnichar('&'));
     165               0 :         i += 5;
     166                 :       }
     167               0 :       else if (remainingChars >= 6 && !nsCRT::strncmp(subString, NS_LITERAL_STRING("&quot;").get(), 6))
     168                 :       {
     169               0 :         aOutString.Append(PRUnichar('"'));
     170               0 :         i += 6;
     171                 :       }
     172                 :       else
     173                 :       {
                                 // An & that does not start a known, complete entity: keep it as is.
     174               0 :         aOutString += aInString[i];
     175               0 :         i++;
     176                 :       }
     177                 :     }
     178                 :     else
     179                 :     {
     180               0 :       aOutString += aInString[i];
     181               0 :       i++;
     182                 :     }
     183                 :   }
     184               0 : }
     185                 : 
                           // CompleteAbbreviatedURL turns an abbreviated URL into a full one by
                           // putting a scheme in front of it.
                           //
                           //   aInString   the abbreviated URL text
                           //   aInLength   number of characters in aInString
                           //   pos         index inside aInString of the @ or . that triggered the call
                           //   aOutString  assigned the completed URL; left untouched when no rule
                           //               below applies or when pos is out of range
                           //
                           // Rules, selected by the character at pos:
                           //   @  and a . exists at or after pos        -> mailto: + aInString
                           //   .  and ItMatchesDelimited matches www.   -> http:// + aInString
                           //   .  and ItMatchesDelimited matches ftp.   -> ftp:// + aInString
                           //
                           // NOTE(review): aInString is appended with operator+= and not with
                           // aInLength, so it presumably must be null-terminated -- TODO confirm.
     186                 : void
     187               0 : mozTXTToHTMLConv::CompleteAbbreviatedURL(const PRUnichar * aInString, PRInt32 aInLength, 
     188                 :                                          const PRUint32 pos, nsString& aOutString)
     189                 : {
     190               0 :   NS_ASSERTION(PRInt32(pos) < aInLength, "bad args to CompleteAbbreviatedURL, see bug #190851");
                           // Bail out on the same bad-argument condition the assertion reports.
     191               0 :   if (PRInt32(pos) >= aInLength)
     192               0 :     return;
     193                 : 
     194               0 :   if (aInString[pos] == '@')
     195                 :   {
     196                 :     // only pre-pend a mailto url if the string contains a .domain in it..
     197                 :     //i.e. we want to linkify johndoe@foo.com but not "let's meet @8pm"
     198               0 :     nsDependentString inString(aInString, aInLength);
     199               0 :     if (inString.FindChar('.', pos) != kNotFound) // if we have a '.' after the @ sign....
     200                 :     {
     201               0 :       aOutString.AssignLiteral("mailto:");
     202               0 :       aOutString += aInString;
     203                 :     }
     204                 :   }
     205               0 :   else if (aInString[pos] == '.')
     206                 :   {
     207               0 :     if (ItMatchesDelimited(aInString, aInLength,
     208               0 :                            NS_LITERAL_STRING("www.").get(), 4, LT_IGNORE, LT_IGNORE))
     209                 :     {
     210               0 :       aOutString.AssignLiteral("http://");
     211               0 :       aOutString += aInString;
     212                 :     }
     213               0 :     else if (ItMatchesDelimited(aInString,aInLength, NS_LITERAL_STRING("ftp.").get(), 4, LT_IGNORE, LT_IGNORE))
     214                 :     { 
     215               0 :       aOutString.AssignLiteral("ftp://");
     216               0 :       aOutString += aInString;
     217                 :     }
     218                 :   }
     219                 : }
     220                 : 
                           // FindURLStart looks backwards from pos for the first character of a URL
                           // candidate, using the rules of the given mode.
                           //
                           //   aInString  text being scanned
                           //   aInLength  number of characters in aInString
                           //   pos        index of the character that triggered the search
                           //   check      recognition mode that selects the rule set
                           //   start      out: index of the first URL character; only written when
                           //              the mode found a candidate start
                           //
                           // Returns true when a start was found (for RFC2396E additionally only when
                           // that start lies before pos); false otherwise and for unknown modes.
     221                 : bool
     222               0 : mozTXTToHTMLConv::FindURLStart(const PRUnichar * aInString, PRInt32 aInLength,
     223                 :                                const PRUint32 pos, const modetype check,
     224                 :                                PRUint32& start)
     225                 : {
     226               0 :   switch(check)
     227                 :   { // no breaks, because end of blocks is never reached
     228                 :   case RFC1738:
     229                 :   {
                             // The five characters ending at pos must be the <URL: prefix; the URL
                             // itself then begins right after pos.
                             // NOTE(review): pos is unsigned, so pos - 4 wraps for pos < 4 and the
                             // code relies on MaxInt to clamp the index to 0 -- MaxInt is declared
                             // elsewhere, TODO confirm its parameter types.
     230               0 :     if (!nsCRT::strncmp(&aInString[MaxInt(pos - 4, 0)], NS_LITERAL_STRING("<URL:").get(), 5))
     231                 :     {
     232               0 :       start = pos + 1;
     233               0 :       return true;
     234                 :     }
     235                 :     else
     236               0 :       return false;
     237                 :   }
     238                 :   case RFC2396E:
     239                 :   {
     240               0 :     nsString temp(aInString, aInLength);
                             // Ask RFindCharInSet for a <, > or double quote, starting at pos - 1.
                             // With pos == 0 there is nothing in front of pos to search.
     241               0 :     PRInt32 i = pos <= 0 ? kNotFound : temp.RFindCharInSet(NS_LITERAL_STRING("<>\"").get(), pos - 1);
                             // Only an opening < or a double quote can introduce the URL; finding a
                             // closing > means no start.
     242               0 :     if (i != kNotFound && (temp[PRUint32(i)] == '<' ||
     243               0 :                            temp[PRUint32(i)] == '"'))
     244                 :     {
                               // The URL begins on the character after the delimiter.
     245               0 :       start = PRUint32(++i);
     246               0 :       return start < pos;
     247                 :     }
     248                 :     else
     249               0 :       return false;
     250                 :   }
     251                 :   case freetext:
     252                 :   {
     253               0 :     PRInt32 i = pos - 1;
                             // Walk backwards over ASCII letters, ASCII digits, +, - and . ; the
                             // loop stops on the first other character or at i == -1.
     254               0 :     for (; i >= 0 && (
     255               0 :          nsCRT::IsAsciiAlpha(aInString[PRUint32(i)]) ||
     256               0 :          nsCRT::IsAsciiDigit(aInString[PRUint32(i)]) ||
     257               0 :          aInString[PRUint32(i)] == '+' ||
     258               0 :          aInString[PRUint32(i)] == '-' ||
     259               0 :          aInString[PRUint32(i)] == '.'
     260                 :          ); i--)
     261                 :       ;
                             // ++i steps forward onto the first character of the run just walked.
                             // The run must be non-empty and must begin with an ASCII letter.
     262               0 :     if (++i >= 0 && PRUint32(i) < pos && nsCRT::IsAsciiAlpha(aInString[PRUint32(i)]))
     263                 :     {
     264               0 :       start = PRUint32(i);
     265               0 :       return true;
     266                 :     }
     267                 :     else
     268               0 :       return false;
     269                 :   }
     270                 :   case abbreviated:
     271                 :   {
     272               0 :     PRInt32 i = pos - 1;
     273                 :     // This disallows non-ascii-characters for email.
     274                 :     // Currently correct, but revisit later after standards changed.
     275               0 :     bool isEmail = aInString[pos] == (PRUnichar)'@';
     276                 :     // These chars mark the start of the URL
                             // The walk backwards also stops on whitespace (see IsSpace) and, when
                             // the trigger character is @, on any non-ASCII character.
     277               0 :     for (; i >= 0
     278               0 :              && aInString[PRUint32(i)] != '>' && aInString[PRUint32(i)] != '<'
     279               0 :              && aInString[PRUint32(i)] != '"' && aInString[PRUint32(i)] != '\''
     280               0 :              && aInString[PRUint32(i)] != '`' && aInString[PRUint32(i)] != ','
     281               0 :              && aInString[PRUint32(i)] != '{' && aInString[PRUint32(i)] != '['
     282               0 :              && aInString[PRUint32(i)] != '(' && aInString[PRUint32(i)] != '|'
     283               0 :              && aInString[PRUint32(i)] != '\\'
     284               0 :              && !IsSpace(aInString[PRUint32(i)])
     285               0 :              && (!isEmail || nsCRT::IsAscii(aInString[PRUint32(i)]))
     286                 :          ; i--)
     287                 :       ;
                             // ++i steps forward onto the first character after the stop character.
                             // That character must lie before pos and be an ASCII letter or digit.
     288               0 :     if
     289                 :       (
     290                 :         ++i >= 0 && PRUint32(i) < pos
     291                 :           &&
     292                 :           (
     293               0 :             nsCRT::IsAsciiAlpha(aInString[PRUint32(i)]) ||
     294               0 :             nsCRT::IsAsciiDigit(aInString[PRUint32(i)])
     295                 :           )
     296                 :       )
     297                 :     {
     298               0 :       start = PRUint32(i);
     299               0 :       return true;
     300                 :     }
     301                 :     else
     302               0 :       return false;
     303                 :   }
     304                 :   default:
     305               0 :     return false;
     306                 :   } //switch
     307                 : }
     308                 : 
                           // FindURLEnd looks forwards from pos for the last character of a URL
                           // candidate whose first character was already located by FindURLStart.
                           //
                           //   aInString        text being scanned
                           //   aInStringLength  number of characters in aInString
                           //   pos              index of the character that triggered the search
                           //   check            recognition mode that selects the rule set
                           //   start            index of the first URL character
                           //   end              out: index of the last URL character; only written
                           //                    when the mode found a candidate end
                           //
                           // Returns true when an end after pos was found; false otherwise and for
                           // unknown modes.
     309                 : bool
     310               0 : mozTXTToHTMLConv::FindURLEnd(const PRUnichar * aInString, PRInt32 aInStringLength, const PRUint32 pos,
     311                 :            const modetype check, const PRUint32 start, PRUint32& end)
     312                 : {
     313               0 :   switch(check)
     314                 :   { // no breaks, because end of blocks is never reached
     315                 :   case RFC1738:
     316                 :   case RFC2396E:
     317                 :   {
     318               0 :     nsString temp(aInString, aInStringLength);
     319                 : 
                             // Ask FindCharInSet for a <, > or double quote, starting at pos + 1.
     320               0 :     PRInt32 i = temp.FindCharInSet(NS_LITERAL_STRING("<>\"").get(), pos + 1);
                             // The delimiter found must be the closing partner of the opening one:
                             // > for RFC1738 and for a URL preceded by <, a double quote otherwise.
                             // The post-decrement leaves i on the character before the delimiter.
     321               0 :     if (i != kNotFound && temp[PRUint32(i--)] ==
     322               0 :         (check == RFC1738 || temp[start - 1] == '<' ? '>' : '"'))
     323                 :     {
     324               0 :       end = PRUint32(i);
     325               0 :       return end > pos;
     326                 :     }
     327                 :     else
     328               0 :       return false;
     329                 :   }
     330                 :   case freetext:
     331                 :   case abbreviated:
     332                 :   {
     333               0 :     PRUint32 i = pos + 1;
     334               0 :     bool isEmail = aInString[pos] == (PRUnichar)'@';
                             // Set once a ( has been passed during the scan below; from then on a )
                             // no longer terminates the URL.
     335               0 :     bool haveOpeningBracket = false;
     336               0 :     for (; PRInt32(i) < aInStringLength; i++)
     337                 :     {
     338                 :       // These chars mark the end of the URL
     339               0 :       if (aInString[i] == '>' || aInString[i] == '<' ||
     340               0 :           aInString[i] == '"' || aInString[i] == '`' ||
     341               0 :           aInString[i] == '}' || aInString[i] == ']' ||
     342               0 :           aInString[i] == '{' || aInString[i] == '[' ||
     343               0 :           aInString[i] == '|' ||
     344               0 :           (aInString[i] == ')' && !haveOpeningBracket) ||
     345               0 :           IsSpace(aInString[i])    )
     346               0 :           break;
     347                 :       // Disallow non-ascii-characters for email.
     348                 :       // Currently correct, but revisit later after standards changed.
                               // For mail addresses a ( or an apostrophe ends the candidate as well.
     349               0 :       if (isEmail && (
     350               0 :             aInString[i] == '(' || aInString[i] == '\'' ||
     351               0 :             !nsCRT::IsAscii(aInString[i])       ))
     352               0 :           break;
     353               0 :       if (aInString[i] == '(')
     354               0 :         haveOpeningBracket = true;
     355                 :     }
     356                 :     // These chars are allowed in the middle of the URL, but not at end.
     357                 :     // Technically they are, but are used in normal text after the URL.
                             // The first --i steps back from the terminator (or from the end of the
                             // text) onto the last scanned character; further iterations strip
                             // trailing punctuation, never going back as far as pos.
     358               0 :     while (--i > pos && (
     359               0 :              aInString[i] == '.' || aInString[i] == ',' || aInString[i] == ';' ||
     360               0 :              aInString[i] == '!' || aInString[i] == '?' || aInString[i] == '-' ||
     361               0 :              aInString[i] == ':' || aInString[i] == '\''
     362                 :              ))
     363                 :         ;
                             // At least one character must remain after pos.
     364               0 :     if (i > pos)
     365                 :     {
     366               0 :       end = i;
     367               0 :       return true;
     368                 :     }
     369                 :     else
     370               0 :       return false;
     371                 :   }
     372                 :   default:
     373               0 :     return false;
     374                 :   } //switch
     375                 : }
     376                 : 
                           // CalculateURLBoundaries derives, from the start and end indices of a
                           // URL candidate, the URL text, the text to display for it and how much
                           // text around pos the generated link replaces.
                           //
                           //   aInString        text being scanned
                           //   aInStringLength  number of characters in aInString (not read here)
                           //   pos              index of the character that triggered the search
                           //   whathasbeendone  conversion flags; forwarded to ScanTXT with the
                           //                    kURLs bit cleared
                           //   check            recognition mode that produced start and end
                           //   start, end       indices of the first and last URL character
                           //   txtURL           out: aInString[start..end] is appended, then all
                           //                    whitespace is stripped from the string
                           //   desc             out: display text is appended, then the string is
                           //                    escaped with EscapeStr (inAttribute false)
                           //   replaceBefore    out: length of the ScanTXT output for the text
                           //                    between the start of desc and pos
                           //   replaceAfter     out: end - pos, plus one for the two bracketed
                           //                    modes; not written for unknown modes
     377                 : void
     378               0 : mozTXTToHTMLConv::CalculateURLBoundaries(const PRUnichar * aInString, PRInt32 aInStringLength, 
     379                 :      const PRUint32 pos, const PRUint32 whathasbeendone,
     380                 :      const modetype check, const PRUint32 start, const PRUint32 end,
     381                 :      nsString& txtURL, nsString& desc,
     382                 :      PRInt32& replaceBefore, PRInt32& replaceAfter)
     383                 : {
                           // Index where the displayed text begins; moved in front of start for the
                           // modes that show their delimiters.
     384               0 :   PRUint32 descstart = start;
     385               0 :   switch(check)
     386                 :   {
     387                 :   case RFC1738:
     388                 :   {
     389               0 :     descstart = start - 5;
     390               0 :     desc.Append(&aInString[descstart], end - descstart + 2);  // include "<URL:" and ">"
     391               0 :     replaceAfter = end - pos + 1;
     392               0 :   } break;
     393                 :   case RFC2396E:
     394                 :   {
     395               0 :     descstart = start - 1;
     396               0 :     desc.Append(&aInString[descstart], end - descstart + 2); // include brackets
     397               0 :     replaceAfter = end - pos + 1;
     398               0 :   } break;
     399                 :   case freetext:
     400                 :   case abbreviated:
     401                 :   {
     402               0 :     descstart = start;
     403               0 :     desc.Append(&aInString[descstart], end - start + 1); // don't include brackets  
     404               0 :     replaceAfter = end - pos;
     405               0 :   } break;
     406               0 :   default: break;
     407                 :   } //switch
     408                 : 
     409               0 :   EscapeStr(desc, false);
     410                 : 
     411               0 :   txtURL.Append(&aInString[start], end - start + 1);
     412               0 :   txtURL.StripWhitespace();
     413                 : 
     414                 :   // FIX ME
                           // The text in front of pos is run through ScanTXT only to learn how long
                           // its converted form is; the converted text itself is discarded.
     415               0 :   nsAutoString temp2;
     416               0 :   ScanTXT(&aInString[descstart], pos - descstart, ~kURLs /*prevents loop*/ & whathasbeendone, temp2);
     417               0 :   replaceBefore = temp2.Length();
     418                 :   return;
     419                 : }
     420                 : 
                           // ShouldLinkify decides whether aURL deserves to be turned into a link.
                           //
                           //   aURL  the candidate URL
                           //
                           // Returns false when mIOService is not set, when no scheme can be
                           // extracted from aURL or when no protocol handler is available for the
                           // scheme. Returns true when the handler does not implement
                           // nsIExternalProtocolHandler. For an external handler the result is true
                           // only if ExternalAppExistsForScheme succeeds and reports an application.
                           //
                           // This method does not create mIOService itself; CheckURLAndCreateHTML
                           // initialises it before calling here.
     421               0 : bool mozTXTToHTMLConv::ShouldLinkify(const nsCString& aURL)
     422                 : {
     423               0 :   if (!mIOService)
     424               0 :     return false;
     425                 : 
     426               0 :   nsCAutoString scheme;
     427               0 :   nsresult rv = mIOService->ExtractScheme(aURL, scheme);
     428               0 :   if(NS_FAILED(rv))
     429               0 :     return false;
     430                 : 
     431                 :   // Get the handler for this scheme.
     432               0 :   nsCOMPtr<nsIProtocolHandler> handler;    
     433               0 :   rv = mIOService->GetProtocolHandler(scheme.get(), getter_AddRefs(handler));
     434               0 :   if(NS_FAILED(rv))
     435               0 :     return false;
     436                 : 
     437                 :   // Is it an external protocol handler? If not, linkify it.
     438               0 :   nsCOMPtr<nsIExternalProtocolHandler> externalHandler = do_QueryInterface(handler);
     439               0 :   if (!externalHandler)
     440               0 :    return true; // handler is built-in, linkify it!
     441                 : 
     442                 :   // If external app exists for the scheme then linkify it.
                           // exists is only read when the call below succeeded.
     443                 :   bool exists;
     444               0 :   rv = externalHandler->ExternalAppExistsForScheme(scheme, &exists);
     445               0 :   return(NS_SUCCEEDED(rv) && exists);
     446                 : }
     447                 : 
                           // CheckURLAndCreateHTML validates a URL candidate and, if it is usable,
                           // produces the HTML anchor for it.
                           //
                           //   txtURL      the URL text
                           //   desc        text placed between the opening and closing anchor tag; it
                           //               is inserted as given, so it presumably has to be escaped
                           //               by the caller already -- TODO confirm
                           //   mode        recognition mode; selects the suffix of the class name
                           //   outputHTML  out: assigned the anchor markup; only written on success
                           //
                           // The anchor has the class moz-txt-link- followed by rfc1738, rfc2396E,
                           // freetext or abbreviated (nothing for any other mode) and an href that
                           // holds txtURL escaped for use inside an attribute.
                           //
                           // Returns true when the anchor was written. Returns false when the IO
                           // service is unavailable, when ShouldLinkify rejects the URL or when no
                           // nsIURI can be created from it.
     448                 : bool
     449               0 : mozTXTToHTMLConv::CheckURLAndCreateHTML(
     450                 :      const nsString& txtURL, const nsString& desc, const modetype mode,
     451                 :      nsString& outputHTML)
     452                 : {
     453                 :   // Create *uri from txtURL
     454               0 :   nsCOMPtr<nsIURI> uri;
     455                 :   nsresult rv;
     456                 :   // Lazily initialize mIOService
     457               0 :   if (!mIOService)
     458                 :   {
     459               0 :     mIOService = do_GetIOService();
     460                 : 
     461               0 :     if (!mIOService)
     462               0 :       return false;
     463                 :   }
     464                 : 
     465                 :   // See if the url should be linkified.
     466               0 :   NS_ConvertUTF16toUTF8 utf8URL(txtURL);
     467               0 :   if (!ShouldLinkify(utf8URL))
     468               0 :     return false;
     469                 : 
     470                 :   // it would be faster if we could just check to see if there is a protocol
     471                 :   // handler for the url and return instead of actually trying to create a url...
     472               0 :   rv = mIOService->NewURI(utf8URL, nsnull, nsnull, getter_AddRefs(uri));
     473                 : 
     474                 :   // Real work
     475               0 :   if (NS_SUCCEEDED(rv) && uri)
     476                 :   {
     477               0 :     outputHTML.AssignLiteral("<a class=\"moz-txt-link-");
     478               0 :     switch(mode)
     479                 :     {
     480                 :     case RFC1738:
     481               0 :       outputHTML.AppendLiteral("rfc1738");
     482               0 :       break;
     483                 :     case RFC2396E:
     484               0 :       outputHTML.AppendLiteral("rfc2396E");
     485               0 :       break;
     486                 :     case freetext:
     487               0 :       outputHTML.AppendLiteral("freetext");
     488               0 :       break;
     489                 :     case abbreviated:
     490               0 :       outputHTML.AppendLiteral("abbreviated");
     491               0 :       break;
     492               0 :     default: break;
     493                 :     }
                             // Escape a copy with inAttribute true so that double quotes in the URL
                             // cannot end the href attribute early.
     494               0 :     nsAutoString escapedURL(txtURL);
     495               0 :     EscapeStr(escapedURL, true);
     496                 : 
     497               0 :     outputHTML.AppendLiteral("\" href=\"");
     498               0 :     outputHTML += escapedURL;
     499               0 :     outputHTML.AppendLiteral("\">");
     500               0 :     outputHTML += desc;
     501               0 :     outputHTML.AppendLiteral("</a>");
     502               0 :     return true;
     503                 :   }
     504                 :   else
     505               0 :     return false;
     506                 : }
     507                 : 
                           // FindURLInPlaintext runs FindURL with the kURLs flag on the given text
                           // and hands back the two values FindURL reports through its replaceBefore
                           // and replaceAfter arguments.
                           //
                           //   aInString  text to inspect
                           //   aInLength  number of characters in aInString
                           //   aPos       index passed to FindURL as pos
                           //   aStartPos  out: set to -1, then passed to FindURL as replaceBefore
                           //   aEndPos    out: set to -1, then passed to FindURL as replaceAfter
                           //
                           // Always returns NS_OK; the boolean result of FindURL is ignored, so the
                           // caller presumably detects failure by both outputs still being -1 --
                           // TODO confirm against FindURL.
                           //
                           // NOTE(review): aStartPos and aEndPos are dereferenced without a null
                           // check, and aPos is converted to the unsigned pos parameter of FindURL
                           // without a range check against aInLength.
     508               0 : NS_IMETHODIMP mozTXTToHTMLConv::FindURLInPlaintext(const PRUnichar * aInString, PRInt32 aInLength, PRInt32 aPos, PRInt32 * aStartPos, PRInt32 * aEndPos)
     509                 : {
     510                 :   // call FindURL on the passed in string
     511               0 :   nsAutoString outputHTML; // we'll ignore the generated output HTML
     512                 : 
     513               0 :   *aStartPos = -1;
     514               0 :   *aEndPos = -1;
     515                 : 
     516               0 :   FindURL(aInString, aInLength, aPos, kURLs, outputHTML, *aStartPos, *aEndPos);
     517                 : 
     518               0 :   return NS_OK;
     519                 : }
     520                 : 
     521                 : bool
     522               0 : mozTXTToHTMLConv::FindURL(const PRUnichar * aInString, PRInt32 aInLength, const PRUint32 pos,
     523                 :      const PRUint32 whathasbeendone,
     524                 :      nsString& outputHTML, PRInt32& replaceBefore, PRInt32& replaceAfter)
     525                 : {
     526                 :   enum statetype {unchecked, invalid, startok, endok, success};
     527                 :   static const modetype ranking[] = {RFC1738, RFC2396E, freetext, abbreviated};
     528                 : 
     529                 :   statetype state[mozTXTToHTMLConv_lastMode + 1]; // 0(=unknown)..lastMode
     530                 :   /* I don't like this abuse of enums as index for the array,
     531                 :      but I don't know a better method */
     532                 : 
     533                 :   // Define, which modes to check
     534                 :   /* all modes but abbreviated are checked for text[pos] == ':',
     535                 :      only abbreviated for '.', RFC2396E and abbreviated for '@' */
     536               0 :   for (modetype iState = unknown; iState <= mozTXTToHTMLConv_lastMode;
     537                 :        iState = modetype(iState + 1))
     538               0 :     state[iState] = aInString[pos] == ':' ? unchecked : invalid;
     539               0 :   switch (aInString[pos])
     540                 :   {
     541                 :   case '@':
     542               0 :     state[RFC2396E] = unchecked;
     543                 :     // no break here
     544                 :   case '.':
     545               0 :     state[abbreviated] = unchecked;
     546               0 :     break;
     547                 :   case ':':
     548               0 :     state[abbreviated] = invalid;
     549               0 :     break;
     550                 :   default:
     551               0 :     break;
     552                 :   }
     553                 : 
     554                 :   // Test, first successful mode wins, sequence defined by |ranking|
     555               0 :   PRInt32 iCheck = 0;  // the currently tested modetype
     556               0 :   modetype check = ranking[iCheck];
     557               0 :   for (; iCheck < mozTXTToHTMLConv_numberOfModes && state[check] != success;
     558                 :        iCheck++)
     559                 :     /* check state from last run.
     560                 :        If this is the first, check this one, which isn't = success yet */
     561                 :   {
     562               0 :     check = ranking[iCheck];
     563                 : 
     564                 :     PRUint32 start, end;
     565                 : 
     566               0 :     if (state[check] == unchecked)
     567               0 :       if (FindURLStart(aInString, aInLength, pos, check, start))
     568               0 :         state[check] = startok;
     569                 : 
     570               0 :     if (state[check] == startok)
     571               0 :       if (FindURLEnd(aInString, aInLength, pos, check, start, end))
     572               0 :         state[check] = endok;
     573                 : 
     574               0 :     if (state[check] == endok)
     575                 :     {
     576               0 :       nsAutoString txtURL, desc;
     577                 :       PRInt32 resultReplaceBefore, resultReplaceAfter;
     578                 : 
     579                 :       CalculateURLBoundaries(aInString, aInLength, pos, whathasbeendone, check, start, end,
     580                 :                              txtURL, desc,
     581               0 :                              resultReplaceBefore, resultReplaceAfter);
     582                 : 
     583               0 :       if (aInString[pos] != ':')
     584                 :       {
     585               0 :         nsAutoString temp = txtURL;
     586               0 :         txtURL.SetLength(0);
     587               0 :         CompleteAbbreviatedURL(temp.get(),temp.Length(), pos - start, txtURL);
     588                 :       }
     589                 : 
     590               0 :       if (!txtURL.IsEmpty() && CheckURLAndCreateHTML(txtURL, desc, check,
     591               0 :                                                      outputHTML))
     592                 :       {
     593               0 :         replaceBefore = resultReplaceBefore;
     594               0 :         replaceAfter = resultReplaceAfter;
     595               0 :         state[check] = success;
     596                 :       }
     597                 :     } // if
     598                 :   } // for
     599               0 :   return state[check] == success;
     600                 : }
     601                 : 
     602                 : bool
     603               0 : mozTXTToHTMLConv::ItMatchesDelimited(const PRUnichar * aInString,
     604                 :     PRInt32 aInLength, const PRUnichar* rep, PRInt32 aRepLen,
     605                 :     LIMTYPE before, LIMTYPE after)
     606                 : {
     607                 : 
     608                 :   // this little method gets called a LOT. I found we were spending a
     609                 :   // lot of time just calculating the length of the variable "rep"
     610                 :   // over and over again every time we called it. So we're now passing
     611                 :   // an integer in here.
     612               0 :   PRInt32 textLen = aInLength;
     613                 : 
     614               0 :   if
     615                 :     (
     616                 :       ((before == LT_IGNORE && (after == LT_IGNORE || after == LT_DELIMITER))
     617                 :         && textLen < aRepLen) ||
     618                 :       ((before != LT_IGNORE || (after != LT_IGNORE && after != LT_DELIMITER))
     619                 :         && textLen < aRepLen + 1) ||
     620                 :       (before != LT_IGNORE && after != LT_IGNORE && after != LT_DELIMITER
     621                 :         && textLen < aRepLen + 2)
     622                 :     )
     623               0 :     return false;
     624                 : 
     625               0 :   PRUnichar text0 = aInString[0];
     626               0 :   PRUnichar textAfterPos = aInString[aRepLen + (before == LT_IGNORE ? 0 : 1)];
     627                 : 
     628               0 :   if
     629                 :     (
     630                 :       (before == LT_ALPHA
     631               0 :         && !nsCRT::IsAsciiAlpha(text0)) ||
     632                 :       (before == LT_DIGIT
     633               0 :         && !nsCRT::IsAsciiDigit(text0)) ||
     634                 :       (before == LT_DELIMITER
     635                 :         &&
     636                 :         (
     637               0 :           nsCRT::IsAsciiAlpha(text0) ||
     638               0 :           nsCRT::IsAsciiDigit(text0) ||
     639                 :           text0 == *rep
     640                 :         )) ||
     641                 :       (after == LT_ALPHA
     642               0 :         && !nsCRT::IsAsciiAlpha(textAfterPos)) ||
     643                 :       (after == LT_DIGIT
     644               0 :         && !nsCRT::IsAsciiDigit(textAfterPos)) ||
     645                 :       (after == LT_DELIMITER
     646                 :         &&
     647                 :         (
     648               0 :           nsCRT::IsAsciiAlpha(textAfterPos) ||
     649               0 :           nsCRT::IsAsciiDigit(textAfterPos) ||
     650                 :           textAfterPos == *rep
     651                 :         )) ||
     652               0 :         !Substring(Substring(aInString, aInString+aInLength),
     653                 :                    (before == LT_IGNORE ? 0 : 1),
     654               0 :                    aRepLen).Equals(Substring(rep, rep+aRepLen),
     655               0 :                                    nsCaseInsensitiveStringComparator())
     656                 :     )
     657               0 :     return false;
     658                 : 
     659               0 :   return true;
     660                 : }
     661                 : 
     662                 : PRUint32
     663               0 : mozTXTToHTMLConv::NumberOfMatches(const PRUnichar * aInString, PRInt32 aInStringLength, 
     664                 :      const PRUnichar* rep, PRInt32 aRepLen, LIMTYPE before, LIMTYPE after)
     665                 : {
     666               0 :   PRUint32 result = 0;
     667                 : 
     668               0 :   for (PRInt32 i = 0; i < aInStringLength; i++)
     669                 :   {
     670               0 :     const PRUnichar * indexIntoString = &aInString[i];
     671               0 :     if (ItMatchesDelimited(indexIntoString, aInStringLength - i, rep, aRepLen, before, after))
     672               0 :       result++;
     673                 :   }
     674               0 :   return result;
     675                 : }
     676                 : 
     677                 : 
     678                 : // NOTE: the converted html for the phrase is appended to aOutString
     679                 : // tagHTML and attributeHTML are plain ASCII (literal strings, in fact)
     680                 : bool
     681               0 : mozTXTToHTMLConv::StructPhraseHit(const PRUnichar * aInString, PRInt32 aInStringLength, bool col0,
     682                 :      const PRUnichar* tagTXT, PRInt32 aTagTXTLen, 
     683                 :      const char* tagHTML, const char* attributeHTML,
     684                 :      nsString& aOutString, PRUint32& openTags)
     685                 : {
     686                 :   /* We're searching for the following pattern:
     687                 :      LT_DELIMITER - "*" - ALPHA -
     688                 :      [ some text (maybe more "*"-pairs) - ALPHA ] "*" - LT_DELIMITER.
     689                 :      <strong> is only inserted, if existence of a pair could be verified
     690                 :      We use the first opening/closing tag, if we can choose */
     691                 : 
     692               0 :   const PRUnichar * newOffset = aInString;
     693               0 :   PRInt32 newLength = aInStringLength;
     694               0 :   if (!col0) // skip the first element?
     695                 :   {
     696               0 :     newOffset = &aInString[1];
     697               0 :     newLength = aInStringLength - 1;
     698                 :   }
     699                 : 
     700                 :   // opening tag
     701               0 :   if
     702                 :     (
     703                 :       ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, 
     704               0 :            (col0 ? LT_IGNORE : LT_DELIMITER), LT_ALPHA) // is opening tag
     705                 :         && NumberOfMatches(newOffset, newLength, tagTXT, aTagTXTLen, 
     706               0 :               LT_ALPHA, LT_DELIMITER)  // remaining closing tags
     707                 :               > openTags
     708                 :     )
     709                 :   {
     710               0 :     openTags++;
     711               0 :     aOutString.AppendLiteral("<");
     712               0 :     aOutString.AppendASCII(tagHTML);
     713               0 :     aOutString.Append(PRUnichar(' '));
     714               0 :     aOutString.AppendASCII(attributeHTML);
     715               0 :     aOutString.AppendLiteral("><span class=\"moz-txt-tag\">");
     716               0 :     aOutString.Append(tagTXT);
     717               0 :     aOutString.AppendLiteral("</span>");
     718               0 :     return true;
     719                 :   }
     720                 : 
     721                 :   // closing tag
     722               0 :   else if (openTags > 0
     723               0 :        && ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, LT_ALPHA, LT_DELIMITER))
     724                 :   {
     725               0 :     openTags--;
     726               0 :     aOutString.AppendLiteral("<span class=\"moz-txt-tag\">");
     727               0 :     aOutString.Append(tagTXT);
     728               0 :     aOutString.AppendLiteral("</span></");
     729               0 :     aOutString.AppendASCII(tagHTML);
     730               0 :     aOutString.Append(PRUnichar('>'));
     731               0 :     return true;
     732                 :   }
     733                 : 
     734               0 :   return false;
     735                 : }
     736                 : 
     737                 : 
     738                 : bool
     739               0 : mozTXTToHTMLConv::SmilyHit(const PRUnichar * aInString, PRInt32 aLength, bool col0,
     740                 :          const char* tagTXT, const char* imageName,
     741                 :          nsString& outputHTML, PRInt32& glyphTextLen)
     742                 : {
     743               0 :   if ( !aInString || !tagTXT || !imageName )
     744               0 :       return false;
     745                 : 
     746               0 :   PRInt32  tagLen = nsCRT::strlen(tagTXT);
     747                 :  
     748               0 :   PRUint32 delim = (col0 ? 0 : 1) + tagLen;
     749                 : 
     750               0 :   if
     751                 :     (
     752               0 :       (col0 || IsSpace(aInString[0]))
     753                 :         &&
     754                 :         (
     755                 :           aLength <= PRInt32(delim) ||
     756               0 :           IsSpace(aInString[delim]) ||
     757                 :           (aLength > PRInt32(delim + 1)
     758                 :             &&
     759                 :             (
     760               0 :               aInString[delim] == '.' ||
     761               0 :               aInString[delim] == ',' ||
     762               0 :               aInString[delim] == ';' ||
     763               0 :               aInString[delim] == '8' ||
     764               0 :               aInString[delim] == '>' ||
     765               0 :               aInString[delim] == '!' ||
     766               0 :               aInString[delim] == '?'
     767                 :             )
     768               0 :             && IsSpace(aInString[delim + 1]))
     769                 :         )
     770               0 :         && ItMatchesDelimited(aInString, aLength, NS_ConvertASCIItoUTF16(tagTXT).get(), tagLen, 
     771               0 :                               col0 ? LT_IGNORE : LT_DELIMITER, LT_IGNORE)
     772                 :                 // Note: tests at different pos for LT_IGNORE and LT_DELIMITER
     773                 :     )
     774                 :   {
     775               0 :     if (!col0)
     776                 :     {
     777               0 :       outputHTML.Truncate();
     778               0 :       outputHTML.Append(PRUnichar(' '));
     779                 :     }
     780                 : 
     781               0 :     outputHTML.AppendLiteral("<span class=\""); // <span class="
     782               0 :     AppendASCIItoUTF16(imageName, outputHTML);  // e.g. smiley-frown
     783               0 :     outputHTML.AppendLiteral("\" title=\"");    // " title="     
     784               0 :     AppendASCIItoUTF16(tagTXT, outputHTML);     // smiley tooltip
     785               0 :     outputHTML.AppendLiteral("\"><span>");      // "><span>      
     786               0 :     AppendASCIItoUTF16(tagTXT, outputHTML);     // original text 
     787               0 :     outputHTML.AppendLiteral("</span></span>"); // </span></span>
     788               0 :     glyphTextLen = (col0 ? 0 : 1) + tagLen;
     789               0 :     return true;
     790                 :   }
     791                 : 
     792               0 :   return false;
     793                 : }
     794                 : 
     795                 : // the glyph is appended to aOutputString instead of the original string...
     796                 : bool
     797               0 : mozTXTToHTMLConv::GlyphHit(const PRUnichar * aInString, PRInt32 aInLength, bool col0,
     798                 :          nsString& aOutputString, PRInt32& glyphTextLen)
     799                 : {
     800               0 :   PRUnichar text0 = aInString[0]; 
     801               0 :   PRUnichar text1 = aInString[1];
     802               0 :   PRUnichar firstChar = (col0 ? text0 : text1);
     803                 : 
     804                 :   // temporary variable used to store the glyph html text
     805               0 :   nsAutoString outputHTML;
     806                 :   bool bTestSmilie;
     807                 :   bool bArg;
     808                 :   int i;
     809                 : 
     810                 :   // refactor some of this mess to avoid code duplication and speed execution a bit
     811                 :   // there are two cases that need to be tried one after another. To avoid a lot of
     812                 :   // duplicate code, rolling into a loop
     813                 : 
     814               0 :   i = 0;
     815               0 :   while ( i < 2 )
     816                 :   {
     817               0 :     bTestSmilie = false;
     818               0 :     if ( !i && (firstChar == ':' || firstChar == ';' || firstChar == '=' || firstChar == '>' || firstChar == '8' || firstChar == 'O'))
     819                 :     {
     820                 :         // first test passed
     821                 : 
     822               0 :         bTestSmilie = true;
     823               0 :         bArg = col0;
     824                 :     }
     825               0 :     if ( i && col0 && ( text1 == ':' || text1 == ';' || text1 == '=' || text1 == '>' || text1 == '8' || text1 == 'O' ) )
     826                 :     {
     827                 :         // second test passed
     828                 : 
     829               0 :         bTestSmilie = true;
     830               0 :         bArg = false;
     831                 :     }
     832               0 :     if ( bTestSmilie && (
     833                 :           SmilyHit(aInString, aInLength, bArg,
     834                 :                    ":-)",
     835                 :                    "moz-smiley-s1", // smile
     836               0 :                    outputHTML, glyphTextLen) ||
     837                 :   
     838                 :           SmilyHit(aInString, aInLength, bArg,
     839                 :                    ":)",
     840                 :                    "moz-smiley-s1", // smile
     841               0 :                    outputHTML, glyphTextLen) ||
     842                 :           
     843                 :           SmilyHit(aInString, aInLength, bArg,
     844                 :                    ":-D",
     845                 :                    "moz-smiley-s5", // laughing
     846               0 :                    outputHTML, glyphTextLen) ||
     847                 :           
     848                 :           SmilyHit(aInString, aInLength, bArg,
     849                 :                    ":-(",
     850                 :                    "moz-smiley-s2", // frown
     851               0 :                    outputHTML, glyphTextLen) ||
     852                 :           
     853                 :           SmilyHit(aInString, aInLength, bArg,
     854                 :                    ":(",
     855                 :                    "moz-smiley-s2", // frown
     856               0 :                    outputHTML, glyphTextLen) ||
     857                 :           
     858                 :           SmilyHit(aInString, aInLength, bArg,
     859                 :                    ":-[",
     860                 :                    "moz-smiley-s6", // embarassed
     861               0 :                    outputHTML, glyphTextLen) ||
     862                 :           
     863                 :           SmilyHit(aInString, aInLength, bArg,
     864                 :                    ";-)",
     865                 :                    "moz-smiley-s3", // wink
     866               0 :                    outputHTML, glyphTextLen) ||
     867                 : 
     868                 :           SmilyHit(aInString, aInLength, col0,
     869                 :                    ";)",
     870                 :                    "moz-smiley-s3", // wink
     871               0 :                    outputHTML, glyphTextLen) ||
     872                 :           
     873                 :           SmilyHit(aInString, aInLength, bArg,
     874                 :                    ":-\\",
     875                 :                    "moz-smiley-s7", // undecided
     876               0 :                    outputHTML, glyphTextLen) ||
     877                 :           
     878                 :           SmilyHit(aInString, aInLength, bArg,
     879                 :                    ":-P",
     880                 :                    "moz-smiley-s4", // tongue
     881               0 :                    outputHTML, glyphTextLen) ||
     882                 :                    
     883                 :           SmilyHit(aInString, aInLength, bArg,
     884                 :                    ";-P",
     885                 :                    "moz-smiley-s4", // tongue
     886               0 :                    outputHTML, glyphTextLen) ||  
     887                 :          
     888                 :           SmilyHit(aInString, aInLength, bArg,
     889                 :                    "=-O",
     890                 :                    "moz-smiley-s8", // surprise
     891               0 :                    outputHTML, glyphTextLen) ||
     892                 :          
     893                 :           SmilyHit(aInString, aInLength, bArg,
     894                 :                    ":-*",
     895                 :                    "moz-smiley-s9", // kiss
     896               0 :                    outputHTML, glyphTextLen) ||
     897                 :          
     898                 :           SmilyHit(aInString, aInLength, bArg,
     899                 :                    ">:o",
     900                 :                    "moz-smiley-s10", // yell
     901               0 :                    outputHTML, glyphTextLen) ||
     902                 :           
     903                 :           SmilyHit(aInString, aInLength, bArg,
     904                 :                    ">:-o",
     905                 :                    "moz-smiley-s10", // yell
     906               0 :                    outputHTML, glyphTextLen) ||
     907                 :         
     908                 :           SmilyHit(aInString, aInLength, bArg,
     909                 :                    "8-)",
     910                 :                    "moz-smiley-s11", // cool
     911               0 :                    outputHTML, glyphTextLen) ||
     912                 :          
     913                 :           SmilyHit(aInString, aInLength, bArg,
     914                 :                    ":-$",
     915                 :                    "moz-smiley-s12", // money
     916               0 :                    outputHTML, glyphTextLen) ||
     917                 :          
     918                 :           SmilyHit(aInString, aInLength, bArg,
     919                 :                    ":-!",
     920                 :                    "moz-smiley-s13", // foot
     921               0 :                    outputHTML, glyphTextLen) ||
     922                 :          
     923                 :           SmilyHit(aInString, aInLength, bArg,
     924                 :                    "O:-)",
     925                 :                    "moz-smiley-s14", // innocent
     926               0 :                    outputHTML, glyphTextLen) ||
     927                 :          
     928                 :           SmilyHit(aInString, aInLength, bArg,
     929                 :                    ":'(",
     930                 :                    "moz-smiley-s15", // cry
     931               0 :                    outputHTML, glyphTextLen) ||
     932                 :          
     933                 :           SmilyHit(aInString, aInLength, bArg,
     934                 :                    ":-X",
     935                 :                    "moz-smiley-s16", // sealed
     936               0 :                    outputHTML, glyphTextLen) 
     937                 :         )
     938                 :     )
     939                 :     {
     940               0 :         aOutputString.Append(outputHTML);
     941               0 :         return true;
     942                 :     }
     943               0 :     i++;
     944                 :   }
     945               0 :   if (text0 == '\f')
     946                 :   {
     947               0 :       aOutputString.AppendLiteral("<span class='moz-txt-formfeed'></span>");
     948               0 :       glyphTextLen = 1;
     949               0 :       return true;
     950                 :   }
     951               0 :   if (text0 == '+' || text1 == '+')
     952                 :   {
     953               0 :     if (ItMatchesDelimited(aInString, aInLength,
     954               0 :                            NS_LITERAL_STRING(" +/-").get(), 4,
     955               0 :                            LT_IGNORE, LT_IGNORE))
     956                 :     {
     957               0 :       aOutputString.AppendLiteral(" &plusmn;");
     958               0 :       glyphTextLen = 4;
     959               0 :       return true;
     960                 :     }
     961               0 :     if (col0 && ItMatchesDelimited(aInString, aInLength,
     962               0 :                                    NS_LITERAL_STRING("+/-").get(), 3,
     963               0 :                                    LT_IGNORE, LT_IGNORE))
     964                 :     {
     965               0 :       aOutputString.AppendLiteral("&plusmn;");
     966               0 :       glyphTextLen = 3;
     967               0 :       return true;
     968                 :     }
     969                 :   }
     970                 : 
     971                 :   // x^2  =>  x<sup>2</sup>,   also handle powers x^-2,  x^0.5
     972                 :   // implement regular expression /[\dA-Za-z\)\]}]\^-?\d+(\.\d+)*[^\dA-Za-z]/
     973               0 :   if    
     974                 :     (
     975                 :       text1 == '^'
     976                 :       && 
     977                 :       (
     978               0 :         nsCRT::IsAsciiDigit(text0) || nsCRT::IsAsciiAlpha(text0) || 
     979                 :         text0 == ')' || text0 == ']' || text0 == '}'
     980                 :       )
     981                 :       &&
     982                 :       (
     983               0 :         (2 < aInLength && nsCRT::IsAsciiDigit(aInString[2])) ||
     984               0 :         (3 < aInLength && aInString[2] == '-' && nsCRT::IsAsciiDigit(aInString[3]))
     985                 :       )
     986                 :     )
     987                 :   {
     988                 :     // Find first non-digit
     989               0 :     PRInt32 delimPos = 3;  // skip "^" and first digit (or '-')
     990               0 :     for (; delimPos < aInLength
     991                 :            &&
     992                 :            (
     993               0 :              nsCRT::IsAsciiDigit(aInString[delimPos]) || 
     994               0 :              (aInString[delimPos] == '.' && delimPos + 1 < aInLength &&
     995               0 :                nsCRT::IsAsciiDigit(aInString[delimPos + 1]))
     996                 :            );
     997                 :          delimPos++)
     998                 :       ;
     999                 : 
    1000               0 :     if (delimPos < aInLength && nsCRT::IsAsciiAlpha(aInString[delimPos]))
    1001                 :     {
    1002               0 :       return false;
    1003                 :     }
    1004                 : 
    1005               0 :     outputHTML.Truncate();
    1006               0 :     outputHTML += text0;
    1007                 :     outputHTML.AppendLiteral(
    1008                 :       "<sup class=\"moz-txt-sup\">"
    1009                 :       "<span style=\"display:inline-block;width:0;height:0;overflow:hidden\">"
    1010               0 :       "^</span>");
    1011                 : 
    1012               0 :     aOutputString.Append(outputHTML);
    1013               0 :     aOutputString.Append(&aInString[2], delimPos - 2);
    1014               0 :     aOutputString.AppendLiteral("</sup>");
    1015                 : 
    1016               0 :     glyphTextLen = delimPos /* - 1 + 1 */ ;
    1017               0 :     return true;
    1018                 :   }
    1019                 :   /*
    1020                 :    The following strings are not substituted:
    1021                 :    |TXT   |HTML     |Reason
    1022                 :    +------+---------+----------
    1023                 :     ->     &larr;    Bug #454
    1024                 :     =>     &lArr;    dito
    1025                 :     <-     &rarr;    dito
    1026                 :     <=     &rArr;    dito
    1027                 :     (tm)   &trade;   dito
    1028                 :     1/4    &frac14;  is triggered by 1/4 Part 1, 2/4 Part 2, ...
    1029                 :     3/4    &frac34;  dito
    1030                 :     1/2    &frac12;  similar
    1031                 :   */
    1032               0 :   return false;
    1033                 : }
    1034                 : 
    1035                 : /***************************************************************************
    1036                 :   Library-internal Interface
    1037                 : ****************************************************************************/
    1038                 : 
    1039               0 : mozTXTToHTMLConv::mozTXTToHTMLConv()
    1040                 : {
    1041               0 : }
    1042                 : 
    1043               0 : mozTXTToHTMLConv::~mozTXTToHTMLConv() 
    1044                 : {
    1045               0 : }
    1046                 : 
    1047               0 : NS_IMPL_ISUPPORTS4(mozTXTToHTMLConv,
    1048                 :                    mozITXTToHTMLConv,
    1049                 :                    nsIStreamConverter,
    1050                 :                    nsIStreamListener,
    1051                 :                    nsIRequestObserver)
    1052                 : 
    1053                 : PRInt32
    1054               0 : mozTXTToHTMLConv::CiteLevelTXT(const PRUnichar *line,
    1055                 :                                     PRUint32& logLineStart)
    1056                 : {
    1057               0 :   PRInt32 result = 0;
    1058               0 :   PRInt32 lineLength = nsCRT::strlen(line);
    1059                 : 
    1060               0 :   bool moreCites = true;
    1061               0 :   while (moreCites)
    1062                 :   {
    1063                 :     /* E.g. the following lines count as quote:
    1064                 : 
    1065                 :        > text
    1066                 :        //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
    1067                 :        >text
    1068                 :        //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
    1069                 :            > text
    1070                 :        ] text
    1071                 :        USER> text
    1072                 :        USER] text
    1073                 :        //#endif
    1074                 : 
    1075                 :        logLineStart is the position of "t" in this example
    1076                 :     */
    1077               0 :     PRUint32 i = logLineStart;
    1078                 : 
    1079                 : #ifdef QUOTE_RECOGNITION_AGGRESSIVE
    1080                 :     for (; PRInt32(i) < lineLength && IsSpace(line[i]); i++)
    1081                 :       ;
    1082                 :     for (; PRInt32(i) < lineLength && nsCRT::IsAsciiAlpha(line[i])
    1083                 :                                    && nsCRT::IsUpper(line[i])   ; i++)
    1084                 :       ;
    1085                 :     if (PRInt32(i) < lineLength && (line[i] == '>' || line[i] == ']'))
    1086                 : #else
    1087               0 :     if (PRInt32(i) < lineLength && line[i] == '>')
    1088                 : #endif
    1089                 :     {
    1090               0 :       i++;
    1091               0 :       if (PRInt32(i) < lineLength && line[i] == ' ')
    1092               0 :         i++;
    1093                 :       // sendmail/mbox
    1094                 :       // Placed here for performance increase
    1095               0 :       const PRUnichar * indexString = &line[logLineStart];
    1096                 :            // here, |logLineStart < lineLength| is always true
    1097               0 :       PRUint32 minlength = MinInt(6,nsCRT::strlen(indexString));
    1098               0 :       if (Substring(indexString,
    1099               0 :                     indexString+minlength).Equals(Substring(NS_LITERAL_STRING(">From "), 0, minlength),
    1100               0 :                                                   nsCaseInsensitiveStringComparator()))
    1101                 :         //XXX RFC2646
    1102               0 :         moreCites = false;
    1103                 :       else
    1104                 :       {
    1105               0 :         result++;
    1106               0 :         logLineStart = i;
    1107               0 :       }
    1108                 :     }
    1109                 :     else
    1110               0 :       moreCites = false;
    1111                 :   }
    1112                 : 
    1113               0 :   return result;
    1114                 : }
    1115                 : 
    1116                 : void
    1117               0 : mozTXTToHTMLConv::ScanTXT(const PRUnichar * aInString, PRInt32 aInStringLength, PRUint32 whattodo, nsString& aOutString)
    1118                 : { // Walks aInString once and appends the converted text to aOutString; whattodo is a bit mask selecting the optional passes below.
    1119               0 :   bool doURLs = 0 != (whattodo & kURLs);
    1120               0 :   bool doGlyphSubstitution = 0 != (whattodo & kGlyphSubstitution);
    1121               0 :   bool doStructPhrase = 0 != (whattodo & kStructPhrase);
    1122                 : 
    1123               0 :   PRUint32 structPhrase_strong = 0;  // Number of currently open tags (one counter per delimiter, handed to StructPhraseHit)
    1124               0 :   PRUint32 structPhrase_underline = 0;
    1125               0 :   PRUint32 structPhrase_italic = 0;
    1126               0 :   PRUint32 structPhrase_code = 0;
    1127                 : 
    1128               0 :   nsAutoString outputHTML;  // receives FindURL's output; declared outside the loop for performance
    1129                 : 
    1130               0 :   for(PRUint32 i = 0; PRInt32(i) < aInStringLength;) // no increment here: every path in the body advances i itself
    1131                 :   {
    1132               0 :     if (doGlyphSubstitution)
    1133                 :     {
    1134                 :       PRInt32 glyphTextLen; // out-param of GlyphHit; on a hit, i advances by this much
    1135               0 :       if (GlyphHit(&aInString[i], aInStringLength - i, i == 0, aOutString, glyphTextLen))
    1136                 :       {
    1137               0 :         i += glyphTextLen;
    1138               0 :         continue;
    1139                 :       }
    1140                 :     }
    1141                 : 
    1142               0 :     if (doStructPhrase)
    1143                 :     {
    1144               0 :       const PRUnichar * newOffset = aInString;
    1145               0 :       PRInt32 newLength = aInStringLength;
    1146               0 :       if (i > 0 ) // not at the start: begin the window one character before position i
    1147                 :       {
    1148               0 :         newOffset = &aInString[i-1];
    1149               0 :         newLength = aInStringLength - i + 1;
    1150                 :       }
    1151                 : 
    1152               0 :       switch (aInString[i]) // Performance increase: StructPhraseHit is only called for the four delimiter characters
    1153                 :       {
    1154                 :       case '*':
    1155               0 :         if (StructPhraseHit(newOffset, newLength, i == 0,
    1156               0 :                             NS_LITERAL_STRING("*").get(), 1,
    1157                 :                             "b", "class=\"moz-txt-star\"",
    1158               0 :                             aOutString, structPhrase_strong))
    1159                 :         {
    1160               0 :           i++;
    1161               0 :           continue;
    1162                 :         }
    1163               0 :         break;
    1164                 :       case '/':
    1165               0 :         if (StructPhraseHit(newOffset, newLength, i == 0,
    1166               0 :                             NS_LITERAL_STRING("/").get(), 1,
    1167                 :                             "i", "class=\"moz-txt-slash\"",
    1168               0 :                             aOutString, structPhrase_italic))
    1169                 :         {
    1170               0 :           i++;
    1171               0 :           continue;
    1172                 :         }
    1173               0 :         break;
    1174                 :       case '_':
    1175               0 :         if (StructPhraseHit(newOffset, newLength, i == 0,
    1176               0 :                             NS_LITERAL_STRING("_").get(), 1,
    1177                 :                             "span" /* <u> is deprecated */,
    1178                 :                             "class=\"moz-txt-underscore\"",
    1179               0 :                             aOutString, structPhrase_underline))
    1180                 :         {
    1181               0 :           i++;
    1182               0 :           continue;
    1183                 :         }
    1184               0 :         break;
    1185                 :       case '|':
    1186               0 :         if (StructPhraseHit(newOffset, newLength, i == 0,
    1187               0 :                             NS_LITERAL_STRING("|").get(), 1,
    1188                 :                             "code", "class=\"moz-txt-verticalline\"",
    1189               0 :                             aOutString, structPhrase_code))
    1190                 :         {
    1191               0 :           i++;
    1192               0 :           continue;
    1193                 :         }
    1194               0 :         break;
    1195                 :       }
    1196                 :     }
    1197                 : 
    1198               0 :     if (doURLs)
    1199                 :     {
    1200               0 :       switch (aInString[i])
    1201                 :       {
    1202                 :       case ':':
    1203                 :       case '@':
    1204                 :       case '.':
    1205               0 :         if ( (i == 0 || ((i > 0) && aInString[i - 1] != ' ')) && aInString[i +1] != ' ') // Performance increase: FindURL is skipped when a space sits on either side. NOTE(review): aInString[i + 1] is read even when i is the last index, so this relies on a readable terminator there - confirm for all callers
    1206                 :         {
    1207                 :           PRInt32 replaceBefore; // out-params of FindURL, used below to trim aOutString and to advance i
    1208                 :           PRInt32 replaceAfter;
    1209               0 :           if (FindURL(aInString, aInStringLength, i, whattodo,
    1210               0 :                       outputHTML, replaceBefore, replaceAfter)
    1211                 :                   && structPhrase_strong + structPhrase_italic +
    1212                 :                        structPhrase_underline + structPhrase_code == 0
    1213                 :                        /* workaround for bug #19445 */ )
    1214                 :           {
    1215               0 :             aOutString.Cut(aOutString.Length() - replaceBefore, replaceBefore); // remove the last replaceBefore characters already appended
    1216               0 :             aOutString += outputHTML;
    1217               0 :             i += replaceAfter + 1;
    1218               0 :             continue;
    1219                 :           }
    1220                 :         }
    1221               0 :         break;
    1222                 :       } //switch
    1223                 :     }
    1224                 : 
    1225               0 :     switch (aInString[i]) // reached only when none of the passes above consumed position i
    1226                 :     {
    1227                 :     // Special symbols: appended through EscapeChar
    1228                 :     case '<':
    1229                 :     case '>':
    1230                 :     case '&':
    1231               0 :       EscapeChar(aInString[i], aOutString, false);
    1232               0 :       i++;
    1233               0 :       break;
    1234                 :     // Normal characters: appended unchanged
    1235                 :     default:
    1236               0 :       aOutString += aInString[i];
    1237               0 :       i++;
    1238               0 :       break;
    1239                 :     }
    1240                 :   }
    1241               0 : }
    1242                 : 
    1243                 : void
    1244               0 : mozTXTToHTMLConv::ScanHTML(nsString& aInString, PRUint32 whattodo, nsString &aOutString)
    1245                 : { // Appends aInString to aOutString: markup ranges are copied as-is, text between them is unescaped and passed to ScanTXT.
    1246                 :   // some common variables we were recalculating
    1247                 :   // every time inside the for loop...
    1248               0 :   PRInt32 lengthOfInString = aInString.Length();
    1249               0 :   const PRUnichar * uniBuffer = aInString.get();
    1250                 : 
    1251                 : #ifdef DEBUG_BenB_Perf
    1252                 :   PRTime parsing_start = PR_IntervalNow();
    1253                 : #endif
    1254                 : 
    1255                 :   // Look for simple entities not included in tags and scan them.
    1256                 :   /* Skip all tags ("<[...]>") and content in an a tag ("<a[...]</a>")
    1257                 :      or in a comment ("<!--[...]-->").
    1258                 :      Unescape the rest (text between tags) and pass it to ScanTXT. */
    1259               0 :   for (PRInt32 i = 0; i < lengthOfInString;) // no increment here: each branch moves i to the end of the range it handled
    1260                 :   {
    1261               0 :     if (aInString[i] == '<')  // html tag. NOTE(review): the checks below index i + 1 .. i + 3 without comparing against lengthOfInString - confirm this is safe when '<' is near the end
    1262                 :     {
    1263               0 :       PRUint32 start = PRUint32(i);
    1264               0 :       if (nsCRT::ToLower((char)aInString[PRUint32(i) + 1]) == 'a')
    1265                 :            // if a tag, skip until </a>. NOTE(review): only the first character after '<' is tested, so any tag starting with 'a' or 'A' takes this path
    1266                 :       {
    1267               0 :         i = aInString.Find("</a>", true, i);
    1268               0 :         if (i == kNotFound)
    1269               0 :           i = lengthOfInString;
    1270                 :         else
    1271               0 :           i += 4;
    1272                 :       }
    1273               0 :       else if (aInString[PRUint32(i) + 1] == '!' && aInString[PRUint32(i) + 2] == '-' &&
    1274               0 :         aInString[PRUint32(i) + 3] == '-')
    1275                 :           // if commented-out code, skip until -->
    1276                 :       {
    1277               0 :         i = aInString.Find("-->", false, i);
    1278               0 :         if (i == kNotFound)
    1279               0 :           i = lengthOfInString;
    1280                 :         else
    1281               0 :           i += 3;
    1282                 : 
    1283                 :       }
    1284                 :       else  // just skip tag (attributes etc.)
    1285                 :       {
    1286               0 :         i = aInString.FindChar('>', i);
    1287               0 :         if (i == kNotFound)
    1288               0 :           i = lengthOfInString;
    1289                 :         else
    1290               0 :           i++;
    1291                 :       }
    1292               0 :       aOutString.Append(&uniBuffer[start], PRUint32(i) - start); // copy the skipped range [start, i) through unchanged
    1293                 :     }
    1294                 :     else
    1295                 :     {
    1296               0 :       PRUint32 start = PRUint32(i);
    1297               0 :       i = aInString.FindChar('<', i);
    1298               0 :       if (i == kNotFound)
    1299               0 :         i = lengthOfInString;
    1300                 :   
    1301               0 :       nsString tempString;     // holds the unescaped copy of the text run [start, i)
    1302               0 :       tempString.SetCapacity(PRUint32((PRUint32(i) - start) * growthRate));
    1303               0 :       UnescapeStr(uniBuffer, start, PRUint32(i) - start, tempString);
    1304               0 :       ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString);
    1305                 :     }
    1306                 :   }
    1307                 : 
    1308                 : #ifdef DEBUG_BenB_Perf
    1309                 :   printf("ScanHTML time:    %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start));
    1310                 : #endif
    1311               0 : }
    1312                 : 
    1313                 : /****************************************************************************
    1314                 :   XPCOM Interface
    1315                 : *****************************************************************************/
    1316                 : 
    1317                 : NS_IMETHODIMP
    1318               0 : mozTXTToHTMLConv::Convert(nsIInputStream *aFromStream,
    1319                 :                           const char *aFromType,
    1320                 :                           const char *aToType,
    1321                 :                           nsISupports *aCtxt, nsIInputStream **_retval)
    1322                 : { // Stub: ignores every argument and never writes to _retval.
    1323               0 :   return NS_ERROR_NOT_IMPLEMENTED;
    1324                 : }
    1325                 : 
    1326                 : NS_IMETHODIMP
    1327               0 : mozTXTToHTMLConv::AsyncConvertData(const char *aFromType,
    1328                 :                                    const char *aToType,
    1329                 :                                    nsIStreamListener *aListener, nsISupports *aCtxt) {
    1330               0 :   return NS_ERROR_NOT_IMPLEMENTED; // stub: every argument is ignored
    1331                 : }
    1332                 : 
    1333                 : NS_IMETHODIMP
    1334               0 : mozTXTToHTMLConv::OnDataAvailable(nsIRequest* request, nsISupports *ctxt,
    1335                 :                                  nsIInputStream *inStr, PRUint32 sourceOffset,
    1336                 :                                  PRUint32 count)
    1337                 : { // Stub: ignores every argument; nothing is read from inStr.
    1338               0 :   return NS_ERROR_NOT_IMPLEMENTED;
    1339                 : }
    1340                 : 
    1341                 : NS_IMETHODIMP
    1342               0 : mozTXTToHTMLConv::OnStartRequest(nsIRequest* request, nsISupports *ctxt)
    1343                 : { // Stub: ignores both arguments.
    1344               0 :   return NS_ERROR_NOT_IMPLEMENTED;
    1345                 : }
    1346                 : 
    1347                 : NS_IMETHODIMP
    1348               0 : mozTXTToHTMLConv::OnStopRequest(nsIRequest* request, nsISupports *ctxt,
    1349                 :                                 nsresult aStatus)
    1350                 : { // Stub: ignores every argument, including aStatus.
    1351               0 :   return NS_ERROR_NOT_IMPLEMENTED;
    1352                 : }
    1353                 : 
    1354                 : NS_IMETHODIMP
    1355               0 : mozTXTToHTMLConv::CiteLevelTXT(const PRUnichar *line, PRUint32 *logLineStart,
    1356                 :                                 PRUint32 *_retval)
    1357                 : { // XPCOM wrapper: checks the three pointers, then stores the result of the two-argument CiteLevelTXT overload in *_retval.
    1358               0 :    if (!logLineStart || !_retval || !line)
    1359               0 :      return NS_ERROR_NULL_POINTER;
    1360               0 :    *_retval = CiteLevelTXT(line, *logLineStart); // NOTE(review): the overload presumably takes logLineStart by reference and updates it - confirm against its declaration
    1361               0 :    return NS_OK;
    1362                 : }
    1363                 : 
    1364                 : NS_IMETHODIMP
    1365               0 : mozTXTToHTMLConv::ScanTXT(const PRUnichar *text, PRUint32 whattodo,
    1366                 :                            PRUnichar **_retval)
    1367                 : { // XPCOM wrapper: runs the four-argument ScanTXT over text and returns a newly allocated copy of the result in *_retval.
    1368               0 :   NS_ENSURE_ARG(text); // NOTE(review): _retval itself is never null-checked before being written below
    1369                 : 
    1370                 :   // FIX ME!!! (original note; it does not say what needs fixing)
    1371               0 :   nsString outString;
    1372               0 :   PRInt32 inLength = nsCRT::strlen(text);
    1373                 :   // by setting a large capacity up front (see SetCapacity below), we save time
    1374                 :   // when appending characters to the output string because we don't
    1375                 :   // need to reallocate and re-copy the characters already in the out String.
    1376               0 :   NS_ASSERTION(inLength, "ScanTXT passed 0 length string");
    1377               0 :   if (inLength == 0) { // empty input: hand back a copy of it without scanning
    1378               0 :     *_retval = nsCRT::strdup(text); // NOTE(review): unlike the path below, the result is not checked before returning NS_OK
    1379               0 :     return NS_OK;
    1380                 :   }
    1381                 : 
    1382               0 :   outString.SetCapacity(PRUint32(inLength * growthRate));
    1383               0 :   ScanTXT(text, inLength, whattodo, outString);
    1384                 : 
    1385               0 :   *_retval = ToNewUnicode(outString);
    1386               0 :   return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY; // a null copy is reported as out-of-memory
    1387                 : }
    1388                 : 
    1389                 : NS_IMETHODIMP
    1390               0 : mozTXTToHTMLConv::ScanHTML(const PRUnichar *text, PRUint32 whattodo,
    1391                 :                             PRUnichar **_retval)
    1392                 : { // XPCOM wrapper: copies text into an nsString, runs the three-argument ScanHTML on it and returns a newly allocated copy of the result in *_retval.
    1393               0 :   NS_ENSURE_ARG(text); // NOTE(review): _retval itself is never null-checked before being written below
    1394                 : 
    1395                 :   // FIX ME!!! (original note; presumably refers to the extra input copy flagged two lines down - confirm)
    1396               0 :   nsString outString;
    1397               0 :   nsString inString (text); // look at this nasty extra copy of the entire input buffer!
    1398               0 :   outString.SetCapacity(PRUint32(inString.Length() * growthRate)); // pre-size the output so appends need fewer reallocations
    1399                 : 
    1400               0 :   ScanHTML(inString, whattodo, outString);
    1401               0 :   *_retval = ToNewUnicode(outString);
    1402               0 :   return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY; // a null copy is reported as out-of-memory
    1403                 : }
    1404                 : 
    1405                 : nsresult
    1406               0 : MOZ_NewTXTToHTMLConv(mozTXTToHTMLConv** aConv)
    1407                 : { // Factory: allocates a mozTXTToHTMLConv, stores it in *aConv and applies NS_ADDREF to it before returning NS_OK.
    1408               0 :     NS_PRECONDITION(aConv != nsnull, "null ptr");
    1409               0 :     if (!aConv)
    1410               0 :       return NS_ERROR_NULL_POINTER;
    1411                 : 
    1412               0 :     *aConv = new mozTXTToHTMLConv();
    1413               0 :     if (!*aConv) // NOTE(review): only reachable if operator new can return null in this build - confirm
    1414               0 :       return NS_ERROR_OUT_OF_MEMORY;
    1415                 : 
    1416               0 :     NS_ADDREF(*aConv);
    1417                 :     //    return (*aConv)->Init();  (disabled call, kept from the original)
    1418               0 :     return NS_OK;
    1419                 : }

Generated by: LCOV version 1.7