1 : /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is the Mozilla Text to HTML converter code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Ben Bucksch <http://www.bucksch.org>.
19 : * Portions created by the Initial Developer are Copyright (C) 1999, 2000
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : *
24 : * Alternatively, the contents of this file may be used under the terms of
25 : * either the GNU General Public License Version 2 or later (the "GPL"), or
26 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 : * in which case the provisions of the GPL or the LGPL are applicable instead
28 : * of those above. If you wish to allow use of your version of this file only
29 : * under the terms of either the GPL or the LGPL, and not to allow others to
30 : * use your version of this file under the terms of the MPL, indicate your
31 : * decision by deleting the provisions above and replace them with the notice
32 : * and other provisions required by the GPL or the LGPL. If you do not delete
33 : * the provisions above, a recipient may use your version of this file under
34 : * the terms of any one of the MPL, the GPL or the LGPL.
35 : *
36 : * ***** END LICENSE BLOCK ***** */
37 :
38 : #include "mozTXTToHTMLConv.h"
39 : #include "nsIServiceManager.h"
40 : #include "nsNetCID.h"
41 : #include "nsNetUtil.h"
42 : #include "nsReadableUtils.h"
43 : #include "nsUnicharUtils.h"
44 : #include "nsCRT.h"
45 : #include "nsIExternalProtocolHandler.h"
46 :
47 : static NS_DEFINE_CID(kIOServiceCID, NS_IOSERVICE_CID);
48 :
49 : #ifdef DEBUG_BenB_Perf
50 : #include "prtime.h"
51 : #include "prinrval.h"
52 : #endif
53 :
54 : const PRFloat64 growthRate = 1.2;
55 :
56 : // Bug 183111, editor now replaces multiple spaces with leading
57 : // 0xA0's and a single ending space, so need to treat 0xA0's as spaces.
58 : // 0xA0 is the Latin1/Unicode character for "non-breaking space (nbsp)"
59 : // Also recognize the Japanese ideographic space 0x3000 as a space.
60 0 : static inline bool IsSpace(const PRUnichar aChar)
61 : {
62 0 : return (nsCRT::IsAsciiSpace(aChar) || aChar == 0xA0 || aChar == 0x3000);
63 : }
64 :
65 : // Escape Char will take ch, escape it and append the result to
66 : // aStringToAppendTo
67 : void
68 0 : mozTXTToHTMLConv::EscapeChar(const PRUnichar ch, nsString& aStringToAppendTo,
69 : bool inAttribute)
70 : {
71 0 : switch (ch)
72 : {
73 : case '<':
74 0 : aStringToAppendTo.AppendLiteral("<");
75 0 : break;
76 : case '>':
77 0 : aStringToAppendTo.AppendLiteral(">");
78 0 : break;
79 : case '&':
80 0 : aStringToAppendTo.AppendLiteral("&");
81 0 : break;
82 : case '"':
83 0 : if (inAttribute)
84 : {
85 0 : aStringToAppendTo.AppendLiteral(""");
86 0 : break;
87 : }
88 : // else fall through
89 : default:
90 0 : aStringToAppendTo += ch;
91 : }
92 :
93 : return;
94 : }
95 :
96 : // EscapeStr takes the passed in string and
97 : // escapes it IN PLACE.
98 : void
99 0 : mozTXTToHTMLConv::EscapeStr(nsString& aInString, bool inAttribute)
100 : {
101 : // the replace substring routines
102 : // don't seem to work if you have a character
103 : // in the in string that is also in the replacement
104 : // string! =(
105 : //aInString.ReplaceSubstring("&", "&");
106 : //aInString.ReplaceSubstring("<", "<");
107 : //aInString.ReplaceSubstring(">", ">");
108 0 : for (PRUint32 i = 0; i < aInString.Length();)
109 : {
110 0 : switch (aInString[i])
111 : {
112 : case '<':
113 0 : aInString.Cut(i, 1);
114 0 : aInString.Insert(NS_LITERAL_STRING("<"), i);
115 0 : i += 4; // skip past the integers we just added
116 0 : break;
117 : case '>':
118 0 : aInString.Cut(i, 1);
119 0 : aInString.Insert(NS_LITERAL_STRING(">"), i);
120 0 : i += 4; // skip past the integers we just added
121 0 : break;
122 : case '&':
123 0 : aInString.Cut(i, 1);
124 0 : aInString.Insert(NS_LITERAL_STRING("&"), i);
125 0 : i += 5; // skip past the integers we just added
126 0 : break;
127 : case '"':
128 0 : if (inAttribute)
129 : {
130 0 : aInString.Cut(i, 1);
131 0 : aInString.Insert(NS_LITERAL_STRING("""), i);
132 0 : i += 6;
133 0 : break;
134 : }
135 : // else fall through
136 : default:
137 0 : i++;
138 : }
139 : }
140 0 : }
141 :
142 : void
143 0 : mozTXTToHTMLConv::UnescapeStr(const PRUnichar * aInString, PRInt32 aStartPos, PRInt32 aLength, nsString& aOutString)
144 : {
145 0 : const PRUnichar * subString = nsnull;
146 0 : for (PRUint32 i = aStartPos; PRInt32(i) - aStartPos < aLength;)
147 : {
148 0 : PRInt32 remainingChars = i - aStartPos;
149 0 : if (aInString[i] == '&')
150 : {
151 0 : subString = &aInString[i];
152 0 : if (!nsCRT::strncmp(subString, NS_LITERAL_STRING("<").get(), MinInt(4, aLength - remainingChars)))
153 : {
154 0 : aOutString.Append(PRUnichar('<'));
155 0 : i += 4;
156 : }
157 0 : else if (!nsCRT::strncmp(subString, NS_LITERAL_STRING(">").get(), MinInt(4, aLength - remainingChars)))
158 : {
159 0 : aOutString.Append(PRUnichar('>'));
160 0 : i += 4;
161 : }
162 0 : else if (!nsCRT::strncmp(subString, NS_LITERAL_STRING("&").get(), MinInt(5, aLength - remainingChars)))
163 : {
164 0 : aOutString.Append(PRUnichar('&'));
165 0 : i += 5;
166 : }
167 0 : else if (!nsCRT::strncmp(subString, NS_LITERAL_STRING(""").get(), MinInt(6, aLength - remainingChars)))
168 : {
169 0 : aOutString.Append(PRUnichar('"'));
170 0 : i += 6;
171 : }
172 : else
173 : {
174 0 : aOutString += aInString[i];
175 0 : i++;
176 : }
177 : }
178 : else
179 : {
180 0 : aOutString += aInString[i];
181 0 : i++;
182 : }
183 : }
184 0 : }
185 :
186 : void
187 0 : mozTXTToHTMLConv::CompleteAbbreviatedURL(const PRUnichar * aInString, PRInt32 aInLength,
188 : const PRUint32 pos, nsString& aOutString)
189 : {
190 0 : NS_ASSERTION(PRInt32(pos) < aInLength, "bad args to CompleteAbbreviatedURL, see bug #190851");
191 0 : if (PRInt32(pos) >= aInLength)
192 0 : return;
193 :
194 0 : if (aInString[pos] == '@')
195 : {
196 : // only pre-pend a mailto url if the string contains a .domain in it..
197 : //i.e. we want to linkify johndoe@foo.com but not "let's meet @8pm"
198 0 : nsDependentString inString(aInString, aInLength);
199 0 : if (inString.FindChar('.', pos) != kNotFound) // if we have a '.' after the @ sign....
200 : {
201 0 : aOutString.AssignLiteral("mailto:");
202 0 : aOutString += aInString;
203 : }
204 : }
205 0 : else if (aInString[pos] == '.')
206 : {
207 0 : if (ItMatchesDelimited(aInString, aInLength,
208 0 : NS_LITERAL_STRING("www.").get(), 4, LT_IGNORE, LT_IGNORE))
209 : {
210 0 : aOutString.AssignLiteral("http://");
211 0 : aOutString += aInString;
212 : }
213 0 : else if (ItMatchesDelimited(aInString,aInLength, NS_LITERAL_STRING("ftp.").get(), 4, LT_IGNORE, LT_IGNORE))
214 : {
215 0 : aOutString.AssignLiteral("ftp://");
216 0 : aOutString += aInString;
217 : }
218 : }
219 : }
220 :
// FindURLStart searches backwards from |pos| for the first character of a
// URL candidate, following the rules of the mode |check|.
//   aInString, aInLength  text to examine
//   pos                   index of the character that triggered the search
//   check                 recognition mode to apply
//   start                 out: index of the first character of the URL;
//                         only meaningful when true is returned
// Returns true if a plausible start was found for this mode.
221 : bool
222 0 : mozTXTToHTMLConv::FindURLStart(const PRUnichar * aInString, PRInt32 aInLength,
223 : const PRUint32 pos, const modetype check,
224 : PRUint32& start)
225 : {
226 0 : switch(check)
227 : { // no breaks, because end of blocks is never reached
228 : case RFC1738:
229 : {
// The five characters ending at |pos| have to be the "<URL:" marker; the
// URL itself then begins right after |pos|.
// NOTE(review): |pos| is unsigned, so |pos - 4| wraps around for pos < 4;
// presumably MaxInt takes signed arguments and clamps that to 0 - confirm.
230 0 : if (!nsCRT::strncmp(&aInString[MaxInt(pos - 4, 0)], NS_LITERAL_STRING("<URL:").get(), 5))
231 : {
232 0 : start = pos + 1;
233 0 : return true;
234 : }
235 : else
236 0 : return false;
237 : }
238 : case RFC2396E:
239 : {
// Find the nearest '<', '>' or '"' before |pos|.  Only '<' and '"' are
// accepted as opening delimiters; the URL begins right after the delimiter
// and has to begin before |pos|.
// |pos| is unsigned, so |pos <= 0| is the same as |pos == 0|.
240 0 : nsString temp(aInString, aInLength);
241 0 : PRInt32 i = pos <= 0 ? kNotFound : temp.RFindCharInSet(NS_LITERAL_STRING("<>\"").get(), pos - 1);
242 0 : if (i != kNotFound && (temp[PRUint32(i)] == '<' ||
243 0 : temp[PRUint32(i)] == '"'))
244 : {
245 0 : start = PRUint32(++i);
246 0 : return start < pos;
247 : }
248 : else
249 0 : return false;
250 : }
251 : case freetext:
252 : {
// Walk backwards over ASCII letters, digits, '+', '-' and '.'.
// The loop stops one position before the first such character (i may
// become -1); |++i| below moves back onto it.  That character has to be an
// ASCII letter and has to lie before |pos|.
253 0 : PRInt32 i = pos - 1;
254 0 : for (; i >= 0 && (
255 0 : nsCRT::IsAsciiAlpha(aInString[PRUint32(i)]) ||
256 0 : nsCRT::IsAsciiDigit(aInString[PRUint32(i)]) ||
257 0 : aInString[PRUint32(i)] == '+' ||
258 0 : aInString[PRUint32(i)] == '-' ||
259 0 : aInString[PRUint32(i)] == '.'
260 : ); i--)
261 : ;
262 0 : if (++i >= 0 && PRUint32(i) < pos && nsCRT::IsAsciiAlpha(aInString[PRUint32(i)]))
263 : {
264 0 : start = PRUint32(i);
265 0 : return true;
266 : }
267 : else
268 0 : return false;
269 : }
270 : case abbreviated:
271 : {
272 0 : PRInt32 i = pos - 1;
273 : // This disallows non-ascii-characters for email.
274 : // Currently correct, but revisit later after standards changed.
275 0 : bool isEmail = aInString[pos] == (PRUnichar)'@';
276 : // These chars mark the start of the URL
// The backwards walk stops at the listed delimiter characters, at
// whitespace (see IsSpace) and, for e-mail addresses, at any non-ASCII
// character.
277 0 : for (; i >= 0
278 0 : && aInString[PRUint32(i)] != '>' && aInString[PRUint32(i)] != '<'
279 0 : && aInString[PRUint32(i)] != '"' && aInString[PRUint32(i)] != '\''
280 0 : && aInString[PRUint32(i)] != '`' && aInString[PRUint32(i)] != ','
281 0 : && aInString[PRUint32(i)] != '{' && aInString[PRUint32(i)] != '['
282 0 : && aInString[PRUint32(i)] != '(' && aInString[PRUint32(i)] != '|'
283 0 : && aInString[PRUint32(i)] != '\\'
284 0 : && !IsSpace(aInString[PRUint32(i)])
285 0 : && (!isEmail || nsCRT::IsAscii(aInString[PRUint32(i)]))
286 : ; i--)
287 : ;
// The first character after the delimiter has to be an ASCII letter or
// digit and has to lie before |pos|.
288 0 : if
289 : (
290 : ++i >= 0 && PRUint32(i) < pos
291 : &&
292 : (
293 0 : nsCRT::IsAsciiAlpha(aInString[PRUint32(i)]) ||
294 0 : nsCRT::IsAsciiDigit(aInString[PRUint32(i)])
295 : )
296 : )
297 : {
298 0 : start = PRUint32(i);
299 0 : return true;
300 : }
301 : else
302 0 : return false;
303 : }
304 : default:
305 0 : return false;
306 : } //switch
307 : }
308 :
// FindURLEnd searches forwards from |pos| for the last character of a URL
// candidate, following the rules of the mode |check|.
//   aInString, aInStringLength  text to examine
//   pos                         index of the character that triggered the
//                               search
//   check                       recognition mode to apply
//   start                       index of the first character of the URL
//                               (only read in the RFC2396E case)
//   end                         out: index of the last character of the URL;
//                               only meaningful when true is returned
// Returns true if an end was found that lies behind |pos|.
309 : bool
310 0 : mozTXTToHTMLConv::FindURLEnd(const PRUnichar * aInString, PRInt32 aInStringLength, const PRUint32 pos,
311 : const modetype check, const PRUint32 start, PRUint32& end)
312 : {
313 0 : switch(check)
314 : { // no breaks, because end of blocks is never reached
315 : case RFC1738:
316 : case RFC2396E:
317 : {
318 0 : nsString temp(aInString, aInStringLength);
319 :
// Find the first '<', '>' or '"' behind |pos|.  It has to be the closing
// delimiter that belongs to the opening one: '>' for RFC1738 or when the
// character in front of |start| is '<', otherwise '"'.
// |i--| reads the delimiter and then steps back, so that |i| is the index
// of the last character of the URL afterwards.
320 0 : PRInt32 i = temp.FindCharInSet(NS_LITERAL_STRING("<>\"").get(), pos + 1);
321 0 : if (i != kNotFound && temp[PRUint32(i--)] ==
322 0 : (check == RFC1738 || temp[start - 1] == '<' ? '>' : '"'))
323 : {
324 0 : end = PRUint32(i);
325 0 : return end > pos;
326 : }
327 : else
328 0 : return false;
329 : }
330 : case freetext:
331 : case abbreviated:
332 : {
333 0 : PRUint32 i = pos + 1;
334 0 : bool isEmail = aInString[pos] == (PRUnichar)'@';
// Set once a '(' has been seen behind |pos|; from then on a ')' no longer
// ends the URL.
335 0 : bool haveOpeningBracket = false;
336 0 : for (; PRInt32(i) < aInStringLength; i++)
337 : {
338 : // These chars mark the end of the URL
339 0 : if (aInString[i] == '>' || aInString[i] == '<' ||
340 0 : aInString[i] == '"' || aInString[i] == '`' ||
341 0 : aInString[i] == '}' || aInString[i] == ']' ||
342 0 : aInString[i] == '{' || aInString[i] == '[' ||
343 0 : aInString[i] == '|' ||
344 0 : (aInString[i] == ')' && !haveOpeningBracket) ||
345 0 : IsSpace(aInString[i]) )
346 0 : break;
347 : // Disallow non-ascii-characters for email.
348 : // Currently correct, but revisit later after standards changed.
349 0 : if (isEmail && (
350 0 : aInString[i] == '(' || aInString[i] == '\'' ||
351 0 : !nsCRT::IsAscii(aInString[i]) ))
352 0 : break;
353 0 : if (aInString[i] == '(')
354 0 : haveOpeningBracket = true;
355 : }
356 : // These chars are allowed in the middle of the URL, but not at end.
357 : // Technically they are, but are used in normal text after the URL.
// |i| is one behind the last accepted character here; the loop steps back
// at least once and then continues over trailing punctuation, but never
// further than |pos|.
358 0 : while (--i > pos && (
359 0 : aInString[i] == '.' || aInString[i] == ',' || aInString[i] == ';' ||
360 0 : aInString[i] == '!' || aInString[i] == '?' || aInString[i] == '-' ||
361 0 : aInString[i] == ':' || aInString[i] == '\''
362 : ))
363 : ;
364 0 : if (i > pos)
365 : {
366 0 : end = i;
367 0 : return true;
368 : }
369 : else
370 0 : return false;
371 : }
372 : default:
373 0 : return false;
374 : } //switch
375 : }
376 :
377 : void
378 0 : mozTXTToHTMLConv::CalculateURLBoundaries(const PRUnichar * aInString, PRInt32 aInStringLength,
379 : const PRUint32 pos, const PRUint32 whathasbeendone,
380 : const modetype check, const PRUint32 start, const PRUint32 end,
381 : nsString& txtURL, nsString& desc,
382 : PRInt32& replaceBefore, PRInt32& replaceAfter)
383 : {
384 0 : PRUint32 descstart = start;
385 0 : switch(check)
386 : {
387 : case RFC1738:
388 : {
389 0 : descstart = start - 5;
390 0 : desc.Append(&aInString[descstart], end - descstart + 2); // include "<URL:" and ">"
391 0 : replaceAfter = end - pos + 1;
392 0 : } break;
393 : case RFC2396E:
394 : {
395 0 : descstart = start - 1;
396 0 : desc.Append(&aInString[descstart], end - descstart + 2); // include brackets
397 0 : replaceAfter = end - pos + 1;
398 0 : } break;
399 : case freetext:
400 : case abbreviated:
401 : {
402 0 : descstart = start;
403 0 : desc.Append(&aInString[descstart], end - start + 1); // don't include brackets
404 0 : replaceAfter = end - pos;
405 0 : } break;
406 0 : default: break;
407 : } //switch
408 :
409 0 : EscapeStr(desc, false);
410 :
411 0 : txtURL.Append(&aInString[start], end - start + 1);
412 0 : txtURL.StripWhitespace();
413 :
414 : // FIX ME
415 0 : nsAutoString temp2;
416 0 : ScanTXT(&aInString[descstart], pos - descstart, ~kURLs /*prevents loop*/ & whathasbeendone, temp2);
417 0 : replaceBefore = temp2.Length();
418 : return;
419 : }
420 :
421 0 : bool mozTXTToHTMLConv::ShouldLinkify(const nsCString& aURL)
422 : {
423 0 : if (!mIOService)
424 0 : return false;
425 :
426 0 : nsCAutoString scheme;
427 0 : nsresult rv = mIOService->ExtractScheme(aURL, scheme);
428 0 : if(NS_FAILED(rv))
429 0 : return false;
430 :
431 : // Get the handler for this scheme.
432 0 : nsCOMPtr<nsIProtocolHandler> handler;
433 0 : rv = mIOService->GetProtocolHandler(scheme.get(), getter_AddRefs(handler));
434 0 : if(NS_FAILED(rv))
435 0 : return false;
436 :
437 : // Is it an external protocol handler? If not, linkify it.
438 0 : nsCOMPtr<nsIExternalProtocolHandler> externalHandler = do_QueryInterface(handler);
439 0 : if (!externalHandler)
440 0 : return true; // handler is built-in, linkify it!
441 :
442 : // If external app exists for the scheme then linkify it.
443 : bool exists;
444 0 : rv = externalHandler->ExternalAppExistsForScheme(scheme, &exists);
445 0 : return(NS_SUCCEEDED(rv) && exists);
446 : }
447 :
448 : bool
449 0 : mozTXTToHTMLConv::CheckURLAndCreateHTML(
450 : const nsString& txtURL, const nsString& desc, const modetype mode,
451 : nsString& outputHTML)
452 : {
453 : // Create *uri from txtURL
454 0 : nsCOMPtr<nsIURI> uri;
455 : nsresult rv;
456 : // Lazily initialize mIOService
457 0 : if (!mIOService)
458 : {
459 0 : mIOService = do_GetIOService();
460 :
461 0 : if (!mIOService)
462 0 : return false;
463 : }
464 :
465 : // See if the url should be linkified.
466 0 : NS_ConvertUTF16toUTF8 utf8URL(txtURL);
467 0 : if (!ShouldLinkify(utf8URL))
468 0 : return false;
469 :
470 : // it would be faster if we could just check to see if there is a protocol
471 : // handler for the url and return instead of actually trying to create a url...
472 0 : rv = mIOService->NewURI(utf8URL, nsnull, nsnull, getter_AddRefs(uri));
473 :
474 : // Real work
475 0 : if (NS_SUCCEEDED(rv) && uri)
476 : {
477 0 : outputHTML.AssignLiteral("<a class=\"moz-txt-link-");
478 0 : switch(mode)
479 : {
480 : case RFC1738:
481 0 : outputHTML.AppendLiteral("rfc1738");
482 0 : break;
483 : case RFC2396E:
484 0 : outputHTML.AppendLiteral("rfc2396E");
485 0 : break;
486 : case freetext:
487 0 : outputHTML.AppendLiteral("freetext");
488 0 : break;
489 : case abbreviated:
490 0 : outputHTML.AppendLiteral("abbreviated");
491 0 : break;
492 0 : default: break;
493 : }
494 0 : nsAutoString escapedURL(txtURL);
495 0 : EscapeStr(escapedURL, true);
496 :
497 0 : outputHTML.AppendLiteral("\" href=\"");
498 0 : outputHTML += escapedURL;
499 0 : outputHTML.AppendLiteral("\">");
500 0 : outputHTML += desc;
501 0 : outputHTML.AppendLiteral("</a>");
502 0 : return true;
503 : }
504 : else
505 0 : return false;
506 : }
507 :
508 0 : NS_IMETHODIMP mozTXTToHTMLConv::FindURLInPlaintext(const PRUnichar * aInString, PRInt32 aInLength, PRInt32 aPos, PRInt32 * aStartPos, PRInt32 * aEndPos)
509 : {
510 : // call FindURL on the passed in string
511 0 : nsAutoString outputHTML; // we'll ignore the generated output HTML
512 :
513 0 : *aStartPos = -1;
514 0 : *aEndPos = -1;
515 :
516 0 : FindURL(aInString, aInLength, aPos, kURLs, outputHTML, *aStartPos, *aEndPos);
517 :
518 0 : return NS_OK;
519 : }
520 :
// FindURL tests whether the character at |pos| (':', '.' or '@') is part of
// a URL.  The recognition modes are tried in the order given by |ranking|
// and the first one that succeeds wins.
//   aInString, aInLength  text to examine
//   pos                   index of the trigger character
//   whathasbeendone       conversion flags, handed on to
//                         CalculateURLBoundaries
//   outputHTML            out: the generated link (set by
//                         CheckURLAndCreateHTML)
//   replaceBefore,
//   replaceAfter          out: the values CalculateURLBoundaries computed
//                         for the winning mode; written on success only
// Returns true if a URL was recognized.
521 : bool
522 0 : mozTXTToHTMLConv::FindURL(const PRUnichar * aInString, PRInt32 aInLength, const PRUint32 pos,
523 : const PRUint32 whathasbeendone,
524 : nsString& outputHTML, PRInt32& replaceBefore, PRInt32& replaceAfter)
525 : {
// Each mode moves through unchecked -> startok -> endok -> success; a mode
// marked invalid is never examined.
526 : enum statetype {unchecked, invalid, startok, endok, success};
527 : static const modetype ranking[] = {RFC1738, RFC2396E, freetext, abbreviated};
528 :
529 : statetype state[mozTXTToHTMLConv_lastMode + 1]; // 0(=unknown)..lastMode
530 : /* I don't like this abuse of enums as index for the array,
531 : but I don't know a better method */
532 :
533 : // Define, which modes to check
534 : /* all modes but abbreviated are checked for text[pos] == ':',
535 : only abbreviated for '.', RFC2396E and abbreviated for '@' */
536 0 : for (modetype iState = unknown; iState <= mozTXTToHTMLConv_lastMode;
537 : iState = modetype(iState + 1))
538 0 : state[iState] = aInString[pos] == ':' ? unchecked : invalid;
539 0 : switch (aInString[pos])
540 : {
541 : case '@':
542 0 : state[RFC2396E] = unchecked;
543 : // no break here
544 : case '.':
545 0 : state[abbreviated] = unchecked;
546 0 : break;
547 : case ':':
548 0 : state[abbreviated] = invalid;
549 0 : break;
550 : default:
551 0 : break;
552 : }
553 :
554 : // Test, first successful mode wins, sequence defined by |ranking|
555 0 : PRInt32 iCheck = 0; // the currently tested modetype
556 0 : modetype check = ranking[iCheck];
557 0 : for (; iCheck < mozTXTToHTMLConv_numberOfModes && state[check] != success;
558 : iCheck++)
559 : /* check state from last run.
560 : If this is the first, check this one, which isn't = success yet */
561 : {
562 0 : check = ranking[iCheck];
563 :
// |start| and |end| are only read after FindURLStart / FindURLEnd have
// returned true for this mode and thereby set them.
564 : PRUint32 start, end;
565 :
566 0 : if (state[check] == unchecked)
567 0 : if (FindURLStart(aInString, aInLength, pos, check, start))
568 0 : state[check] = startok;
569 :
570 0 : if (state[check] == startok)
571 0 : if (FindURLEnd(aInString, aInLength, pos, check, start, end))
572 0 : state[check] = endok;
573 :
574 0 : if (state[check] == endok)
575 : {
576 0 : nsAutoString txtURL, desc;
577 : PRInt32 resultReplaceBefore, resultReplaceAfter;
578 :
579 : CalculateURLBoundaries(aInString, aInLength, pos, whathasbeendone, check, start, end,
580 : txtURL, desc,
581 0 : resultReplaceBefore, resultReplaceAfter);
582 :
// Trigger characters other than ':' mean that the URL has no scheme yet.
// CompleteAbbreviatedURL rebuilds txtURL with one; |pos - start| is the
// index of the trigger character inside the extracted URL text.  If it
// leaves txtURL empty, this mode does not produce a link.
583 0 : if (aInString[pos] != ':')
584 : {
585 0 : nsAutoString temp = txtURL;
586 0 : txtURL.SetLength(0);
587 0 : CompleteAbbreviatedURL(temp.get(),temp.Length(), pos - start, txtURL);
588 : }
589 :
590 0 : if (!txtURL.IsEmpty() && CheckURLAndCreateHTML(txtURL, desc, check,
591 0 : outputHTML))
592 : {
593 0 : replaceBefore = resultReplaceBefore;
594 0 : replaceAfter = resultReplaceAfter;
595 0 : state[check] = success;
596 : }
597 : } // if
598 : } // for
// |check| still names the mode that was examined last.
599 0 : return state[check] == success;
600 : }
601 :
// ItMatchesDelimited tests whether |rep| occurs at the beginning of
// aInString, compared case-insensitively, and whether the characters
// around it satisfy the |before| / |after| constraints.
//   aInString, aInLength  text to examine
//   rep, aRepLen          pattern and its length
//   before                constraint for aInString[0].  With LT_IGNORE the
//                         pattern itself has to start at index 0, otherwise
//                         at index 1.
//   after                 constraint for the character right behind the
//                         pattern
// LT_ALPHA / LT_DIGIT require an ASCII letter / digit.  LT_DELIMITER
// requires a character that is neither an ASCII letter nor an ASCII digit
// nor equal to the first character of |rep|.
602 : bool
603 0 : mozTXTToHTMLConv::ItMatchesDelimited(const PRUnichar * aInString,
604 : PRInt32 aInLength, const PRUnichar* rep, PRInt32 aRepLen,
605 : LIMTYPE before, LIMTYPE after)
606 : {
607 :
608 : // this little method gets called a LOT. I found we were spending a
609 : // lot of time just calculating the length of the variable "rep"
610 : // over and over again every time we called it. So we're now passing
611 : // an integer in here.
612 0 : PRInt32 textLen = aInLength;
613 :
// Reject texts that are too short for the pattern plus the characters the
// constraints need.  Note that |after == LT_DELIMITER| does not demand an
// extra character here.
614 0 : if
615 : (
616 : ((before == LT_IGNORE && (after == LT_IGNORE || after == LT_DELIMITER))
617 : && textLen < aRepLen) ||
618 : ((before != LT_IGNORE || (after != LT_IGNORE && after != LT_DELIMITER))
619 : && textLen < aRepLen + 1) ||
620 : (before != LT_IGNORE && after != LT_IGNORE && after != LT_DELIMITER
621 : && textLen < aRepLen + 2)
622 : )
623 0 : return false;
624 :
// NOTE(review): textAfterPos is read unconditionally and its index can be
// equal to aInLength (e.g. LT_IGNORE before and textLen == aRepLen);
// presumably the buffer is NUL-terminated or longer than the window -
// confirm against the callers.
625 0 : PRUnichar text0 = aInString[0];
626 0 : PRUnichar textAfterPos = aInString[aRepLen + (before == LT_IGNORE ? 0 : 1)];
627 :
// Fail if either neighbour violates its constraint or if the text at the
// pattern position differs from |rep| (ignoring case).
628 0 : if
629 : (
630 : (before == LT_ALPHA
631 0 : && !nsCRT::IsAsciiAlpha(text0)) ||
632 : (before == LT_DIGIT
633 0 : && !nsCRT::IsAsciiDigit(text0)) ||
634 : (before == LT_DELIMITER
635 : &&
636 : (
637 0 : nsCRT::IsAsciiAlpha(text0) ||
638 0 : nsCRT::IsAsciiDigit(text0) ||
639 : text0 == *rep
640 : )) ||
641 : (after == LT_ALPHA
642 0 : && !nsCRT::IsAsciiAlpha(textAfterPos)) ||
643 : (after == LT_DIGIT
644 0 : && !nsCRT::IsAsciiDigit(textAfterPos)) ||
645 : (after == LT_DELIMITER
646 : &&
647 : (
648 0 : nsCRT::IsAsciiAlpha(textAfterPos) ||
649 0 : nsCRT::IsAsciiDigit(textAfterPos) ||
650 : textAfterPos == *rep
651 : )) ||
652 0 : !Substring(Substring(aInString, aInString+aInLength),
653 : (before == LT_IGNORE ? 0 : 1),
654 0 : aRepLen).Equals(Substring(rep, rep+aRepLen),
655 0 : nsCaseInsensitiveStringComparator())
656 : )
657 0 : return false;
658 :
659 0 : return true;
660 : }
661 :
662 : PRUint32
663 0 : mozTXTToHTMLConv::NumberOfMatches(const PRUnichar * aInString, PRInt32 aInStringLength,
664 : const PRUnichar* rep, PRInt32 aRepLen, LIMTYPE before, LIMTYPE after)
665 : {
666 0 : PRUint32 result = 0;
667 :
668 0 : for (PRInt32 i = 0; i < aInStringLength; i++)
669 : {
670 0 : const PRUnichar * indexIntoString = &aInString[i];
671 0 : if (ItMatchesDelimited(indexIntoString, aInStringLength - i, rep, aRepLen, before, after))
672 0 : result++;
673 : }
674 0 : return result;
675 : }
676 :
677 :
678 : // NOTE: the converted html for the phrase is appended to aOutString
679 : // tagHTML and attributeHTML are plain ASCII (literal strings, in fact)
680 : bool
681 0 : mozTXTToHTMLConv::StructPhraseHit(const PRUnichar * aInString, PRInt32 aInStringLength, bool col0,
682 : const PRUnichar* tagTXT, PRInt32 aTagTXTLen,
683 : const char* tagHTML, const char* attributeHTML,
684 : nsString& aOutString, PRUint32& openTags)
685 : {
686 : /* We're searching for the following pattern:
687 : LT_DELIMITER - "*" - ALPHA -
688 : [ some text (maybe more "*"-pairs) - ALPHA ] "*" - LT_DELIMITER.
689 : <strong> is only inserted, if existence of a pair could be verified
690 : We use the first opening/closing tag, if we can choose */
691 :
692 0 : const PRUnichar * newOffset = aInString;
693 0 : PRInt32 newLength = aInStringLength;
694 0 : if (!col0) // skip the first element?
695 : {
696 0 : newOffset = &aInString[1];
697 0 : newLength = aInStringLength - 1;
698 : }
699 :
700 : // opening tag
701 0 : if
702 : (
703 : ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen,
704 0 : (col0 ? LT_IGNORE : LT_DELIMITER), LT_ALPHA) // is opening tag
705 : && NumberOfMatches(newOffset, newLength, tagTXT, aTagTXTLen,
706 0 : LT_ALPHA, LT_DELIMITER) // remaining closing tags
707 : > openTags
708 : )
709 : {
710 0 : openTags++;
711 0 : aOutString.AppendLiteral("<");
712 0 : aOutString.AppendASCII(tagHTML);
713 0 : aOutString.Append(PRUnichar(' '));
714 0 : aOutString.AppendASCII(attributeHTML);
715 0 : aOutString.AppendLiteral("><span class=\"moz-txt-tag\">");
716 0 : aOutString.Append(tagTXT);
717 0 : aOutString.AppendLiteral("</span>");
718 0 : return true;
719 : }
720 :
721 : // closing tag
722 0 : else if (openTags > 0
723 0 : && ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, LT_ALPHA, LT_DELIMITER))
724 : {
725 0 : openTags--;
726 0 : aOutString.AppendLiteral("<span class=\"moz-txt-tag\">");
727 0 : aOutString.Append(tagTXT);
728 0 : aOutString.AppendLiteral("</span></");
729 0 : aOutString.AppendASCII(tagHTML);
730 0 : aOutString.Append(PRUnichar('>'));
731 0 : return true;
732 : }
733 :
734 0 : return false;
735 : }
736 :
737 :
// SmilyHit tests whether the smiley |tagTXT| stands at the beginning of
// aInString and, if so, generates the HTML for it.
//   aInString, aLength  text to examine
//   col0                true: the smiley has to start at aInString[0];
//                       false: aInString[0] has to be whitespace and the
//                       smiley starts at aInString[1]
//   tagTXT              the smiley as plain ASCII text, e.g. ":-)"
//   imageName           CSS class for the generated <span>
//   outputHTML          receives the generated HTML
//   glyphTextLen        out: length of the matched text including the
//                       leading whitespace character, if there is one
// Returns false if any of the pointers is null or if there is no match.
738 : bool
739 0 : mozTXTToHTMLConv::SmilyHit(const PRUnichar * aInString, PRInt32 aLength, bool col0,
740 : const char* tagTXT, const char* imageName,
741 : nsString& outputHTML, PRInt32& glyphTextLen)
742 : {
743 0 : if ( !aInString || !tagTXT || !imageName )
744 0 : return false;
745 :
746 0 : PRInt32 tagLen = nsCRT::strlen(tagTXT);
747 :
// Index of the first character behind the smiley.
748 0 : PRUint32 delim = (col0 ? 0 : 1) + tagLen;
749 :
// The smiley is accepted if it is followed by the end of the text, by
// whitespace, or by one of the listed characters which in turn is followed
// by whitespace.
750 0 : if
751 : (
752 0 : (col0 || IsSpace(aInString[0]))
753 : &&
754 : (
755 : aLength <= PRInt32(delim) ||
756 0 : IsSpace(aInString[delim]) ||
757 : (aLength > PRInt32(delim + 1)
758 : &&
759 : (
760 0 : aInString[delim] == '.' ||
761 0 : aInString[delim] == ',' ||
762 0 : aInString[delim] == ';' ||
763 0 : aInString[delim] == '8' ||
764 0 : aInString[delim] == '>' ||
765 0 : aInString[delim] == '!' ||
766 0 : aInString[delim] == '?'
767 : )
768 0 : && IsSpace(aInString[delim + 1]))
769 : )
770 0 : && ItMatchesDelimited(aInString, aLength, NS_ConvertASCIItoUTF16(tagTXT).get(), tagLen,
771 0 : col0 ? LT_IGNORE : LT_DELIMITER, LT_IGNORE)
772 : // Note: tests at different pos for LT_IGNORE and LT_DELIMITER
773 : )
774 : {
// Without col0 the whitespace in front of the smiley belongs to the
// matched text, so the output is reset and starts with a space.  With col0
// the HTML is appended to whatever outputHTML already holds.
775 0 : if (!col0)
776 : {
777 0 : outputHTML.Truncate();
778 0 : outputHTML.Append(PRUnichar(' '));
779 : }
780 :
781 0 : outputHTML.AppendLiteral("<span class=\""); // <span class="
782 0 : AppendASCIItoUTF16(imageName, outputHTML); // e.g. smiley-frown
783 0 : outputHTML.AppendLiteral("\" title=\""); // " title="
784 0 : AppendASCIItoUTF16(tagTXT, outputHTML); // smiley tooltip
785 0 : outputHTML.AppendLiteral("\"><span>"); // "><span>
786 0 : AppendASCIItoUTF16(tagTXT, outputHTML); // original text
787 0 : outputHTML.AppendLiteral("</span></span>"); // </span></span>
788 0 : glyphTextLen = (col0 ? 0 : 1) + tagLen;
789 0 : return true;
790 : }
791 :
792 0 : return false;
793 : }
794 :
795 : // the glyph is appended to aOutputString instead of the original string...
796 : bool
797 0 : mozTXTToHTMLConv::GlyphHit(const PRUnichar * aInString, PRInt32 aInLength, bool col0,
798 : nsString& aOutputString, PRInt32& glyphTextLen)
799 : {
800 0 : PRUnichar text0 = aInString[0];
801 0 : PRUnichar text1 = aInString[1];
802 0 : PRUnichar firstChar = (col0 ? text0 : text1);
803 :
804 : // temporary variable used to store the glyph html text
805 0 : nsAutoString outputHTML;
806 : bool bTestSmilie;
807 : bool bArg;
808 : int i;
809 :
810 : // refactor some of this mess to avoid code duplication and speed execution a bit
811 : // there are two cases that need to be tried one after another. To avoid a lot of
812 : // duplicate code, rolling into a loop
813 :
814 0 : i = 0;
815 0 : while ( i < 2 )
816 : {
817 0 : bTestSmilie = false;
818 0 : if ( !i && (firstChar == ':' || firstChar == ';' || firstChar == '=' || firstChar == '>' || firstChar == '8' || firstChar == 'O'))
819 : {
820 : // first test passed
821 :
822 0 : bTestSmilie = true;
823 0 : bArg = col0;
824 : }
825 0 : if ( i && col0 && ( text1 == ':' || text1 == ';' || text1 == '=' || text1 == '>' || text1 == '8' || text1 == 'O' ) )
826 : {
827 : // second test passed
828 :
829 0 : bTestSmilie = true;
830 0 : bArg = false;
831 : }
832 0 : if ( bTestSmilie && (
833 : SmilyHit(aInString, aInLength, bArg,
834 : ":-)",
835 : "moz-smiley-s1", // smile
836 0 : outputHTML, glyphTextLen) ||
837 :
838 : SmilyHit(aInString, aInLength, bArg,
839 : ":)",
840 : "moz-smiley-s1", // smile
841 0 : outputHTML, glyphTextLen) ||
842 :
843 : SmilyHit(aInString, aInLength, bArg,
844 : ":-D",
845 : "moz-smiley-s5", // laughing
846 0 : outputHTML, glyphTextLen) ||
847 :
848 : SmilyHit(aInString, aInLength, bArg,
849 : ":-(",
850 : "moz-smiley-s2", // frown
851 0 : outputHTML, glyphTextLen) ||
852 :
853 : SmilyHit(aInString, aInLength, bArg,
854 : ":(",
855 : "moz-smiley-s2", // frown
856 0 : outputHTML, glyphTextLen) ||
857 :
858 : SmilyHit(aInString, aInLength, bArg,
859 : ":-[",
860 : "moz-smiley-s6", // embarassed
861 0 : outputHTML, glyphTextLen) ||
862 :
863 : SmilyHit(aInString, aInLength, bArg,
864 : ";-)",
865 : "moz-smiley-s3", // wink
866 0 : outputHTML, glyphTextLen) ||
867 :
868 : SmilyHit(aInString, aInLength, col0,
869 : ";)",
870 : "moz-smiley-s3", // wink
871 0 : outputHTML, glyphTextLen) ||
872 :
873 : SmilyHit(aInString, aInLength, bArg,
874 : ":-\\",
875 : "moz-smiley-s7", // undecided
876 0 : outputHTML, glyphTextLen) ||
877 :
878 : SmilyHit(aInString, aInLength, bArg,
879 : ":-P",
880 : "moz-smiley-s4", // tongue
881 0 : outputHTML, glyphTextLen) ||
882 :
883 : SmilyHit(aInString, aInLength, bArg,
884 : ";-P",
885 : "moz-smiley-s4", // tongue
886 0 : outputHTML, glyphTextLen) ||
887 :
888 : SmilyHit(aInString, aInLength, bArg,
889 : "=-O",
890 : "moz-smiley-s8", // surprise
891 0 : outputHTML, glyphTextLen) ||
892 :
893 : SmilyHit(aInString, aInLength, bArg,
894 : ":-*",
895 : "moz-smiley-s9", // kiss
896 0 : outputHTML, glyphTextLen) ||
897 :
898 : SmilyHit(aInString, aInLength, bArg,
899 : ">:o",
900 : "moz-smiley-s10", // yell
901 0 : outputHTML, glyphTextLen) ||
902 :
903 : SmilyHit(aInString, aInLength, bArg,
904 : ">:-o",
905 : "moz-smiley-s10", // yell
906 0 : outputHTML, glyphTextLen) ||
907 :
908 : SmilyHit(aInString, aInLength, bArg,
909 : "8-)",
910 : "moz-smiley-s11", // cool
911 0 : outputHTML, glyphTextLen) ||
912 :
913 : SmilyHit(aInString, aInLength, bArg,
914 : ":-$",
915 : "moz-smiley-s12", // money
916 0 : outputHTML, glyphTextLen) ||
917 :
918 : SmilyHit(aInString, aInLength, bArg,
919 : ":-!",
920 : "moz-smiley-s13", // foot
921 0 : outputHTML, glyphTextLen) ||
922 :
923 : SmilyHit(aInString, aInLength, bArg,
924 : "O:-)",
925 : "moz-smiley-s14", // innocent
926 0 : outputHTML, glyphTextLen) ||
927 :
928 : SmilyHit(aInString, aInLength, bArg,
929 : ":'(",
930 : "moz-smiley-s15", // cry
931 0 : outputHTML, glyphTextLen) ||
932 :
933 : SmilyHit(aInString, aInLength, bArg,
934 : ":-X",
935 : "moz-smiley-s16", // sealed
936 0 : outputHTML, glyphTextLen)
937 : )
938 : )
939 : {
940 0 : aOutputString.Append(outputHTML);
941 0 : return true;
942 : }
943 0 : i++;
944 : }
945 0 : if (text0 == '\f')
946 : {
947 0 : aOutputString.AppendLiteral("<span class='moz-txt-formfeed'></span>");
948 0 : glyphTextLen = 1;
949 0 : return true;
950 : }
951 0 : if (text0 == '+' || text1 == '+')
952 : {
953 0 : if (ItMatchesDelimited(aInString, aInLength,
954 0 : NS_LITERAL_STRING(" +/-").get(), 4,
955 0 : LT_IGNORE, LT_IGNORE))
956 : {
957 0 : aOutputString.AppendLiteral(" ±");
958 0 : glyphTextLen = 4;
959 0 : return true;
960 : }
961 0 : if (col0 && ItMatchesDelimited(aInString, aInLength,
962 0 : NS_LITERAL_STRING("+/-").get(), 3,
963 0 : LT_IGNORE, LT_IGNORE))
964 : {
965 0 : aOutputString.AppendLiteral("±");
966 0 : glyphTextLen = 3;
967 0 : return true;
968 : }
969 : }
970 :
971 : // x^2 => x<sup>2</sup>, also handle powers x^-2, x^0.5
972 : // implement regular expression /[\dA-Za-z\)\]}]\^-?\d+(\.\d+)*[^\dA-Za-z]/
973 0 : if
974 : (
975 : text1 == '^'
976 : &&
977 : (
978 0 : nsCRT::IsAsciiDigit(text0) || nsCRT::IsAsciiAlpha(text0) ||
979 : text0 == ')' || text0 == ']' || text0 == '}'
980 : )
981 : &&
982 : (
983 0 : (2 < aInLength && nsCRT::IsAsciiDigit(aInString[2])) ||
984 0 : (3 < aInLength && aInString[2] == '-' && nsCRT::IsAsciiDigit(aInString[3]))
985 : )
986 : )
987 : {
988 : // Find first non-digit
989 0 : PRInt32 delimPos = 3; // skip "^" and first digit (or '-')
990 0 : for (; delimPos < aInLength
991 : &&
992 : (
993 0 : nsCRT::IsAsciiDigit(aInString[delimPos]) ||
994 0 : (aInString[delimPos] == '.' && delimPos + 1 < aInLength &&
995 0 : nsCRT::IsAsciiDigit(aInString[delimPos + 1]))
996 : );
997 : delimPos++)
998 : ;
999 :
1000 0 : if (delimPos < aInLength && nsCRT::IsAsciiAlpha(aInString[delimPos]))
1001 : {
1002 0 : return false;
1003 : }
1004 :
1005 0 : outputHTML.Truncate();
1006 0 : outputHTML += text0;
1007 : outputHTML.AppendLiteral(
1008 : "<sup class=\"moz-txt-sup\">"
1009 : "<span style=\"display:inline-block;width:0;height:0;overflow:hidden\">"
1010 0 : "^</span>");
1011 :
1012 0 : aOutputString.Append(outputHTML);
1013 0 : aOutputString.Append(&aInString[2], delimPos - 2);
1014 0 : aOutputString.AppendLiteral("</sup>");
1015 :
1016 0 : glyphTextLen = delimPos /* - 1 + 1 */ ;
1017 0 : return true;
1018 : }
1019 : /*
1020 : The following strings are not substituted:
1021 : |TXT |HTML |Reason
1022 : +------+---------+----------
1023 : -> ← Bug #454
1024 : => ⇐ dito
1025 : <- → dito
1026 : <= ⇒ dito
1027 : (tm) ™ dito
1028 : 1/4 ¼ is triggered by 1/4 Part 1, 2/4 Part 2, ...
1029 : 3/4 ¾ dito
1030 : 1/2 ½ similar
1031 : */
1032 0 : return false;
1033 : }
1034 :
1035 : /***************************************************************************
1036 : Library-internal Interface
1037 : ****************************************************************************/
1038 :
1039 0 : mozTXTToHTMLConv::mozTXTToHTMLConv()
1040 : {
1041 0 : }
1042 :
1043 0 : mozTXTToHTMLConv::~mozTXTToHTMLConv()
1044 : {
1045 0 : }
1046 :
1047 0 : NS_IMPL_ISUPPORTS4(mozTXTToHTMLConv,
1048 : mozITXTToHTMLConv,
1049 : nsIStreamConverter,
1050 : nsIStreamListener,
1051 : nsIRequestObserver)
1052 :
1053 : PRInt32
1054 0 : mozTXTToHTMLConv::CiteLevelTXT(const PRUnichar *line,
1055 : PRUint32& logLineStart)
1056 : {
1057 0 : PRInt32 result = 0;
1058 0 : PRInt32 lineLength = nsCRT::strlen(line);
1059 :
1060 0 : bool moreCites = true;
1061 0 : while (moreCites)
1062 : {
1063 : /* E.g. the following lines count as quote:
1064 :
1065 : > text
1066 : //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
1067 : >text
1068 : //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
1069 : > text
1070 : ] text
1071 : USER> text
1072 : USER] text
1073 : //#endif
1074 :
1075 : logLineStart is the position of "t" in this example
1076 : */
1077 0 : PRUint32 i = logLineStart;
1078 :
1079 : #ifdef QUOTE_RECOGNITION_AGGRESSIVE
1080 : for (; PRInt32(i) < lineLength && IsSpace(line[i]); i++)
1081 : ;
1082 : for (; PRInt32(i) < lineLength && nsCRT::IsAsciiAlpha(line[i])
1083 : && nsCRT::IsUpper(line[i]) ; i++)
1084 : ;
1085 : if (PRInt32(i) < lineLength && (line[i] == '>' || line[i] == ']'))
1086 : #else
1087 0 : if (PRInt32(i) < lineLength && line[i] == '>')
1088 : #endif
1089 : {
1090 0 : i++;
1091 0 : if (PRInt32(i) < lineLength && line[i] == ' ')
1092 0 : i++;
1093 : // sendmail/mbox
1094 : // Placed here for performance increase
1095 0 : const PRUnichar * indexString = &line[logLineStart];
1096 : // here, |logLineStart < lineLength| is always true
1097 0 : PRUint32 minlength = MinInt(6,nsCRT::strlen(indexString));
1098 0 : if (Substring(indexString,
1099 0 : indexString+minlength).Equals(Substring(NS_LITERAL_STRING(">From "), 0, minlength),
1100 0 : nsCaseInsensitiveStringComparator()))
1101 : //XXX RFC2646
1102 0 : moreCites = false;
1103 : else
1104 : {
1105 0 : result++;
1106 0 : logLineStart = i;
1107 0 : }
1108 : }
1109 : else
1110 0 : moreCites = false;
1111 : }
1112 :
1113 0 : return result;
1114 : }
1115 :
1116 : void
1117 0 : mozTXTToHTMLConv::ScanTXT(const PRUnichar * aInString, PRInt32 aInStringLength, PRUint32 whattodo, nsString& aOutString)
1118 : {
1119 0 : bool doURLs = 0 != (whattodo & kURLs);
1120 0 : bool doGlyphSubstitution = 0 != (whattodo & kGlyphSubstitution);
1121 0 : bool doStructPhrase = 0 != (whattodo & kStructPhrase);
1122 :
1123 0 : PRUint32 structPhrase_strong = 0; // Number of currently open tags
1124 0 : PRUint32 structPhrase_underline = 0;
1125 0 : PRUint32 structPhrase_italic = 0;
1126 0 : PRUint32 structPhrase_code = 0;
1127 :
1128 0 : nsAutoString outputHTML; // moved here for performance increase
1129 :
1130 0 : for(PRUint32 i = 0; PRInt32(i) < aInStringLength;)
1131 : {
1132 0 : if (doGlyphSubstitution)
1133 : {
1134 : PRInt32 glyphTextLen;
1135 0 : if (GlyphHit(&aInString[i], aInStringLength - i, i == 0, aOutString, glyphTextLen))
1136 : {
1137 0 : i += glyphTextLen;
1138 0 : continue;
1139 : }
1140 : }
1141 :
1142 0 : if (doStructPhrase)
1143 : {
1144 0 : const PRUnichar * newOffset = aInString;
1145 0 : PRInt32 newLength = aInStringLength;
1146 0 : if (i > 0 ) // skip the first element?
1147 : {
1148 0 : newOffset = &aInString[i-1];
1149 0 : newLength = aInStringLength - i + 1;
1150 : }
1151 :
1152 0 : switch (aInString[i]) // Performance increase
1153 : {
1154 : case '*':
1155 0 : if (StructPhraseHit(newOffset, newLength, i == 0,
1156 0 : NS_LITERAL_STRING("*").get(), 1,
1157 : "b", "class=\"moz-txt-star\"",
1158 0 : aOutString, structPhrase_strong))
1159 : {
1160 0 : i++;
1161 0 : continue;
1162 : }
1163 0 : break;
1164 : case '/':
1165 0 : if (StructPhraseHit(newOffset, newLength, i == 0,
1166 0 : NS_LITERAL_STRING("/").get(), 1,
1167 : "i", "class=\"moz-txt-slash\"",
1168 0 : aOutString, structPhrase_italic))
1169 : {
1170 0 : i++;
1171 0 : continue;
1172 : }
1173 0 : break;
1174 : case '_':
1175 0 : if (StructPhraseHit(newOffset, newLength, i == 0,
1176 0 : NS_LITERAL_STRING("_").get(), 1,
1177 : "span" /* <u> is deprecated */,
1178 : "class=\"moz-txt-underscore\"",
1179 0 : aOutString, structPhrase_underline))
1180 : {
1181 0 : i++;
1182 0 : continue;
1183 : }
1184 0 : break;
1185 : case '|':
1186 0 : if (StructPhraseHit(newOffset, newLength, i == 0,
1187 0 : NS_LITERAL_STRING("|").get(), 1,
1188 : "code", "class=\"moz-txt-verticalline\"",
1189 0 : aOutString, structPhrase_code))
1190 : {
1191 0 : i++;
1192 0 : continue;
1193 : }
1194 0 : break;
1195 : }
1196 : }
1197 :
1198 0 : if (doURLs)
1199 : {
1200 0 : switch (aInString[i])
1201 : {
1202 : case ':':
1203 : case '@':
1204 : case '.':
1205 0 : if ( (i == 0 || ((i > 0) && aInString[i - 1] != ' ')) && aInString[i +1] != ' ') // Performance increase
1206 : {
1207 : PRInt32 replaceBefore;
1208 : PRInt32 replaceAfter;
1209 0 : if (FindURL(aInString, aInStringLength, i, whattodo,
1210 0 : outputHTML, replaceBefore, replaceAfter)
1211 : && structPhrase_strong + structPhrase_italic +
1212 : structPhrase_underline + structPhrase_code == 0
1213 : /* workaround for bug #19445 */ )
1214 : {
1215 0 : aOutString.Cut(aOutString.Length() - replaceBefore, replaceBefore);
1216 0 : aOutString += outputHTML;
1217 0 : i += replaceAfter + 1;
1218 0 : continue;
1219 : }
1220 : }
1221 0 : break;
1222 : } //switch
1223 : }
1224 :
1225 0 : switch (aInString[i])
1226 : {
1227 : // Special symbols
1228 : case '<':
1229 : case '>':
1230 : case '&':
1231 0 : EscapeChar(aInString[i], aOutString, false);
1232 0 : i++;
1233 0 : break;
1234 : // Normal characters
1235 : default:
1236 0 : aOutString += aInString[i];
1237 0 : i++;
1238 0 : break;
1239 : }
1240 : }
1241 0 : }
1242 :
1243 : void
1244 0 : mozTXTToHTMLConv::ScanHTML(nsString& aInString, PRUint32 whattodo, nsString &aOutString)
1245 : {
1246 : // some common variables we were recalculating
1247 : // every time inside the for loop...
1248 0 : PRInt32 lengthOfInString = aInString.Length();
1249 0 : const PRUnichar * uniBuffer = aInString.get();
1250 :
1251 : #ifdef DEBUG_BenB_Perf
1252 : PRTime parsing_start = PR_IntervalNow();
1253 : #endif
1254 :
1255 : // Look for simple entities not included in a tags and scan them.
1256 : /* Skip all tags ("<[...]>") and content in an a tag ("<a[...]</a>")
1257 : or in a tag ("<!--[...]-->").
1258 : Unescape the rest (text between tags) and pass it to ScanTXT. */
1259 0 : for (PRInt32 i = 0; i < lengthOfInString;)
1260 : {
1261 0 : if (aInString[i] == '<') // html tag
1262 : {
1263 0 : PRUint32 start = PRUint32(i);
1264 0 : if (nsCRT::ToLower((char)aInString[PRUint32(i) + 1]) == 'a')
1265 : // if a tag, skip until </a>
1266 : {
1267 0 : i = aInString.Find("</a>", true, i);
1268 0 : if (i == kNotFound)
1269 0 : i = lengthOfInString;
1270 : else
1271 0 : i += 4;
1272 : }
1273 0 : else if (aInString[PRUint32(i) + 1] == '!' && aInString[PRUint32(i) + 2] == '-' &&
1274 0 : aInString[PRUint32(i) + 3] == '-')
1275 : //if out-commended code, skip until -->
1276 : {
1277 0 : i = aInString.Find("-->", false, i);
1278 0 : if (i == kNotFound)
1279 0 : i = lengthOfInString;
1280 : else
1281 0 : i += 3;
1282 :
1283 : }
1284 : else // just skip tag (attributes etc.)
1285 : {
1286 0 : i = aInString.FindChar('>', i);
1287 0 : if (i == kNotFound)
1288 0 : i = lengthOfInString;
1289 : else
1290 0 : i++;
1291 : }
1292 0 : aOutString.Append(&uniBuffer[start], PRUint32(i) - start);
1293 : }
1294 : else
1295 : {
1296 0 : PRUint32 start = PRUint32(i);
1297 0 : i = aInString.FindChar('<', i);
1298 0 : if (i == kNotFound)
1299 0 : i = lengthOfInString;
1300 :
1301 0 : nsString tempString;
1302 0 : tempString.SetCapacity(PRUint32((PRUint32(i) - start) * growthRate));
1303 0 : UnescapeStr(uniBuffer, start, PRUint32(i) - start, tempString);
1304 0 : ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString);
1305 : }
1306 : }
1307 :
1308 : #ifdef DEBUG_BenB_Perf
1309 : printf("ScanHTML time: %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start));
1310 : #endif
1311 0 : }
1312 :
1313 : /****************************************************************************
1314 : XPCOM Interface
1315 : *****************************************************************************/
1316 :
1317 : NS_IMETHODIMP
1318 0 : mozTXTToHTMLConv::Convert(nsIInputStream *aFromStream,
1319 : const char *aFromType,
1320 : const char *aToType,
1321 : nsISupports *aCtxt, nsIInputStream **_retval)
1322 : {
1323 0 : return NS_ERROR_NOT_IMPLEMENTED;
1324 : }
1325 :
1326 : NS_IMETHODIMP
1327 0 : mozTXTToHTMLConv::AsyncConvertData(const char *aFromType,
1328 : const char *aToType,
1329 : nsIStreamListener *aListener, nsISupports *aCtxt) {
1330 0 : return NS_ERROR_NOT_IMPLEMENTED;
1331 : }
1332 :
1333 : NS_IMETHODIMP
1334 0 : mozTXTToHTMLConv::OnDataAvailable(nsIRequest* request, nsISupports *ctxt,
1335 : nsIInputStream *inStr, PRUint32 sourceOffset,
1336 : PRUint32 count)
1337 : {
1338 0 : return NS_ERROR_NOT_IMPLEMENTED;
1339 : }
1340 :
1341 : NS_IMETHODIMP
1342 0 : mozTXTToHTMLConv::OnStartRequest(nsIRequest* request, nsISupports *ctxt)
1343 : {
1344 0 : return NS_ERROR_NOT_IMPLEMENTED;
1345 : }
1346 :
1347 : NS_IMETHODIMP
1348 0 : mozTXTToHTMLConv::OnStopRequest(nsIRequest* request, nsISupports *ctxt,
1349 : nsresult aStatus)
1350 : {
1351 0 : return NS_ERROR_NOT_IMPLEMENTED;
1352 : }
1353 :
1354 : NS_IMETHODIMP
1355 0 : mozTXTToHTMLConv::CiteLevelTXT(const PRUnichar *line, PRUint32 *logLineStart,
1356 : PRUint32 *_retval)
1357 : {
1358 0 : if (!logLineStart || !_retval || !line)
1359 0 : return NS_ERROR_NULL_POINTER;
1360 0 : *_retval = CiteLevelTXT(line, *logLineStart);
1361 0 : return NS_OK;
1362 : }
1363 :
1364 : NS_IMETHODIMP
1365 0 : mozTXTToHTMLConv::ScanTXT(const PRUnichar *text, PRUint32 whattodo,
1366 : PRUnichar **_retval)
1367 : {
1368 0 : NS_ENSURE_ARG(text);
1369 :
1370 : // FIX ME!!!
1371 0 : nsString outString;
1372 0 : PRInt32 inLength = nsCRT::strlen(text);
1373 : // by setting a large capacity up front, we save time
1374 : // when appending characters to the output string because we don't
1375 : // need to reallocate and re-copy the characters already in the out String.
1376 0 : NS_ASSERTION(inLength, "ScanTXT passed 0 length string");
1377 0 : if (inLength == 0) {
1378 0 : *_retval = nsCRT::strdup(text);
1379 0 : return NS_OK;
1380 : }
1381 :
1382 0 : outString.SetCapacity(PRUint32(inLength * growthRate));
1383 0 : ScanTXT(text, inLength, whattodo, outString);
1384 :
1385 0 : *_retval = ToNewUnicode(outString);
1386 0 : return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
1387 : }
1388 :
1389 : NS_IMETHODIMP
1390 0 : mozTXTToHTMLConv::ScanHTML(const PRUnichar *text, PRUint32 whattodo,
1391 : PRUnichar **_retval)
1392 : {
1393 0 : NS_ENSURE_ARG(text);
1394 :
1395 : // FIX ME!!!
1396 0 : nsString outString;
1397 0 : nsString inString (text); // look at this nasty extra copy of the entire input buffer!
1398 0 : outString.SetCapacity(PRUint32(inString.Length() * growthRate));
1399 :
1400 0 : ScanHTML(inString, whattodo, outString);
1401 0 : *_retval = ToNewUnicode(outString);
1402 0 : return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
1403 : }
1404 :
1405 : nsresult
1406 0 : MOZ_NewTXTToHTMLConv(mozTXTToHTMLConv** aConv)
1407 : {
1408 0 : NS_PRECONDITION(aConv != nsnull, "null ptr");
1409 0 : if (!aConv)
1410 0 : return NS_ERROR_NULL_POINTER;
1411 :
1412 0 : *aConv = new mozTXTToHTMLConv();
1413 0 : if (!*aConv)
1414 0 : return NS_ERROR_OUT_OF_MEMORY;
1415 :
1416 0 : NS_ADDREF(*aConv);
1417 : // return (*aConv)->Init();
1418 0 : return NS_OK;
1419 : }
|