1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is mozilla.org code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 1998
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : *
24 : * Alternatively, the contents of this file may be used under the terms of
25 : * either of the GNU General Public License Version 2 or later (the "GPL"),
26 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 : * in which case the provisions of the GPL or the LGPL are applicable instead
28 : * of those above. If you wish to allow use of your version of this file only
29 : * under the terms of either the GPL or the LGPL, and not to allow others to
30 : * use your version of this file under the terms of the MPL, indicate your
31 : * decision by deleting the provisions above and replace them with the notice
32 : * and other provisions required by the GPL or the LGPL. If you do not delete
33 : * the provisions above, a recipient may use your version of this file under
34 : * the terms of any one of the MPL, the GPL or the LGPL.
35 : *
36 : * ***** END LICENSE BLOCK ***** */
37 :
38 : #include "nsLinebreakConverter.h"
39 :
40 : #include "nsMemory.h"
41 : #include "nsCRT.h"
42 :
43 :
44 : /*----------------------------------------------------------------------------
45 : GetLinebreakString
46 :
47 : Could make this inline
48 : ----------------------------------------------------------------------------*/
49 0 : static const char* GetLinebreakString(nsLinebreakConverter::ELinebreakType aBreakType)
50 : {
51 : static const char* const sLinebreaks[] = {
52 : "", // any
53 : NS_LINEBREAK, // platform
54 : LFSTR, // content
55 : CRLF, // net
56 : CRSTR, // Mac
57 : LFSTR, // Unix
58 : CRLF, // Windows
59 : " ", // space
60 : nsnull
61 : };
62 :
63 0 : return sLinebreaks[aBreakType];
64 : }
65 :
66 :
/*----------------------------------------------------------------------------
  AppendLinebreak

  Appends a line break of zero, one or two characters to the output and
  advances ioDest past whatever was written.

  ioDest        in/out write cursor; must have room for strlen(lineBreakStr)
                characters.
  lineBreakStr  null-terminated break sequence (at most two characters are
                ever copied).

  An empty break string writes nothing. The buffers this helper writes into
  are sized from strlen(lineBreakStr), so emitting a NUL for the empty string
  would write one character more than was allocated per break; it would also
  mean reading lineBreakStr[1], which lies beyond the end of "".
----------------------------------------------------------------------------*/
template<class T>
void AppendLinebreak(T*& ioDest, const char* lineBreakStr)
{
  if (!lineBreakStr[0])
    return;               // empty break: contributes no characters

  *ioDest++ = lineBreakStr[0];

  // only reached when the first character is non-null, so index 1 is in range
  if (lineBreakStr[1])
    *ioDest++ = lineBreakStr[1];
}
80 :
81 : /*----------------------------------------------------------------------------
82 : CountChars
83 :
84 : Counts occurrences of breakStr in aSrc
85 : ----------------------------------------------------------------------------*/
86 : template<class T>
87 0 : PRInt32 CountLinebreaks(const T* aSrc, PRInt32 inLen, const char* breakStr)
88 : {
89 0 : const T* src = aSrc;
90 0 : const T* srcEnd = aSrc + inLen;
91 0 : PRInt32 theCount = 0;
92 :
93 0 : while (src < srcEnd)
94 : {
95 0 : if (*src == *breakStr)
96 : {
97 0 : src++;
98 :
99 0 : if (breakStr[1])
100 : {
101 0 : if (src < srcEnd && *src == breakStr[1])
102 : {
103 0 : src++;
104 0 : theCount++;
105 : }
106 : }
107 : else
108 : {
109 0 : theCount++;
110 : }
111 : }
112 : else
113 : {
114 0 : src++;
115 : }
116 : }
117 :
118 0 : return theCount;
119 : }
120 :
121 :
122 : /*----------------------------------------------------------------------------
123 : ConvertBreaks
124 :
125 : ioLen *includes* a terminating null, if any
126 : ----------------------------------------------------------------------------*/
127 : template<class T>
128 0 : static T* ConvertBreaks(const T* inSrc, PRInt32& ioLen, const char* srcBreak, const char* destBreak)
129 : {
130 0 : NS_ASSERTION(inSrc && srcBreak && destBreak, "Got a null string");
131 :
132 0 : T* resultString = nsnull;
133 :
134 : // handle the no conversion case
135 0 : if (nsCRT::strcmp(srcBreak, destBreak) == 0)
136 : {
137 0 : resultString = (T *)nsMemory::Alloc(sizeof(T) * ioLen);
138 0 : if (!resultString) return nsnull;
139 0 : memcpy(resultString, inSrc, sizeof(T) * ioLen); // includes the null, if any
140 0 : return resultString;
141 : }
142 :
143 0 : PRInt32 srcBreakLen = strlen(srcBreak);
144 0 : PRInt32 destBreakLen = strlen(destBreak);
145 :
146 : // handle the easy case, where the string length does not change, and the
147 : // breaks are only 1 char long, i.e. CR <-> LF
148 0 : if (srcBreakLen == destBreakLen && srcBreakLen == 1)
149 : {
150 0 : resultString = (T *)nsMemory::Alloc(sizeof(T) * ioLen);
151 0 : if (!resultString) return nsnull;
152 :
153 0 : const T* src = inSrc;
154 0 : const T* srcEnd = inSrc + ioLen; // includes null, if any
155 0 : T* dst = resultString;
156 :
157 0 : char srcBreakChar = *srcBreak; // we know it's one char long already
158 0 : char dstBreakChar = *destBreak;
159 :
160 0 : while (src < srcEnd)
161 : {
162 0 : if (*src == srcBreakChar)
163 : {
164 0 : *dst++ = dstBreakChar;
165 0 : src++;
166 : }
167 : else
168 : {
169 0 : *dst++ = *src++;
170 : }
171 : }
172 :
173 : // ioLen does not change
174 : }
175 : else
176 : {
177 : // src and dest termination is different length. Do it a slower way.
178 :
179 : // count linebreaks in src. Assumes that chars in 2-char linebreaks are unique.
180 0 : PRInt32 numLinebreaks = CountLinebreaks(inSrc, ioLen, srcBreak);
181 :
182 0 : PRInt32 newBufLen = ioLen - (numLinebreaks * srcBreakLen) + (numLinebreaks * destBreakLen);
183 0 : resultString = (T *)nsMemory::Alloc(sizeof(T) * newBufLen);
184 0 : if (!resultString) return nsnull;
185 :
186 0 : const T* src = inSrc;
187 0 : const T* srcEnd = inSrc + ioLen; // includes null, if any
188 0 : T* dst = resultString;
189 :
190 0 : while (src < srcEnd)
191 : {
192 0 : if (*src == *srcBreak)
193 : {
194 0 : *dst++ = *destBreak;
195 0 : if (destBreak[1])
196 0 : *dst++ = destBreak[1];
197 :
198 0 : src++;
199 0 : if (src < srcEnd && srcBreak[1] && *src == srcBreak[1])
200 0 : src++;
201 : }
202 : else
203 : {
204 0 : *dst++ = *src++;
205 : }
206 : }
207 :
208 0 : ioLen = newBufLen;
209 : }
210 :
211 0 : return resultString;
212 : }
213 :
214 :
215 : /*----------------------------------------------------------------------------
216 : ConvertBreaksInSitu
217 :
218 : Convert breaks in situ. Can only do this if the linebreak length
219 : does not change.
220 : ----------------------------------------------------------------------------*/
221 : template<class T>
222 0 : static void ConvertBreaksInSitu(T* inSrc, PRInt32 inLen, char srcBreak, char destBreak)
223 : {
224 0 : T* src = inSrc;
225 0 : T* srcEnd = inSrc + inLen;
226 :
227 0 : while (src < srcEnd)
228 : {
229 0 : if (*src == srcBreak)
230 0 : *src = destBreak;
231 :
232 0 : src++;
233 : }
234 0 : }
235 :
236 :
237 : /*----------------------------------------------------------------------------
238 : ConvertUnknownBreaks
239 :
240 : Convert unknown line breaks to the specified break.
241 :
242 : This will convert CRLF pairs to one break, and single CR or LF to a break.
243 : ----------------------------------------------------------------------------*/
244 : template<class T>
245 0 : static T* ConvertUnknownBreaks(const T* inSrc, PRInt32& ioLen, const char* destBreak)
246 : {
247 0 : const T* src = inSrc;
248 0 : const T* srcEnd = inSrc + ioLen; // includes null, if any
249 :
250 0 : PRInt32 destBreakLen = strlen(destBreak);
251 0 : PRInt32 finalLen = 0;
252 :
253 0 : while (src < srcEnd)
254 : {
255 0 : if (*src == nsCRT::CR)
256 : {
257 0 : if (src < srcEnd && src[1] == nsCRT::LF)
258 : {
259 : // CRLF
260 0 : finalLen += destBreakLen;
261 0 : src++;
262 : }
263 : else
264 : {
265 : // Lone CR
266 0 : finalLen += destBreakLen;
267 : }
268 : }
269 0 : else if (*src == nsCRT::LF)
270 : {
271 : // Lone LF
272 0 : finalLen += destBreakLen;
273 : }
274 : else
275 : {
276 0 : finalLen++;
277 : }
278 0 : src++;
279 : }
280 :
281 0 : T* resultString = (T *)nsMemory::Alloc(sizeof(T) * finalLen);
282 0 : if (!resultString) return nsnull;
283 :
284 0 : src = inSrc;
285 0 : srcEnd = inSrc + ioLen; // includes null, if any
286 :
287 0 : T* dst = resultString;
288 :
289 0 : while (src < srcEnd)
290 : {
291 0 : if (*src == nsCRT::CR)
292 : {
293 0 : if (src < srcEnd && src[1] == nsCRT::LF)
294 : {
295 : // CRLF
296 0 : AppendLinebreak(dst, destBreak);
297 0 : src++;
298 : }
299 : else
300 : {
301 : // Lone CR
302 0 : AppendLinebreak(dst, destBreak);
303 : }
304 : }
305 0 : else if (*src == nsCRT::LF)
306 : {
307 : // Lone LF
308 0 : AppendLinebreak(dst, destBreak);
309 : }
310 : else
311 : {
312 0 : *dst++ = *src;
313 : }
314 0 : src++;
315 : }
316 :
317 0 : ioLen = finalLen;
318 0 : return resultString;
319 : }
320 :
321 :
322 : /*----------------------------------------------------------------------------
323 : ConvertLineBreaks
324 :
325 : ----------------------------------------------------------------------------*/
326 0 : char* nsLinebreakConverter::ConvertLineBreaks(const char* aSrc,
327 : ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, PRInt32 aSrcLen, PRInt32* outLen)
328 : {
329 0 : NS_ASSERTION(aDestBreaks != eLinebreakAny &&
330 : aSrcBreaks != eLinebreakSpace, "Invalid parameter");
331 0 : if (!aSrc) return nsnull;
332 :
333 0 : PRInt32 sourceLen = (aSrcLen == kIgnoreLen) ? strlen(aSrc) + 1 : aSrcLen;
334 :
335 : char* resultString;
336 0 : if (aSrcBreaks == eLinebreakAny)
337 0 : resultString = ConvertUnknownBreaks(aSrc, sourceLen, GetLinebreakString(aDestBreaks));
338 : else
339 0 : resultString = ConvertBreaks(aSrc, sourceLen, GetLinebreakString(aSrcBreaks), GetLinebreakString(aDestBreaks));
340 :
341 0 : if (outLen)
342 0 : *outLen = sourceLen;
343 0 : return resultString;
344 : }
345 :
346 :
347 : /*----------------------------------------------------------------------------
348 : ConvertLineBreaksInSitu
349 :
350 : ----------------------------------------------------------------------------*/
351 0 : nsresult nsLinebreakConverter::ConvertLineBreaksInSitu(char **ioBuffer, ELinebreakType aSrcBreaks,
352 : ELinebreakType aDestBreaks, PRInt32 aSrcLen, PRInt32* outLen)
353 : {
354 0 : NS_ASSERTION(ioBuffer && *ioBuffer, "Null pointer passed");
355 0 : if (!ioBuffer || !*ioBuffer) return NS_ERROR_NULL_POINTER;
356 :
357 0 : NS_ASSERTION(aDestBreaks != eLinebreakAny &&
358 : aSrcBreaks != eLinebreakSpace, "Invalid parameter");
359 :
360 0 : PRInt32 sourceLen = (aSrcLen == kIgnoreLen) ? strlen(*ioBuffer) + 1 : aSrcLen;
361 :
362 : // can we convert in-place?
363 0 : const char* srcBreaks = GetLinebreakString(aSrcBreaks);
364 0 : const char* dstBreaks = GetLinebreakString(aDestBreaks);
365 :
366 0 : if ( (aSrcBreaks != eLinebreakAny) &&
367 0 : (strlen(srcBreaks) == 1) &&
368 0 : (strlen(dstBreaks) == 1) )
369 : {
370 0 : ConvertBreaksInSitu(*ioBuffer, sourceLen, *srcBreaks, *dstBreaks);
371 0 : if (outLen)
372 0 : *outLen = sourceLen;
373 : }
374 : else
375 : {
376 : char* destBuffer;
377 :
378 0 : if (aSrcBreaks == eLinebreakAny)
379 0 : destBuffer = ConvertUnknownBreaks(*ioBuffer, sourceLen, dstBreaks);
380 : else
381 0 : destBuffer = ConvertBreaks(*ioBuffer, sourceLen, srcBreaks, dstBreaks);
382 :
383 0 : if (!destBuffer) return NS_ERROR_OUT_OF_MEMORY;
384 0 : *ioBuffer = destBuffer;
385 0 : if (outLen)
386 0 : *outLen = sourceLen;
387 : }
388 :
389 0 : return NS_OK;
390 : }
391 :
392 :
393 : /*----------------------------------------------------------------------------
394 : ConvertUnicharLineBreaks
395 :
396 : ----------------------------------------------------------------------------*/
397 0 : PRUnichar* nsLinebreakConverter::ConvertUnicharLineBreaks(const PRUnichar* aSrc,
398 : ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, PRInt32 aSrcLen, PRInt32* outLen)
399 : {
400 0 : NS_ASSERTION(aDestBreaks != eLinebreakAny &&
401 : aSrcBreaks != eLinebreakSpace, "Invalid parameter");
402 0 : if (!aSrc) return nsnull;
403 :
404 0 : PRInt32 bufLen = (aSrcLen == kIgnoreLen) ? nsCRT::strlen(aSrc) + 1 : aSrcLen;
405 :
406 : PRUnichar* resultString;
407 0 : if (aSrcBreaks == eLinebreakAny)
408 0 : resultString = ConvertUnknownBreaks(aSrc, bufLen, GetLinebreakString(aDestBreaks));
409 : else
410 0 : resultString = ConvertBreaks(aSrc, bufLen, GetLinebreakString(aSrcBreaks), GetLinebreakString(aDestBreaks));
411 :
412 0 : if (outLen)
413 0 : *outLen = bufLen;
414 0 : return resultString;
415 : }
416 :
417 :
418 : /*----------------------------------------------------------------------------
419 : ConvertStringLineBreaks
420 :
421 : ----------------------------------------------------------------------------*/
422 0 : nsresult nsLinebreakConverter::ConvertUnicharLineBreaksInSitu(PRUnichar **ioBuffer,
423 : ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, PRInt32 aSrcLen, PRInt32* outLen)
424 : {
425 0 : NS_ASSERTION(ioBuffer && *ioBuffer, "Null pointer passed");
426 0 : if (!ioBuffer || !*ioBuffer) return NS_ERROR_NULL_POINTER;
427 0 : NS_ASSERTION(aDestBreaks != eLinebreakAny &&
428 : aSrcBreaks != eLinebreakSpace, "Invalid parameter");
429 :
430 0 : PRInt32 sourceLen = (aSrcLen == kIgnoreLen) ? nsCRT::strlen(*ioBuffer) + 1 : aSrcLen;
431 :
432 : // can we convert in-place?
433 0 : const char* srcBreaks = GetLinebreakString(aSrcBreaks);
434 0 : const char* dstBreaks = GetLinebreakString(aDestBreaks);
435 :
436 0 : if ( (aSrcBreaks != eLinebreakAny) &&
437 0 : (strlen(srcBreaks) == 1) &&
438 0 : (strlen(dstBreaks) == 1) )
439 : {
440 0 : ConvertBreaksInSitu(*ioBuffer, sourceLen, *srcBreaks, *dstBreaks);
441 0 : if (outLen)
442 0 : *outLen = sourceLen;
443 : }
444 : else
445 : {
446 : PRUnichar* destBuffer;
447 :
448 0 : if (aSrcBreaks == eLinebreakAny)
449 0 : destBuffer = ConvertUnknownBreaks(*ioBuffer, sourceLen, dstBreaks);
450 : else
451 0 : destBuffer = ConvertBreaks(*ioBuffer, sourceLen, srcBreaks, dstBreaks);
452 :
453 0 : if (!destBuffer) return NS_ERROR_OUT_OF_MEMORY;
454 0 : *ioBuffer = destBuffer;
455 0 : if (outLen)
456 0 : *outLen = sourceLen;
457 : }
458 :
459 0 : return NS_OK;
460 : }
461 :
462 : /*----------------------------------------------------------------------------
463 : ConvertStringLineBreaks
464 :
465 : ----------------------------------------------------------------------------*/
466 0 : nsresult nsLinebreakConverter::ConvertStringLineBreaks(nsString& ioString,
467 : ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks)
468 : {
469 :
470 0 : NS_ASSERTION(aDestBreaks != eLinebreakAny &&
471 : aSrcBreaks != eLinebreakSpace, "Invalid parameter");
472 :
473 : // nothing to do
474 0 : if (ioString.IsEmpty()) return NS_OK;
475 :
476 : nsresult rv;
477 :
478 : // remember the old buffer in case
479 : // we blow it away later
480 : nsString::char_iterator stringBuf;
481 0 : ioString.BeginWriting(stringBuf);
482 :
483 : PRInt32 newLen;
484 :
485 : rv = ConvertUnicharLineBreaksInSitu(&stringBuf,
486 : aSrcBreaks, aDestBreaks,
487 0 : ioString.Length() + 1, &newLen);
488 0 : if (NS_FAILED(rv)) return rv;
489 :
490 0 : if (stringBuf != ioString.get())
491 0 : ioString.Adopt(stringBuf);
492 :
493 0 : return NS_OK;
494 : }
495 :
496 :
497 :
|