1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is Mozilla Communicator client code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 1998
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : *
24 : * Alternatively, the contents of this file may be used under the terms of
25 : * either of the GNU General Public License Version 2 or later (the "GPL"),
26 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 : * in which case the provisions of the GPL or the LGPL are applicable instead
28 : * of those above. If you wish to allow use of your version of this file only
29 : * under the terms of either the GPL or the LGPL, and not to allow others to
30 : * use your version of this file under the terms of the MPL, indicate your
31 : * decision by deleting the provisions above and replace them with the notice
32 : * and other provisions required by the GPL or the LGPL. If you do not delete
33 : * the provisions above, a recipient may use your version of this file under
34 : * the terms of any one of the MPL, the GPL or the LGPL.
35 : *
36 : * ***** END LICENSE BLOCK ***** */
37 :
38 : //----------------------------------------------------------------------
39 : // Global functions and data [declaration]
40 : #include "nsUCSupport.h"
41 : #include "nsUnicodeToUTF8.h"
42 : #include <string.h>
43 :
// NOTE(review): NS_IMPL_ISUPPORTS1 is defined outside this file; it presumably
// generates the nsISupports methods (AddRef/Release/QueryInterface) for
// nsUnicodeToUTF8 with nsIUnicodeEncoder as its one advertised interface --
// confirm against the macro definition.
44 21994 : NS_IMPL_ISUPPORTS1(nsUnicodeToUTF8, nsIUnicodeEncoder)
45 :
46 : //----------------------------------------------------------------------
47 : // nsUnicodeToUTF8 class [implementation]
48 :
49 118872 : NS_IMETHODIMP nsUnicodeToUTF8::GetMaxLength(const PRUnichar * aSrc,
50 : PRInt32 aSrcLength,
51 : PRInt32 * aDestLength)
52 : {
53 : // aSrc is interpreted as UTF16, 3 is normally enough.
54 : // But when previous buffer only contains part of the surrogate pair, we
55 : // need to complete it here. If the first word in following buffer is not
56 : // in valid surrogate range, we need to convert the remaining of last buffer
57 : // to 3 bytes.
58 118872 : *aDestLength = 3*aSrcLength + 3;
59 118872 : return NS_OK;
60 : }
61 :
62 118872 : NS_IMETHODIMP nsUnicodeToUTF8::Convert(const PRUnichar * aSrc,
63 : PRInt32 * aSrcLength,
64 : char * aDest,
65 : PRInt32 * aDestLength)
66 : {
67 118872 : const PRUnichar * src = aSrc;
68 118872 : const PRUnichar * srcEnd = aSrc + *aSrcLength;
69 118872 : char * dest = aDest;
70 118872 : PRInt32 destLen = *aDestLength;
71 : PRUint32 n;
72 :
73 : //complete remaining of last conversion
74 118872 : if (mHighSurrogate) {
75 0 : if (src < srcEnd) {
76 0 : *aDestLength = 0;
77 0 : return NS_OK_UENC_MOREINPUT;
78 : }
79 0 : if (*aDestLength < 4) {
80 0 : *aSrcLength = 0;
81 0 : *aDestLength = 0;
82 0 : return NS_OK_UENC_MOREOUTPUT;
83 : }
84 0 : if (*src < (PRUnichar)0xdc00 || *src > (PRUnichar)0xdfff) { //not a pair
85 0 : *dest++ = (char)0xe0 | (mHighSurrogate >> 12);
86 0 : *dest++ = (char)0x80 | ((mHighSurrogate >> 6) & 0x003f);
87 0 : *dest++ = (char)0x80 | (mHighSurrogate & 0x003f);
88 0 : destLen -= 3;
89 : } else {
90 : n = ((mHighSurrogate - (PRUnichar)0xd800) << 10) +
91 0 : (*src - (PRUnichar)0xdc00) + 0x10000;
92 0 : *dest++ = (char)0xf0 | (n >> 18);
93 0 : *dest++ = (char)0x80 | ((n >> 12) & 0x3f);
94 0 : *dest++ = (char)0x80 | ((n >> 6) & 0x3f);
95 0 : *dest++ = (char)0x80 | (n & 0x3f);
96 0 : ++src;
97 0 : destLen -= 4;
98 : }
99 0 : mHighSurrogate = 0;
100 : }
101 :
102 8954016 : while (src < srcEnd) {
103 8716272 : if ( *src <= 0x007f) {
104 8715664 : if (destLen < 1)
105 0 : goto error_more_output;
106 8715664 : *dest++ = (char)*src;
107 8715664 : --destLen;
108 608 : } else if (*src <= 0x07ff) {
109 507 : if (destLen < 2)
110 0 : goto error_more_output;
111 507 : *dest++ = (char)0xc0 | (*src >> 6);
112 507 : *dest++ = (char)0x80 | (*src & 0x003f);
113 507 : destLen -= 2;
114 101 : } else if (*src >= (PRUnichar)0xD800 && *src < (PRUnichar)0xDC00) {
115 9 : if ((src+1) >= srcEnd) {
116 : //we need another surrogate to complete this unicode char
117 0 : mHighSurrogate = *src;
118 0 : *aDestLength = dest - aDest;
119 0 : return NS_OK_UENC_MOREINPUT;
120 : }
121 : //handle surrogate
122 9 : if (destLen < 4)
123 0 : goto error_more_output;
124 18 : if (*(src+1) < (PRUnichar)0xdc00 || *(src+1) > 0xdfff) { //not a pair
125 0 : *dest++ = (char)0xe0 | (*src >> 12);
126 0 : *dest++ = (char)0x80 | ((*src >> 6) & 0x003f);
127 0 : *dest++ = (char)0x80 | (*src & 0x003f);
128 0 : destLen -= 3;
129 : } else {
130 9 : n = ((*src - (PRUnichar)0xd800) << 10) + (*(src+1) - (PRUnichar)0xdc00) + (PRUint32)0x10000;
131 9 : *dest++ = (char)0xf0 | (n >> 18);
132 9 : *dest++ = (char)0x80 | ((n >> 12) & 0x3f);
133 9 : *dest++ = (char)0x80 | ((n >> 6) & 0x3f);
134 9 : *dest++ = (char)0x80 | (n & 0x3f);
135 9 : destLen -= 4;
136 9 : ++src;
137 : }
138 : } else {
139 92 : if (destLen < 3)
140 0 : goto error_more_output;
141 : //treat rest of the character as BMP
142 92 : *dest++ = (char)0xe0 | (*src >> 12);
143 92 : *dest++ = (char)0x80 | ((*src >> 6) & 0x003f);
144 92 : *dest++ = (char)0x80 | (*src & 0x003f);
145 92 : destLen -= 3;
146 : }
147 8716272 : ++src;
148 : }
149 :
150 118872 : *aDestLength = dest - aDest;
151 118872 : return NS_OK;
152 :
153 : error_more_output:
154 0 : *aSrcLength = src - aSrc;
155 0 : *aDestLength = dest - aDest;
156 0 : return NS_OK_UENC_MOREOUTPUT;
157 : }
158 :
159 32855 : NS_IMETHODIMP nsUnicodeToUTF8::Finish(char * aDest, PRInt32 * aDestLength)
160 : {
161 32855 : char * dest = aDest;
162 :
163 32855 : if (mHighSurrogate) {
164 0 : if (*aDestLength < 3) {
165 0 : *aDestLength = 0;
166 0 : return NS_OK_UENC_MOREOUTPUT;
167 : }
168 0 : *dest++ = (char)0xe0 | (mHighSurrogate >> 12);
169 0 : *dest++ = (char)0x80 | ((mHighSurrogate >> 6) & 0x003f);
170 0 : *dest++ = (char)0x80 | (mHighSurrogate & 0x003f);
171 0 : mHighSurrogate = 0;
172 0 : *aDestLength = 3;
173 0 : return NS_OK;
174 : }
175 :
176 32855 : *aDestLength = 0;
177 32855 : return NS_OK;
178 : }
|