1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is mozilla.org code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 1998
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : * Pierre Phaneuf <pp@ludusdesign.com>
24 : *
25 : * Alternatively, the contents of this file may be used under the terms of
26 : * either of the GNU General Public License Version 2 or later (the "GPL"),
27 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 : * in which case the provisions of the GPL or the LGPL are applicable instead
29 : * of those above. If you wish to allow use of your version of this file only
30 : * under the terms of either the GPL or the LGPL, and not to allow others to
31 : * use your version of this file under the terms of the MPL, indicate your
32 : * decision by deleting the provisions above and replace them with the notice
33 : * and other provisions required by the GPL or the LGPL. If you do not delete
34 : * the provisions above, a recipient may use your version of this file under
35 : * the terms of any one of the MPL, the GPL or the LGPL.
36 : *
37 : * ***** END LICENSE BLOCK ***** */
38 : #include "nsString.h"
39 : #include "nsIUnicodeEncoder.h"
40 : #include "nsICharsetConverterManager.h"
41 : #include "nsReadableUtils.h"
42 : #include "nsITextToSubURI.h"
43 : #include "nsIServiceManager.h"
44 : #include "nsEscape.h"
45 : #include "prmem.h"
46 : #include "nsTextToSubURI.h"
47 : #include "nsCRT.h"
48 :
49 : static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
50 :
51 42 : nsTextToSubURI::nsTextToSubURI()
52 : {
53 42 : }
54 84 : nsTextToSubURI::~nsTextToSubURI()
55 : {
56 168 : }
57 :
58 818 : NS_IMPL_ISUPPORTS1(nsTextToSubURI, nsITextToSubURI)
59 :
60 0 : NS_IMETHODIMP nsTextToSubURI::ConvertAndEscape(
61 : const char *charset, const PRUnichar *text, char **_retval)
62 : {
63 0 : if(nsnull == _retval)
64 0 : return NS_ERROR_NULL_POINTER;
65 0 : *_retval = nsnull;
66 0 : nsresult rv = NS_OK;
67 :
68 : // Get Charset, get the encoder.
69 : nsICharsetConverterManager *ccm;
70 0 : rv = CallGetService(kCharsetConverterManagerCID, &ccm);
71 0 : if(NS_SUCCEEDED(rv)) {
72 : nsIUnicodeEncoder *encoder;
73 0 : rv = ccm->GetUnicodeEncoder(charset, &encoder);
74 0 : NS_RELEASE(ccm);
75 0 : if (NS_SUCCEEDED(rv)) {
76 0 : rv = encoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nsnull, (PRUnichar)'?');
77 0 : if(NS_SUCCEEDED(rv))
78 : {
79 : char buf[256];
80 0 : char *pBuf = buf;
81 0 : PRInt32 ulen = nsCRT::strlen(text);
82 0 : PRInt32 outlen = 0;
83 0 : if(NS_SUCCEEDED(rv = encoder->GetMaxLength(text, ulen, &outlen)))
84 : {
85 0 : if(outlen >= 256) {
86 0 : pBuf = (char*)NS_Alloc(outlen+1);
87 : }
88 0 : if(nsnull == pBuf) {
89 0 : outlen = 255;
90 0 : pBuf = buf;
91 : }
92 0 : PRInt32 bufLen = outlen;
93 0 : if(NS_SUCCEEDED(rv = encoder->Convert(text,&ulen, pBuf, &outlen))) {
94 : // put termination characters (e.g. ESC(B of ISO-2022-JP) if necessary
95 0 : PRInt32 finLen = bufLen - outlen;
96 0 : if (finLen > 0) {
97 0 : if (NS_SUCCEEDED(encoder->Finish((char *)(pBuf+outlen), &finLen)))
98 0 : outlen += finLen;
99 : }
100 0 : pBuf[outlen] = '\0';
101 0 : *_retval = nsEscape(pBuf, url_XPAlphas);
102 0 : if(nsnull == *_retval)
103 0 : rv = NS_ERROR_OUT_OF_MEMORY;
104 : }
105 : }
106 0 : if(pBuf != buf)
107 0 : NS_Free(pBuf);
108 : }
109 0 : NS_RELEASE(encoder);
110 : }
111 : }
112 :
113 0 : return rv;
114 : }
115 :
116 1 : NS_IMETHODIMP nsTextToSubURI::UnEscapeAndConvert(
117 : const char *charset, const char *text, PRUnichar **_retval)
118 : {
119 1 : if(nsnull == _retval)
120 0 : return NS_ERROR_NULL_POINTER;
121 1 : if(nsnull == text) {
122 : // set empty string instead of returning error
123 : // due to compatibility for old version
124 1 : text = "";
125 : }
126 1 : *_retval = nsnull;
127 1 : nsresult rv = NS_OK;
128 :
129 : // unescape the string, unescape changes the input
130 1 : char *unescaped = NS_strdup(text);
131 1 : if (nsnull == unescaped)
132 0 : return NS_ERROR_OUT_OF_MEMORY;
133 1 : unescaped = nsUnescape(unescaped);
134 1 : NS_ASSERTION(unescaped, "nsUnescape returned null");
135 :
136 : // Convert from the charset to unicode
137 : nsCOMPtr<nsICharsetConverterManager> ccm =
138 2 : do_GetService(kCharsetConverterManagerCID, &rv);
139 1 : if (NS_SUCCEEDED(rv)) {
140 : nsIUnicodeDecoder *decoder;
141 1 : rv = ccm->GetUnicodeDecoder(charset, &decoder);
142 1 : if (NS_SUCCEEDED(rv)) {
143 1 : PRUnichar *pBuf = nsnull;
144 1 : PRInt32 len = strlen(unescaped);
145 1 : PRInt32 outlen = 0;
146 1 : if (NS_SUCCEEDED(rv = decoder->GetMaxLength(unescaped, len, &outlen))) {
147 1 : pBuf = (PRUnichar *) NS_Alloc((outlen+1)*sizeof(PRUnichar));
148 1 : if (nsnull == pBuf)
149 0 : rv = NS_ERROR_OUT_OF_MEMORY;
150 : else {
151 1 : if (NS_SUCCEEDED(rv = decoder->Convert(unescaped, &len, pBuf, &outlen))) {
152 1 : pBuf[outlen] = 0;
153 1 : *_retval = pBuf;
154 : }
155 : else
156 0 : NS_Free(pBuf);
157 : }
158 : }
159 1 : NS_RELEASE(decoder);
160 : }
161 : }
162 1 : NS_Free(unescaped);
163 :
164 1 : return rv;
165 : }
166 :
167 254 : static bool statefulCharset(const char *charset)
168 : {
169 762 : if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1) ||
170 254 : !nsCRT::strcasecmp(charset, "UTF-7") ||
171 254 : !nsCRT::strcasecmp(charset, "HZ-GB-2312"))
172 0 : return true;
173 :
174 254 : return false;
175 : }
176 :
177 254 : nsresult nsTextToSubURI::convertURItoUnicode(const nsAFlatCString &aCharset,
178 : const nsAFlatCString &aURI,
179 : bool aIRI,
180 : nsAString &_retval)
181 : {
182 254 : nsresult rv = NS_OK;
183 :
184 : // check for 7bit encoding the data may not be ASCII after we decode
185 254 : bool isStatefulCharset = statefulCharset(aCharset.get());
186 :
187 254 : if (!isStatefulCharset && IsASCII(aURI)) {
188 248 : CopyASCIItoUTF16(aURI, _retval);
189 248 : return rv;
190 : }
191 :
192 6 : if (!isStatefulCharset && aIRI) {
193 6 : if (IsUTF8(aURI)) {
194 6 : CopyUTF8toUTF16(aURI, _retval);
195 6 : return rv;
196 : }
197 : }
198 :
199 : // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.
200 0 : NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG);
201 :
202 0 : nsCOMPtr<nsICharsetConverterManager> charsetConverterManager;
203 :
204 0 : charsetConverterManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
205 0 : NS_ENSURE_SUCCESS(rv, rv);
206 :
207 0 : nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder;
208 0 : rv = charsetConverterManager->GetUnicodeDecoder(aCharset.get(),
209 0 : getter_AddRefs(unicodeDecoder));
210 0 : NS_ENSURE_SUCCESS(rv, rv);
211 :
212 0 : PRInt32 srcLen = aURI.Length();
213 : PRInt32 dstLen;
214 0 : rv = unicodeDecoder->GetMaxLength(aURI.get(), srcLen, &dstLen);
215 0 : NS_ENSURE_SUCCESS(rv, rv);
216 :
217 0 : PRUnichar *ustr = (PRUnichar *) NS_Alloc(dstLen * sizeof(PRUnichar));
218 0 : NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY);
219 :
220 0 : rv = unicodeDecoder->Convert(aURI.get(), &srcLen, ustr, &dstLen);
221 :
222 0 : if (NS_SUCCEEDED(rv))
223 0 : _retval.Assign(ustr, dstLen);
224 :
225 0 : NS_Free(ustr);
226 :
227 0 : return rv;
228 : }
229 :
230 254 : NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset,
231 : const nsACString &aURIFragment,
232 : nsAString &_retval)
233 : {
234 508 : nsCAutoString unescapedSpec;
235 : // skip control octets (0x00 - 0x1f and 0x7f) when unescaping
236 254 : NS_UnescapeURL(PromiseFlatCString(aURIFragment),
237 254 : esc_SkipControl | esc_AlwaysCopy, unescapedSpec);
238 :
239 : // in case of failure, return escaped URI
240 : // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte
241 : // sequences are also considered failure in this context
242 508 : if (convertURItoUnicode(
243 508 : PromiseFlatCString(aCharset), unescapedSpec, true, _retval)
244 : != NS_OK)
245 : // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8
246 0 : CopyUTF8toUTF16(aURIFragment, _retval);
247 254 : return NS_OK;
248 : }
249 :
250 1 : NS_IMETHODIMP nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString & aCharset,
251 : const nsACString & aURIFragment,
252 : nsAString &_retval)
253 : {
254 2 : nsCAutoString unescapedSpec;
255 1 : NS_UnescapeURL(PromiseFlatCString(aURIFragment),
256 1 : esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec);
257 : // leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII
258 : // superset since converting "http:" with such an encoding is always a bad
259 : // idea.
260 2 : if (!IsUTF8(unescapedSpec) &&
261 1 : (aCharset.LowerCaseEqualsLiteral("utf-16") ||
262 0 : aCharset.LowerCaseEqualsLiteral("utf-16be") ||
263 0 : aCharset.LowerCaseEqualsLiteral("utf-16le") ||
264 0 : aCharset.LowerCaseEqualsLiteral("utf-7") ||
265 0 : aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))){
266 1 : CopyASCIItoUTF16(aURIFragment, _retval);
267 1 : return NS_OK;
268 : }
269 :
270 0 : return convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, true, _retval);
271 : }
272 :
273 : //----------------------------------------------------------------------
|