1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim:expandtab:shiftwidth=2:tabstop=4:
3 : */
4 : /* ***** BEGIN LICENSE BLOCK *****
5 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 : *
7 : * The contents of this file are subject to the Mozilla Public License Version
8 : * 1.1 (the "License"); you may not use this file except in compliance with
9 : * the License. You may obtain a copy of the License at
10 : * http://www.mozilla.org/MPL/
11 : *
12 : * Software distributed under the License is distributed on an "AS IS" basis,
13 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 : * for the specific language governing rights and limitations under the
15 : * License.
16 : *
17 : * The Original Code is mozilla.org code.
18 : *
19 : * The Initial Developers of the Original Code are
20 : * Naoki Hotta <nhotta@netscape.com> and Jungshik Shin <jshin@mailaps.org>.
21 : * Portions created by the Initial Developer are Copyright (C) 2002, 2003
22 : * the Initial Developers. All Rights Reserved.
23 : *
24 : * Contributor(s):
25 : *
26 : * Alternatively, the contents of this file may be used under the terms of
27 : * either the GNU General Public License Version 2 or later (the "GPL"), or
28 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 : * in which case the provisions of the GPL or the LGPL are applicable instead
30 : * of those above. If you wish to allow use of your version of this file only
31 : * under the terms of either the GPL or the LGPL, and not to allow others to
32 : * use your version of this file under the terms of the MPL, indicate your
33 : * decision by deleting the provisions above and replace them with the notice
34 : * and other provisions required by the GPL or the LGPL. If you do not delete
35 : * the provisions above, a recipient may use your version of this file under
36 : * the terms of any one of the MPL, the GPL or the LGPL.
37 : *
38 : * ***** END LICENSE BLOCK ***** */
39 : #include "nsString.h"
40 : #include "nsIUnicodeEncoder.h"
41 : #include "nsICharsetConverterManager.h"
42 : #include "nsReadableUtils.h"
43 : #include "nsIServiceManager.h"
44 : #include "prmem.h"
45 : #include "nsUTF8ConverterService.h"
46 : #include "nsEscape.h"
47 : #include "nsAutoPtr.h"
48 :
// XPCOM boilerplate for nsUTF8ConverterService, naming nsIUTF8ConverterService
// as its single interface. NOTE(review): presumably expands to the
// AddRef/Release/QueryInterface implementations -- confirm against the
// NS_IMPL_ISUPPORTS1 macro definition.
49 1205 : NS_IMPL_ISUPPORTS1(nsUTF8ConverterService, nsIUTF8ConverterService)
50 :
51 : static nsresult
52 2 : ToUTF8(const nsACString &aString, const char *aCharset, nsACString &aResult)
53 : {
54 : nsresult rv;
55 2 : if (!aCharset || !*aCharset)
56 0 : return NS_ERROR_INVALID_ARG;
57 :
58 4 : nsCOMPtr<nsICharsetConverterManager> ccm;
59 :
60 2 : ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
61 2 : NS_ENSURE_SUCCESS(rv, rv);
62 :
63 4 : nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder;
64 2 : rv = ccm->GetUnicodeDecoder(aCharset,
65 2 : getter_AddRefs(unicodeDecoder));
66 2 : NS_ENSURE_SUCCESS(rv, rv);
67 :
68 2 : PRInt32 srcLen = aString.Length();
69 : PRInt32 dstLen;
70 4 : const nsAFlatCString& inStr = PromiseFlatCString(aString);
71 2 : rv = unicodeDecoder->GetMaxLength(inStr.get(), srcLen, &dstLen);
72 2 : NS_ENSURE_SUCCESS(rv, rv);
73 :
74 6 : nsAutoArrayPtr<PRUnichar> ustr(new PRUnichar[dstLen]);
75 2 : NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY);
76 :
77 2 : rv = unicodeDecoder->Convert(inStr.get(), &srcLen, ustr, &dstLen);
78 2 : if (NS_SUCCEEDED(rv)){
79 : // Tru64 Cxx needs an explicit get()
80 0 : CopyUTF16toUTF8(Substring(ustr.get(), ustr + dstLen), aResult);
81 : }
82 2 : return rv;
83 : }
84 :
85 : NS_IMETHODIMP
86 237 : nsUTF8ConverterService::ConvertStringToUTF8(const nsACString &aString,
87 : const char *aCharset,
88 : bool aSkipCheck,
89 : nsACString &aUTF8String)
90 : {
91 : // return if ASCII only or valid UTF-8 providing that the ASCII/UTF-8
92 : // check is requested. It may not be asked for if a caller suspects
93 : // that the input is in non-ASCII 7bit charset (ISO-2022-xx, HZ) or
94 : // it's in a charset other than UTF-8 that can be mistaken for UTF-8.
95 237 : if (!aSkipCheck && (IsASCII(aString) || IsUTF8(aString))) {
96 235 : aUTF8String = aString;
97 235 : return NS_OK;
98 : }
99 :
100 2 : aUTF8String.Truncate();
101 :
102 2 : nsresult rv = ToUTF8(aString, aCharset, aUTF8String);
103 :
104 : // additional protection for cases where check is skipped and the input
105 : // is actually in UTF-8 as opposed to aCharset. (i.e. caller's hunch
106 : // was wrong.) We don't check ASCIIness assuming there's no charset
107 : // incompatible with ASCII (we don't support EBCDIC).
108 2 : if (aSkipCheck && NS_FAILED(rv) && IsUTF8(aString)) {
109 0 : aUTF8String = aString;
110 0 : return NS_OK;
111 : }
112 :
113 2 : return rv;
114 : }
115 :
116 : NS_IMETHODIMP
117 0 : nsUTF8ConverterService::ConvertURISpecToUTF8(const nsACString &aSpec,
118 : const char *aCharset,
119 : nsACString &aUTF8Spec)
120 : {
121 : // assume UTF-8 if the spec contains unescaped non-ASCII characters.
122 : // No valid spec in Mozilla would break this assumption.
123 0 : if (!IsASCII(aSpec)) {
124 0 : aUTF8Spec = aSpec;
125 0 : return NS_OK;
126 : }
127 :
128 0 : aUTF8Spec.Truncate();
129 :
130 0 : nsCAutoString unescapedSpec;
131 : // NS_UnescapeURL does not fill up unescapedSpec unless there's at least
132 : // one character to unescape.
133 0 : bool written = NS_UnescapeURL(PromiseFlatCString(aSpec).get(), aSpec.Length(),
134 0 : esc_OnlyNonASCII, unescapedSpec);
135 :
136 0 : if (!written) {
137 0 : aUTF8Spec = aSpec;
138 0 : return NS_OK;
139 : }
140 : // return if ASCII only or escaped UTF-8
141 0 : if (IsASCII(unescapedSpec) || IsUTF8(unescapedSpec)) {
142 0 : aUTF8Spec = unescapedSpec;
143 0 : return NS_OK;
144 : }
145 :
146 0 : return ToUTF8(unescapedSpec, aCharset, aUTF8Spec);
147 : }
148 :
|