1 : /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is mozilla.org code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Peter Van der Beken.
19 : * Portions created by the Initial Developer are Copyright (C) 2004
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : * Peter Van der Beken <peter@propagandism.org>
24 : *
25 : *
26 : * Alternatively, the contents of this file may be used under the terms of
27 : * either the GNU General Public License Version 2 or later (the "GPL"), or
28 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 : * in which case the provisions of the GPL or the LGPL are applicable instead
30 : * of those above. If you wish to allow use of your version of this file only
31 : * under the terms of either the GPL or the LGPL, and not to allow others to
32 : * use your version of this file under the terms of the MPL, indicate your
33 : * decision by deleting the provisions above and replace them with the notice
34 : * and other provisions required by the GPL or the LGPL. If you do not delete
35 : * the provisions above, a recipient may use your version of this file under
36 : * the terms of any one of the MPL, the GPL or the LGPL.
37 : *
38 : * ***** END LICENSE BLOCK ***** */
39 :
/* Bind the helper macros used by the functions below to either the
   little-endian (little2_ / LITTLE2_*) or the big-endian (big2_ / BIG2_*)
   two-byte implementation, depending on whether IS_LITTLE_ENDIAN is defined.

   PREFIX(ident)            - pastes the implementation prefix onto ident.
   BYTE_TYPE(p)             - classifies the character at p using the encoding
                              returned by XmlGetUtf16InternalEncodingNS().
   IS_NAME_CHAR_MINBPC(p)   - forwarded with 0 as the first argument
   IS_NMSTRT_CHAR_MINBPC(p)   (presumably an unused encoding parameter of the
                              underlying macro - TODO confirm). */
40 : #ifdef IS_LITTLE_ENDIAN
41 :
42 : #define PREFIX(ident) little2_ ## ident
43 : #define BYTE_TYPE(p) LITTLE2_BYTE_TYPE(XmlGetUtf16InternalEncodingNS(), p)
44 : #define IS_NAME_CHAR_MINBPC(p) LITTLE2_IS_NAME_CHAR_MINBPC(0, p)
45 : #define IS_NMSTRT_CHAR_MINBPC(p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(0, p)
46 :
47 : #else
48 :
49 : #define PREFIX(ident) big2_ ## ident
50 : #define BYTE_TYPE(p) BIG2_BYTE_TYPE(XmlGetUtf16InternalEncodingNS(), p)
51 : #define IS_NAME_CHAR_MINBPC(p) BIG2_IS_NAME_CHAR_MINBPC(0, p)
52 : #define IS_NMSTRT_CHAR_MINBPC(p) BIG2_IS_NMSTRT_CHAR_MINBPC(0, p)
53 :
54 : #endif
55 :
/* Result codes returned by MOZ_XMLCheckQName. MOZ_EXPAT_VALID_QNAME is 0;
   each failure code is a distinct single-bit value. */
56 : #define MOZ_EXPAT_VALID_QNAME (0)
57 : #define MOZ_EXPAT_EMPTY_QNAME (1 << 0)
58 : #define MOZ_EXPAT_INVALID_CHARACTER (1 << 1)
59 : #define MOZ_EXPAT_MALFORMED (1 << 2)
60 :
61 2061 : int MOZ_XMLCheckQName(const char* ptr, const char* end, int ns_aware,
62 : const char** colon)
63 : {
64 2061 : int result = MOZ_EXPAT_VALID_QNAME;
65 2061 : int nmstrt = 1;
66 2061 : *colon = 0;
67 2061 : if (ptr == end) {
68 0 : return MOZ_EXPAT_EMPTY_QNAME;
69 : }
70 : do {
71 18734 : switch (BYTE_TYPE(ptr)) {
72 : case BT_COLON:
73 : /* We're namespace-aware and either first or last character is a colon
74 : or we've already seen a colon. */
75 15 : if (ns_aware && (nmstrt || *colon || ptr + 2 == end)) {
76 0 : return MOZ_EXPAT_MALFORMED;
77 : }
78 15 : *colon = ptr;
79 15 : nmstrt = ns_aware; /* e.g. "a:0" should be valid if !ns_aware */
80 15 : break;
81 : case BT_NONASCII:
82 0 : if (nmstrt && !IS_NMSTRT_CHAR_MINBPC(ptr)) {
83 : /* If this is a valid name character and we're namespace-aware, the
84 : QName is malformed. Otherwise, this character's invalid at the
85 : start of a name (or, if we're namespace-aware, at the start of a
86 : localpart). */
87 0 : return (IS_NAME_CHAR_MINBPC(ptr) && ns_aware) ?
88 0 : MOZ_EXPAT_MALFORMED :
89 : MOZ_EXPAT_INVALID_CHARACTER;
90 : }
91 0 : if (!IS_NAME_CHAR_MINBPC(ptr)) {
92 0 : return MOZ_EXPAT_INVALID_CHARACTER;
93 : }
94 0 : nmstrt = 0;
95 0 : break;
96 : case BT_NMSTRT:
97 : case BT_HEX:
98 18683 : nmstrt = 0;
99 18683 : break;
100 : case BT_DIGIT:
101 : case BT_NAME:
102 : case BT_MINUS:
103 36 : if (nmstrt) {
104 0 : return MOZ_EXPAT_INVALID_CHARACTER;
105 : }
106 36 : break;
107 : default:
108 0 : return MOZ_EXPAT_INVALID_CHARACTER;
109 : }
110 18734 : ptr += 2;
111 18734 : } while (ptr != end);
112 2061 : return result;
113 : }
114 :
115 42 : int MOZ_XMLIsLetter(const char* ptr)
116 : {
117 42 : switch (BYTE_TYPE(ptr)) {
118 : case BT_NONASCII:
119 0 : if (!IS_NMSTRT_CHAR_MINBPC(ptr)) {
120 0 : return 0;
121 : }
122 : /* fall through */
123 : case BT_NMSTRT:
124 : case BT_HEX:
125 0 : return 1;
126 : default:
127 42 : return 0;
128 : }
129 : }
130 :
131 0 : int MOZ_XMLIsNCNameChar(const char* ptr)
132 : {
133 0 : switch (BYTE_TYPE(ptr)) {
134 : case BT_NONASCII:
135 0 : if (!IS_NAME_CHAR_MINBPC(ptr)) {
136 0 : return 0;
137 : }
138 : /* fall through */
139 : case BT_NMSTRT:
140 : case BT_HEX:
141 : case BT_DIGIT:
142 : case BT_NAME:
143 : case BT_MINUS:
144 0 : return 1;
145 : default:
146 0 : return 0;
147 : }
148 : }
149 :
150 0 : int MOZ_XMLTranslateEntity(const char* ptr, const char* end, const char** next,
151 : XML_Char* result)
152 : {
153 0 : const ENCODING* enc = XmlGetUtf16InternalEncodingNS();
154 0 : int tok = PREFIX(scanRef)(enc, ptr, end, next);
155 0 : if (tok <= XML_TOK_INVALID) {
156 0 : return 0;
157 : }
158 :
159 0 : if (tok == XML_TOK_CHAR_REF) {
160 0 : int n = XmlCharRefNumber(enc, ptr);
161 :
162 : /* We could get away with just < 0, but better safe than sorry. */
163 0 : if (n <= 0) {
164 0 : return 0;
165 : }
166 :
167 0 : return XmlUtf16Encode(n, (unsigned short*)result);
168 : }
169 :
170 0 : if (tok == XML_TOK_ENTITY_REF) {
171 : /* *next points to after the semicolon, so the entity ends at
172 : *next - enc->minBytesPerChar. */
173 0 : XML_Char ch =
174 0 : (XML_Char)XmlPredefinedEntityName(enc, ptr, *next - enc->minBytesPerChar);
175 0 : if (!ch) {
176 0 : return 0;
177 : }
178 :
179 0 : *result = ch;
180 0 : return 1;
181 : }
182 :
183 0 : return 0;
184 : }
185 :
/* Remove the endianness-specific helper macros defined above so that they do
   not remain defined past this point. */
186 : #undef PREFIX
187 : #undef BYTE_TYPE
188 : #undef IS_NAME_CHAR_MINBPC
189 : #undef IS_NMSTRT_CHAR_MINBPC
|