1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is Mozilla Communicator client code.
16 : *
17 : * The Initial Developer of the Original Code is
18 : * Netscape Communications Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 1998
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : * Pierre Phaneuf <pp@ludusdesign.com>
24 : *
25 : * Alternatively, the contents of this file may be used under the terms of
26 : * either of the GNU General Public License Version 2 or later (the "GPL"),
27 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 : * in which case the provisions of the GPL or the LGPL are applicable instead
29 : * of those above. If you wish to allow use of your version of this file only
30 : * under the terms of either the GPL or the LGPL, and not to allow others to
31 : * use your version of this file under the terms of the MPL, indicate your
32 : * decision by deleting the provisions above and replace them with the notice
33 : * and other provisions required by the GPL or the LGPL. If you do not delete
34 : * the provisions above, a recipient may use your version of this file under
35 : * the terms of any one of the MPL, the GPL or the LGPL.
36 : *
37 : * ***** END LICENSE BLOCK ***** */
38 :
39 : /*
40 :
41 : An implementation for an NGLayout-style content sink that knows how
42 : to build an RDF content model from XML-serialized RDF.
43 :
44 : For more information on the RDF/XML syntax,
45 : see http://www.w3.org/TR/REC-rdf-syntax/
46 :
47 : This code is based on the final W3C Recommendation,
48 : http://www.w3.org/TR/1999/REC-rdf-syntax-19990222.
49 :
50 : Open Issues ------------------
51 :
52 : 1) factoring code with nsXMLContentSink - There's some amount of
53 : common code between this and the HTML content sink. This will
54 : increase as we support more and more HTML elements. How can code
55 : from XML/HTML be factored?
56 :
57 : 2) We don't support the `parseType' attribute on the Description
58 : tag; therefore, it is impossible to "inline" raw XML in this
59 : implementation.
60 :
61 : 3) We don't build the reifications at parse time due to the
62 : footprint overhead it would incur for large RDF documents. (It
63 : may be possible to attach a "reification" wrapper datasource that
64 : would present this information at query-time.) Because of this,
65 : the `bagID' attribute is not processed correctly.
66 :
67 : 4) No attempt is made to `resolve URIs' to a canonical form (the
68 : specification hints that an implementation should do this). This
69 : is omitted for the obvious reason that we can ill afford to
70 : resolve each URI reference.
71 :
72 : */
73 :
74 : #include "mozilla/Util.h"
75 :
76 : #include "nsCOMPtr.h"
77 : #include "nsInterfaceHashtable.h"
78 : #include "nsIContentSink.h"
79 : #include "nsIRDFContainer.h"
80 : #include "nsIRDFContainerUtils.h"
81 : #include "nsIRDFContentSink.h"
82 : #include "nsIRDFNode.h"
83 : #include "nsIRDFService.h"
84 : #include "nsIRDFXMLSink.h"
85 : #include "nsIServiceManager.h"
86 : #include "nsIURL.h"
87 : #include "nsIXMLContentSink.h"
88 : #include "nsRDFCID.h"
89 : #include "nsTArray.h"
90 : #include "nsXPIDLString.h"
91 : #include "prlog.h"
92 : #include "prmem.h"
93 : #include "rdf.h"
94 : #include "rdfutil.h"
95 : #include "nsReadableUtils.h"
96 : #include "nsIExpatSink.h"
97 : #include "nsCRT.h"
98 : #include "nsIAtom.h"
99 : #include "nsStaticAtom.h"
100 : #include "nsIScriptError.h"
101 : #include "nsIDTD.h"
102 :
103 : using namespace mozilla;
104 :
105 : ////////////////////////////////////////////////////////////////////////
106 : // XPCOM IIDs
107 :
// Interface IDs. RDFContentSinkImpl::QueryInterface compares the requested
// IID against kIRDFContentSinkIID, kIXMLContentSinkIID, kIContentSinkIID,
// kISupportsIID and kIExpatSinkIID.
108 : static NS_DEFINE_IID(kIContentSinkIID, NS_ICONTENT_SINK_IID); // XXX grr...
109 : static NS_DEFINE_IID(kIExpatSinkIID, NS_IEXPATSINK_IID);
110 : static NS_DEFINE_IID(kIRDFServiceIID, NS_IRDFSERVICE_IID);
111 : static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
112 : static NS_DEFINE_IID(kIXMLContentSinkIID, NS_IXMLCONTENT_SINK_IID);
113 : static NS_DEFINE_IID(kIRDFContentSinkIID, NS_IRDFCONTENTSINK_IID);
114 :
// Class IDs passed to CallGetService() by the RDFContentSinkImpl constructor
// to obtain the shared RDF service and container-utils service.
115 : static NS_DEFINE_CID(kRDFServiceCID, NS_RDFSERVICE_CID);
116 : static NS_DEFINE_CID(kRDFContainerUtilsCID, NS_RDFCONTAINERUTILS_CID);
117 :
118 : ////////////////////////////////////////////////////////////////////////
119 :
// Log module for this file; created on demand by the RDFContentSinkImpl
// constructor under the name "nsRDFContentSink".
120 : #ifdef PR_LOGGING
121 : static PRLogModuleInfo* gLog;
122 : #endif
123 :
124 : ///////////////////////////////////////////////////////////////////////
125 :
// Parser position within the RDF/XML document. HandleStartElement dispatches
// on this value:
//   InProlog                          -> OpenRDF
//   InDocumentElement                 -> OpenObject
//   InDescriptionElement              -> OpenProperty
//   InContainerElement                -> OpenMember
//   InPropertyElement/InMemberElement -> OpenValue
//   InEpilog                          -> logged as unexpected content
// HandleEndElement switches to InEpilog once the context stack is empty.
126 : enum RDFContentSinkState {
127 : eRDFContentSinkState_InProlog,
128 : eRDFContentSinkState_InDocumentElement,
129 : eRDFContentSinkState_InDescriptionElement,
130 : eRDFContentSinkState_InContainerElement,
131 : eRDFContentSinkState_InPropertyElement,
132 : eRDFContentSinkState_InMemberElement,
133 : eRDFContentSinkState_InEpilog
134 : };
135 :
// Selects how ParseText() turns buffered character data into an RDF node:
//   Resource -> nsIRDFService::GetUnicodeResource
//   Literal  -> nsIRDFService::GetLiteral
//   Int      -> nsString::ToInteger, then nsIRDFService::GetIntLiteral
//   Date     -> rdf_ParseDate, then nsIRDFService::GetDateLiteral
136 : enum RDFContentSinkParseMode {
137 : eRDFContentSinkParseMode_Resource,
138 : eRDFContentSinkParseMode_Literal,
139 : eRDFContentSinkParseMode_Int,
140 : eRDFContentSinkParseMode_Date
141 : };
142 :
// NOTE(review): NS_STDCALL_FUNCPROTO is defined outside this file; presumably
// it declares a pointer-to-member-function type of nsIRDFContainerUtils with
// the signature of the named method (IsAlt here) — confirm against nscore.h.
// nsContainerTestFn is the type of ContainerInfo::mTestFn.
143 : typedef
144 : NS_STDCALL_FUNCPROTO(nsresult,
145 : nsContainerTestFn,
146 : nsIRDFContainerUtils, IsAlt,
147 : (nsIRDFDataSource*, nsIRDFResource*, bool*));
148 :
// Same pattern using MakeAlt as the model method; nsMakeContainerFn is the
// type of ContainerInfo::mMakeFn.
149 : typedef
150 : NS_STDCALL_FUNCPROTO(nsresult,
151 : nsMakeContainerFn,
152 : nsIRDFContainerUtils, MakeAlt,
153 : (nsIRDFDataSource*, nsIRDFResource*, nsIRDFContainer**));
154 :
// Content sink that receives expat parser callbacks (nsIExpatSink) and turns
// RDF/XML into assertions on mDataSource. Parsing is driven by a small state
// machine (mState) plus a context stack of the resources currently open.
155 : class RDFContentSinkImpl : public nsIRDFContentSink,
156 : public nsIExpatSink
157 : {
158 : public:
159 : RDFContentSinkImpl();
160 : virtual ~RDFContentSinkImpl();
161 :
162 : // nsISupports
163 : NS_DECL_ISUPPORTS
164 : NS_DECL_NSIEXPATSINK
165 :
166 : // nsIContentSink
167 : NS_IMETHOD WillParse(void);
168 : NS_IMETHOD WillBuildModel(nsDTDMode aDTDMode);
169 : NS_IMETHOD DidBuildModel(bool aTerminated);
170 : NS_IMETHOD WillInterrupt(void);
171 : NS_IMETHOD WillResume(void);
172 : NS_IMETHOD SetParser(nsParserBase* aParser);
// The next three are deliberate no-ops: nothing to flush, the charset is
// ignored, and there is no target object.
173 0 : virtual void FlushPendingNotifications(mozFlushType aType) { }
174 0 : NS_IMETHOD SetDocumentCharset(nsACString& aCharset) { return NS_OK; }
175 2070 : virtual nsISupports *GetTarget() { return nsnull; }
176 :
177 : // nsIRDFContentSink
178 : NS_IMETHOD Init(nsIURI* aURL);
179 : NS_IMETHOD SetDataSource(nsIRDFDataSource* aDataSource);
180 : NS_IMETHOD GetDataSource(nsIRDFDataSource*& aDataSource);
181 :
// gRefCnt counts live sink instances: the constructor acquires the services
// and resources below when it goes 0 -> 1, and the destructor releases them
// when it drops back to 0.
182 : // pseudo constants
183 : static PRInt32 gRefCnt;
184 : static nsIRDFService* gRDFService;
185 : static nsIRDFContainerUtils* gRDFContainerUtils;
186 : static nsIRDFResource* kRDF_type;
187 : static nsIRDFResource* kRDF_instanceOf; // XXX should be RDF:type
188 : static nsIRDFResource* kRDF_Alt;
189 : static nsIRDFResource* kRDF_Bag;
190 : static nsIRDFResource* kRDF_Seq;
191 : static nsIRDFResource* kRDF_nextVal;
192 :
// Declares one static nsIAtom* member per entry of the atom list header.
193 : #define RDF_ATOM(name_, value_) static nsIAtom* name_;
194 : #include "nsRDFContentSinkAtomList.h"
195 : #undef RDF_ATOM
196 :
// Describes one RDF container kind: its type resource plus the
// nsIRDFContainerUtils members used to test for and create it.
197 : typedef struct ContainerInfo {
198 : nsIRDFResource** mType;
199 : nsContainerTestFn mTestFn;
200 : nsMakeContainerFn mMakeFn;
201 : } ContainerInfo;
202 :
203 : protected:
204 : // Text management
// ParseText converts the buffered text into a node according to mParseMode.
205 : void ParseText(nsIRDFNode **aResult);
206 :
// FlushText emits any buffered non-whitespace text into the graph and
// empties the buffer; AddText appends characters to the buffer.
207 : nsresult FlushText();
208 : nsresult AddText(const PRUnichar* aText, PRInt32 aLength);
209 :
210 : // RDF-specific parsing
211 : nsresult OpenRDF(const PRUnichar* aName);
212 : nsresult OpenObject(const PRUnichar* aName ,const PRUnichar** aAttributes);
213 : nsresult OpenProperty(const PRUnichar* aName, const PRUnichar** aAttributes);
214 : nsresult OpenMember(const PRUnichar* aName, const PRUnichar** aAttributes);
215 : nsresult OpenValue(const PRUnichar* aName, const PRUnichar** aAttributes);
216 :
// Attribute arrays are null-terminated sequences of (name, value) pairs.
217 : nsresult GetIdAboutAttribute(const PRUnichar** aAttributes, nsIRDFResource** aResource, bool* aIsAnonymous = nsnull);
218 : nsresult GetResourceAttribute(const PRUnichar** aAttributes, nsIRDFResource** aResource);
219 : nsresult AddProperties(const PRUnichar** aAttributes, nsIRDFResource* aSubject, PRInt32* aCount = nsnull);
220 : void SetParseMode(const PRUnichar **aAttributes);
221 :
// Character-data buffer: mText is allocated by AddText and freed by the
// destructor, mTextLength is the number of characters in use and mTextSize
// the capacity in characters.
222 : PRUnichar* mText;
223 : PRInt32 mTextLength;
224 : PRInt32 mTextSize;
225 :
226 : /**
227 : * From the set of given attributes, this method extracts the
228 : * namespace definitions and feeds them to the datasource.
229 : * These can then be suggested to the serializer to be used again.
230 : * Hopefully, this will keep namespace definitions intact in a
231 : * parse - serialize cycle.
232 : */
233 : void RegisterNamespaces(const PRUnichar **aAttributes);
234 :
235 : /**
236 : * Extracts the localname from aExpatName, the name that the Expat parser
237 : * passes us.
238 : * aLocalName will contain the localname in aExpatName.
239 : * The return value is a dependent string containing just the namespace.
240 : */
241 : const nsDependentSubstring SplitExpatName(const PRUnichar *aExpatName,
242 : nsIAtom **aLocalName);
243 :
244 : enum eContainerType { eBag, eSeq, eAlt };
245 : nsresult InitContainer(nsIRDFResource* aContainerType, nsIRDFResource* aContainer);
246 : nsresult ReinitContainer(nsIRDFResource* aContainerType, nsIRDFResource* aContainer);
247 :
248 : // The datasource in which we're assigning assertions
249 : nsCOMPtr<nsIRDFDataSource> mDataSource;
250 :
251 : // A hash of all the node IDs referred to
252 : nsInterfaceHashtable<nsStringHashKey, nsIRDFResource> mNodeIDMap;
253 :
254 : // The current state of the content sink
255 : RDFContentSinkState mState;
256 : RDFContentSinkParseMode mParseMode;
257 :
258 : // content stack management
259 : PRInt32
260 : PushContext(nsIRDFResource *aContext,
261 : RDFContentSinkState aState,
262 : RDFContentSinkParseMode aParseMode);
263 :
// Callers (HandleEndElement, the destructor) release the resource that
// PopContext hands back through aContext.
264 : nsresult
265 : PopContext(nsIRDFResource *&aContext,
266 : RDFContentSinkState &aState,
267 : RDFContentSinkParseMode &aParseMode);
268 :
269 : nsIRDFResource* GetContextElement(PRInt32 ancestor = 0);
270 :
271 :
// One saved parsing context: a resource together with the state and parse
// mode stored alongside it.
272 87576 : struct RDFContextStackElement {
273 : nsCOMPtr<nsIRDFResource> mResource;
274 : RDFContentSinkState mState;
275 : RDFContentSinkParseMode mParseMode;
276 : };
277 :
// Heap-allocated stack; starts out null and is deleted by the destructor.
278 : nsAutoTArray<RDFContextStackElement, 8>* mContextStack;
279 :
// Document URL; AddRef'ed in Init() and released by the destructor.
280 : nsIURI* mDocumentURL;
281 : };
282 :
// Storage for the class-level statics. The constructor fills them in when
// gRefCnt goes from 0 to 1; the destructor releases them when it returns to 0.
283 : PRInt32 RDFContentSinkImpl::gRefCnt = 0;
284 : nsIRDFService* RDFContentSinkImpl::gRDFService;
285 : nsIRDFContainerUtils* RDFContentSinkImpl::gRDFContainerUtils;
286 : nsIRDFResource* RDFContentSinkImpl::kRDF_type;
287 : nsIRDFResource* RDFContentSinkImpl::kRDF_instanceOf;
288 : nsIRDFResource* RDFContentSinkImpl::kRDF_Alt;
289 : nsIRDFResource* RDFContentSinkImpl::kRDF_Bag;
290 : nsIRDFResource* RDFContentSinkImpl::kRDF_Seq;
291 : nsIRDFResource* RDFContentSinkImpl::kRDF_nextVal;
292 :
293 : ////////////////////////////////////////////////////////////////////////
294 :
// The atom list header is included three times with different RDF_ATOM
// definitions: to define the static nsIAtom* members, to define the static
// string buffers, and to build the rdf_atoms table that the constructor
// hands to NS_RegisterStaticAtoms().
295 : #define RDF_ATOM(name_, value_) nsIAtom* RDFContentSinkImpl::name_;
296 : #include "nsRDFContentSinkAtomList.h"
297 : #undef RDF_ATOM
298 :
299 : #define RDF_ATOM(name_, value_) NS_STATIC_ATOM_BUFFER(name_##_buffer, value_)
300 : #include "nsRDFContentSinkAtomList.h"
301 : #undef RDF_ATOM
302 :
303 : static const nsStaticAtom rdf_atoms[] = {
304 : #define RDF_ATOM(name_, value_) NS_STATIC_ATOM(name_##_buffer, &RDFContentSinkImpl::name_),
305 : #include "nsRDFContentSinkAtomList.h"
306 : #undef RDF_ATOM
307 : };
308 :
309 2072 : RDFContentSinkImpl::RDFContentSinkImpl()
310 : : mText(nsnull),
311 : mTextLength(0),
312 : mTextSize(0),
313 : mState(eRDFContentSinkState_InProlog),
314 : mParseMode(eRDFContentSinkParseMode_Literal),
315 : mContextStack(nsnull),
316 2072 : mDocumentURL(nsnull)
317 : {
318 2072 : if (gRefCnt++ == 0) {
319 535 : nsresult rv = CallGetService(kRDFServiceCID, &gRDFService);
320 :
321 535 : NS_ASSERTION(NS_SUCCEEDED(rv), "unable to get RDF service");
322 535 : if (NS_SUCCEEDED(rv)) {
323 535 : rv = gRDFService->GetResource(NS_LITERAL_CSTRING(RDF_NAMESPACE_URI "type"),
324 535 : &kRDF_type);
325 535 : rv = gRDFService->GetResource(NS_LITERAL_CSTRING(RDF_NAMESPACE_URI "instanceOf"),
326 535 : &kRDF_instanceOf);
327 535 : rv = gRDFService->GetResource(NS_LITERAL_CSTRING(RDF_NAMESPACE_URI "Alt"),
328 535 : &kRDF_Alt);
329 535 : rv = gRDFService->GetResource(NS_LITERAL_CSTRING(RDF_NAMESPACE_URI "Bag"),
330 535 : &kRDF_Bag);
331 535 : rv = gRDFService->GetResource(NS_LITERAL_CSTRING(RDF_NAMESPACE_URI "Seq"),
332 535 : &kRDF_Seq);
333 535 : rv = gRDFService->GetResource(NS_LITERAL_CSTRING(RDF_NAMESPACE_URI "nextVal"),
334 535 : &kRDF_nextVal);
335 : }
336 :
337 :
338 535 : rv = CallGetService(kRDFContainerUtilsCID, &gRDFContainerUtils);
339 :
340 535 : NS_RegisterStaticAtoms(rdf_atoms);
341 : }
342 :
343 2072 : mNodeIDMap.Init();
344 :
345 : #ifdef PR_LOGGING
346 2072 : if (! gLog)
347 172 : gLog = PR_NewLogModule("nsRDFContentSink");
348 : #endif
349 2072 : }
350 :
351 :
352 6216 : RDFContentSinkImpl::~RDFContentSinkImpl()
353 : {
354 : #ifdef DEBUG_REFS
355 : --gInstanceCount;
356 : fprintf(stdout, "%d - RDF: RDFContentSinkImpl\n", gInstanceCount);
357 : #endif
358 :
359 2072 : NS_IF_RELEASE(mDocumentURL);
360 :
361 2072 : if (mContextStack) {
362 2069 : PR_LOG(gLog, PR_LOG_WARNING,
363 : ("rdfxml: warning! unclosed tag"));
364 :
365 : // XXX we should never need to do this, but, we'll write the
366 : // code all the same. If someone left the content stack dirty,
367 : // pop all the elements off the stack and release them.
368 2069 : PRInt32 i = mContextStack->Length();
369 4138 : while (0 < i--) {
370 0 : nsIRDFResource* resource = nsnull;
371 : RDFContentSinkState state;
372 : RDFContentSinkParseMode parseMode;
373 0 : PopContext(resource, state, parseMode);
374 :
375 : #ifdef PR_LOGGING
376 : // print some fairly useless debugging info
377 : // XXX we should save line numbers on the context stack: this'd
378 : // be about 1000x more helpful.
379 0 : if (resource) {
380 0 : nsXPIDLCString uri;
381 0 : resource->GetValue(getter_Copies(uri));
382 0 : PR_LOG(gLog, PR_LOG_NOTICE,
383 : ("rdfxml: uri=%s", (const char*) uri));
384 : }
385 : #endif
386 :
387 0 : NS_IF_RELEASE(resource);
388 : }
389 :
390 2069 : delete mContextStack;
391 : }
392 2072 : PR_FREEIF(mText);
393 :
394 :
395 2072 : if (--gRefCnt == 0) {
396 535 : NS_IF_RELEASE(gRDFService);
397 535 : NS_IF_RELEASE(gRDFContainerUtils);
398 535 : NS_IF_RELEASE(kRDF_type);
399 535 : NS_IF_RELEASE(kRDF_instanceOf);
400 535 : NS_IF_RELEASE(kRDF_Alt);
401 535 : NS_IF_RELEASE(kRDF_Bag);
402 535 : NS_IF_RELEASE(kRDF_Seq);
403 535 : NS_IF_RELEASE(kRDF_nextVal);
404 : }
405 8288 : }
406 :
407 : ////////////////////////////////////////////////////////////////////////
408 : // nsISupports interface
409 :
// AddRef/Release are generated by the standard XPCOM macros; QueryInterface
// is written by hand below.
410 16570 : NS_IMPL_ADDREF(RDFContentSinkImpl)
411 16570 : NS_IMPL_RELEASE(RDFContentSinkImpl)
412 :
413 : NS_IMETHODIMP
414 14609 : RDFContentSinkImpl::QueryInterface(REFNSIID iid, void** result)
415 : {
416 14609 : NS_PRECONDITION(result, "null ptr");
417 14609 : if (! result)
418 0 : return NS_ERROR_NULL_POINTER;
419 :
420 14609 : *result = nsnull;
421 41862 : if (iid.Equals(kIRDFContentSinkIID) ||
422 10465 : iid.Equals(kIXMLContentSinkIID) ||
423 10465 : iid.Equals(kIContentSinkIID) ||
424 6323 : iid.Equals(kISupportsIID)) {
425 8286 : *result = static_cast<nsIXMLContentSink*>(this);
426 8286 : AddRef();
427 8286 : return NS_OK;
428 : }
429 6323 : else if (iid.Equals(kIExpatSinkIID)) {
430 2070 : *result = static_cast<nsIExpatSink*>(this);
431 2070 : AddRef();
432 2070 : return NS_OK;
433 : }
434 4253 : return NS_NOINTERFACE;
435 : }
436 :
437 : NS_IMETHODIMP
438 43788 : RDFContentSinkImpl::HandleStartElement(const PRUnichar *aName,
439 : const PRUnichar **aAtts,
440 : PRUint32 aAttsCount,
441 : PRInt32 aIndex,
442 : PRUint32 aLineNumber)
443 : {
444 43788 : FlushText();
445 :
446 43788 : nsresult rv = NS_ERROR_UNEXPECTED; // XXX
447 :
448 43788 : RegisterNamespaces(aAtts);
449 :
450 43788 : switch (mState) {
451 : case eRDFContentSinkState_InProlog:
452 2069 : rv = OpenRDF(aName);
453 2069 : break;
454 :
455 : case eRDFContentSinkState_InDocumentElement:
456 3461 : rv = OpenObject(aName,aAtts);
457 3461 : break;
458 :
459 : case eRDFContentSinkState_InDescriptionElement:
460 28908 : rv = OpenProperty(aName,aAtts);
461 28908 : break;
462 :
463 : case eRDFContentSinkState_InContainerElement:
464 2004 : rv = OpenMember(aName,aAtts);
465 2004 : break;
466 :
467 : case eRDFContentSinkState_InPropertyElement:
468 : case eRDFContentSinkState_InMemberElement:
469 7346 : rv = OpenValue(aName,aAtts);
470 7346 : break;
471 :
472 : case eRDFContentSinkState_InEpilog:
473 0 : PR_LOG(gLog, PR_LOG_WARNING,
474 : ("rdfxml: unexpected content in epilog at line %d",
475 : aLineNumber));
476 0 : break;
477 : }
478 :
479 43788 : return rv;
480 : }
481 :
482 : NS_IMETHODIMP
483 43788 : RDFContentSinkImpl::HandleEndElement(const PRUnichar *aName)
484 : {
485 43788 : FlushText();
486 :
487 : nsIRDFResource* resource;
488 43788 : if (NS_FAILED(PopContext(resource, mState, mParseMode))) {
489 : // XXX parser didn't catch unmatched tags?
490 : #ifdef PR_LOGGING
491 0 : if (PR_LOG_TEST(gLog, PR_LOG_WARNING)) {
492 0 : nsAutoString tagStr(aName);
493 0 : char* tagCStr = ToNewCString(tagStr);
494 :
495 : PR_LogPrint
496 : ("rdfxml: extra close tag '%s' at line %d",
497 0 : tagCStr, 0/*XXX fix me */);
498 :
499 0 : NS_Free(tagCStr);
500 : }
501 : #endif
502 :
503 0 : return NS_ERROR_UNEXPECTED; // XXX
504 : }
505 :
506 : // If we've just popped a member or property element, _now_ is the
507 : // time to add that element to the graph.
508 43788 : switch (mState) {
509 : case eRDFContentSinkState_InMemberElement:
510 : {
511 3748 : nsCOMPtr<nsIRDFContainer> container;
512 1874 : NS_NewRDFContainer(getter_AddRefs(container));
513 1874 : container->Init(mDataSource, GetContextElement(1));
514 1874 : container->AppendElement(resource);
515 : }
516 1874 : break;
517 :
518 : case eRDFContentSinkState_InPropertyElement:
519 : {
520 5472 : mDataSource->Assert(GetContextElement(1), GetContextElement(0), resource, true);
521 5472 : } break;
522 : default:
523 36442 : break;
524 : }
525 :
526 43788 : if (mContextStack->IsEmpty())
527 2069 : mState = eRDFContentSinkState_InEpilog;
528 :
529 43788 : NS_IF_RELEASE(resource);
530 43788 : return NS_OK;
531 : }
532 :
// nsIExpatSink: XML comments are ignored; always returns NS_OK.
533 : NS_IMETHODIMP
534 2014 : RDFContentSinkImpl::HandleComment(const PRUnichar *aName)
535 : {
536 2014 : return NS_OK;
537 : }
538 :
539 : NS_IMETHODIMP
540 0 : RDFContentSinkImpl::HandleCDataSection(const PRUnichar *aData,
541 : PRUint32 aLength)
542 : {
543 0 : return aData ? AddText(aData, aLength) : NS_OK;
544 : }
545 :
// nsIExpatSink: the DOCTYPE declaration is ignored; always returns NS_OK.
546 : NS_IMETHODIMP
547 14 : RDFContentSinkImpl::HandleDoctypeDecl(const nsAString & aSubset,
548 : const nsAString & aName,
549 : const nsAString & aSystemId,
550 : const nsAString & aPublicId,
551 : nsISupports* aCatalogData)
552 : {
553 14 : return NS_OK;
554 : }
555 :
556 : NS_IMETHODIMP
557 149695 : RDFContentSinkImpl::HandleCharacterData(const PRUnichar *aData,
558 : PRUint32 aLength)
559 : {
560 149695 : return aData ? AddText(aData, aLength) : NS_OK;
561 : }
562 :
// nsIExpatSink: processing instructions are ignored; always returns NS_OK.
563 : NS_IMETHODIMP
564 0 : RDFContentSinkImpl::HandleProcessingInstruction(const PRUnichar *aTarget,
565 : const PRUnichar *aData)
566 : {
567 0 : return NS_OK;
568 : }
569 :
// nsIExpatSink: the XML declaration (version, encoding, standalone) is
// ignored; always returns NS_OK.
570 : NS_IMETHODIMP
571 2069 : RDFContentSinkImpl::HandleXMLDeclaration(const PRUnichar *aVersion,
572 : const PRUnichar *aEncoding,
573 : PRInt32 aStandalone)
574 : {
575 2069 : return NS_OK;
576 : }
577 :
// nsIExpatSink: called when the parser hits an error. The sink does nothing
// with the error itself; it sets *_retval to true and returns NS_OK.
// The three input pointers are only checked by a debug precondition, and
// _retval is written without a null check.
578 : NS_IMETHODIMP
579 1 : RDFContentSinkImpl::ReportError(const PRUnichar* aErrorText,
580 : const PRUnichar* aSourceText,
581 : nsIScriptError *aError,
582 : bool *_retval)
583 : {
584 1 : NS_PRECONDITION(aError && aSourceText && aErrorText, "Check arguments!!!");
585 :
586 : // The expat driver should report the error.
587 1 : *_retval = true;
588 1 : return NS_OK;
589 : }
590 :
591 : ////////////////////////////////////////////////////////////////////////
592 : // nsIContentSink interface
593 :
// nsIContentSink: no preparation is needed before parsing; always NS_OK.
594 : NS_IMETHODIMP
595 4139 : RDFContentSinkImpl::WillParse(void)
596 : {
597 4139 : return NS_OK;
598 : }
599 :
600 :
601 : NS_IMETHODIMP
602 2070 : RDFContentSinkImpl::WillBuildModel(nsDTDMode)
603 : {
604 2070 : if (mDataSource) {
605 4140 : nsCOMPtr<nsIRDFXMLSink> sink = do_QueryInterface(mDataSource);
606 2070 : if (sink)
607 8 : return sink->BeginLoad();
608 : }
609 2062 : return NS_OK;
610 : }
611 :
612 : NS_IMETHODIMP
613 2070 : RDFContentSinkImpl::DidBuildModel(bool aTerminated)
614 : {
615 2070 : if (mDataSource) {
616 4140 : nsCOMPtr<nsIRDFXMLSink> sink = do_QueryInterface(mDataSource);
617 2070 : if (sink)
618 8 : return sink->EndLoad();
619 : }
620 2062 : return NS_OK;
621 : }
622 :
623 : NS_IMETHODIMP
624 2069 : RDFContentSinkImpl::WillInterrupt(void)
625 : {
626 2069 : if (mDataSource) {
627 4138 : nsCOMPtr<nsIRDFXMLSink> sink = do_QueryInterface(mDataSource);
628 2069 : if (sink)
629 7 : return sink->Interrupt();
630 : }
631 2062 : return NS_OK;
632 : }
633 :
634 : NS_IMETHODIMP
635 4139 : RDFContentSinkImpl::WillResume(void)
636 : {
637 4139 : if (mDataSource) {
638 8278 : nsCOMPtr<nsIRDFXMLSink> sink = do_QueryInterface(mDataSource);
639 4139 : if (sink)
640 15 : return sink->Resume();
641 : }
642 4124 : return NS_OK;
643 : }
644 :
// nsIContentSink: the parser pointer is not stored; always returns NS_OK.
645 : NS_IMETHODIMP
646 2072 : RDFContentSinkImpl::SetParser(nsParserBase* aParser)
647 : {
648 2072 : return NS_OK;
649 : }
650 :
651 : ////////////////////////////////////////////////////////////////////////
652 : // nsIRDFContentSink interface
653 :
654 : NS_IMETHODIMP
655 2072 : RDFContentSinkImpl::Init(nsIURI* aURL)
656 : {
657 2072 : NS_PRECONDITION(aURL != nsnull, "null ptr");
658 2072 : if (! aURL)
659 0 : return NS_ERROR_NULL_POINTER;
660 :
661 2072 : mDocumentURL = aURL;
662 2072 : NS_ADDREF(aURL);
663 :
664 2072 : mState = eRDFContentSinkState_InProlog;
665 2072 : return NS_OK;
666 : }
667 :
// nsIRDFContentSink: stores the datasource that will receive the parsed
// assertions. A null argument only trips the debug precondition and
// assertion; the method still stores it and returns NS_OK.
668 : NS_IMETHODIMP
669 2072 : RDFContentSinkImpl::SetDataSource(nsIRDFDataSource* aDataSource)
670 : {
671 2072 : NS_PRECONDITION(aDataSource != nsnull, "SetDataSource null ptr");
672 2072 : mDataSource = aDataSource;
673 2072 : NS_ASSERTION(mDataSource != nsnull,"Couldn't QI RDF DataSource");
674 2072 : return NS_OK;
675 : }
676 :
677 :
678 : NS_IMETHODIMP
679 0 : RDFContentSinkImpl::GetDataSource(nsIRDFDataSource*& aDataSource)
680 : {
681 0 : aDataSource = mDataSource;
682 0 : NS_IF_ADDREF(aDataSource);
683 0 : return NS_OK;
684 : }
685 :
686 : ////////////////////////////////////////////////////////////////////////
687 : // Text buffering
688 :
689 : static bool
690 85639 : rdf_IsDataInBuffer(PRUnichar* buffer, PRInt32 length)
691 : {
692 1181862 : for (PRInt32 i = 0; i < length; ++i) {
693 741018 : if (buffer[i] == ' ' ||
694 94465 : buffer[i] == '\t' ||
695 94457 : buffer[i] == '\n' ||
696 23410 : buffer[i] == '\r')
697 505292 : continue;
698 :
699 23394 : return true;
700 : }
701 62245 : return false;
702 : }
703 :
704 : void
705 23394 : RDFContentSinkImpl::ParseText(nsIRDFNode **aResult)
706 : {
707 : // XXXwaterson wasteful, but we'd need to make a copy anyway to be
708 : // able to call nsIRDFService::Get[Resource|Literal|...]().
709 46788 : nsAutoString value;
710 23394 : value.Append(mText, mTextLength);
711 23394 : value.Trim(" \t\n\r");
712 :
713 23394 : switch (mParseMode) {
714 : case eRDFContentSinkParseMode_Literal:
715 : {
716 : nsIRDFLiteral *result;
717 23362 : gRDFService->GetLiteral(value.get(), &result);
718 23362 : *aResult = result;
719 : }
720 23362 : break;
721 :
722 : case eRDFContentSinkParseMode_Resource:
723 : {
724 : nsIRDFResource *result;
725 0 : gRDFService->GetUnicodeResource(value, &result);
726 0 : *aResult = result;
727 : }
728 0 : break;
729 :
730 : case eRDFContentSinkParseMode_Int:
731 : {
732 : PRInt32 i, err;
733 32 : i = value.ToInteger(&err);
734 : nsIRDFInt *result;
735 32 : gRDFService->GetIntLiteral(i, &result);
736 32 : *aResult = result;
737 : }
738 32 : break;
739 :
740 : case eRDFContentSinkParseMode_Date:
741 : {
742 0 : PRTime t = rdf_ParseDate(nsDependentCString(NS_LossyConvertUTF16toASCII(value).get(), value.Length()));
743 : nsIRDFDate *result;
744 0 : gRDFService->GetDateLiteral(t, &result);
745 0 : *aResult = result;
746 : }
747 0 : break;
748 :
749 : default:
750 0 : NS_NOTREACHED("unknown parse type");
751 0 : break;
752 : }
753 23394 : }
754 :
755 : nsresult
756 87576 : RDFContentSinkImpl::FlushText()
757 : {
758 87576 : nsresult rv = NS_OK;
759 87576 : if (0 != mTextLength) {
760 85639 : if (rdf_IsDataInBuffer(mText, mTextLength)) {
761 : // XXX if there's anything but whitespace, then we'll
762 : // create a text node.
763 :
764 23394 : switch (mState) {
765 : case eRDFContentSinkState_InMemberElement: {
766 0 : nsCOMPtr<nsIRDFNode> node;
767 0 : ParseText(getter_AddRefs(node));
768 :
769 0 : nsCOMPtr<nsIRDFContainer> container;
770 0 : NS_NewRDFContainer(getter_AddRefs(container));
771 0 : container->Init(mDataSource, GetContextElement(1));
772 :
773 0 : container->AppendElement(node);
774 0 : } break;
775 :
776 : case eRDFContentSinkState_InPropertyElement: {
777 46788 : nsCOMPtr<nsIRDFNode> node;
778 23394 : ParseText(getter_AddRefs(node));
779 :
780 23394 : mDataSource->Assert(GetContextElement(1), GetContextElement(0), node, true);
781 23394 : } break;
782 :
783 : default:
784 : // just ignore it
785 0 : break;
786 : }
787 : }
788 85639 : mTextLength = 0;
789 : }
790 87576 : return rv;
791 : }
792 :
793 :
794 : nsresult
795 149695 : RDFContentSinkImpl::AddText(const PRUnichar* aText, PRInt32 aLength)
796 : {
797 : // Create buffer when we first need it
798 149695 : if (0 == mTextSize) {
799 2069 : mText = (PRUnichar *) PR_MALLOC(sizeof(PRUnichar) * 4096);
800 2069 : if (!mText) {
801 0 : return NS_ERROR_OUT_OF_MEMORY;
802 : }
803 2069 : mTextSize = 4096;
804 : }
805 :
806 : // Copy data from string into our buffer; grow the buffer as needed.
807 : // It never shrinks, but since the content sink doesn't stick around,
808 : // this shouldn't be a bloat issue.
809 149695 : PRInt32 amount = mTextSize - mTextLength;
810 149695 : if (amount < aLength) {
811 : // Grow the buffer by at least a factor of two to prevent thrashing.
812 : // Since PR_REALLOC will leave mText intact if the call fails,
813 : // don't clobber mText or mTextSize until the new mem is allocated.
814 : PRInt32 newSize = (2 * mTextSize > (mTextSize + aLength)) ?
815 0 : (2 * mTextSize) : (mTextSize + aLength);
816 : PRUnichar* newText =
817 0 : (PRUnichar *) PR_REALLOC(mText, sizeof(PRUnichar) * newSize);
818 0 : if (!newText)
819 0 : return NS_ERROR_OUT_OF_MEMORY;
820 0 : mTextSize = newSize;
821 0 : mText = newText;
822 : }
823 149695 : memcpy(&mText[mTextLength], aText, sizeof(PRUnichar) * aLength);
824 149695 : mTextLength += aLength;
825 :
826 149695 : return NS_OK;
827 : }
828 :
829 : bool
830 3623 : rdf_RequiresAbsoluteURI(const nsString& uri)
831 : {
832 : // cheap shot at figuring out if this requires an absolute url translation
833 10869 : return !(StringBeginsWith(uri, NS_LITERAL_STRING("urn:")) ||
834 10869 : StringBeginsWith(uri, NS_LITERAL_STRING("chrome:")));
835 : }
836 :
837 : nsresult
838 39683 : RDFContentSinkImpl::GetIdAboutAttribute(const PRUnichar** aAttributes,
839 : nsIRDFResource** aResource,
840 : bool* aIsAnonymous)
841 : {
842 : // This corresponds to the dirty work of production [6.5]
843 39683 : nsresult rv = NS_OK;
844 :
845 79366 : nsAutoString nodeID;
846 :
847 79366 : nsCOMPtr<nsIAtom> localName;
848 39917 : for (; *aAttributes; aAttributes += 2) {
849 : const nsDependentSubstring& nameSpaceURI =
850 7390 : SplitExpatName(aAttributes[0], getter_AddRefs(localName));
851 :
852 : // We'll accept either `ID' or `rdf:ID' (ibid with `about' or
853 : // `rdf:about') in the spirit of being liberal towards the
854 : // input that we receive.
855 4001 : if (!nameSpaceURI.IsEmpty() &&
856 306 : !nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI)) {
857 234 : continue;
858 : }
859 :
860 : // XXX you can't specify both, but we'll just pick up the
861 : // first thing that was specified and ignore the other.
862 :
863 3461 : if (localName == kAboutAtom) {
864 3461 : if (aIsAnonymous)
865 0 : *aIsAnonymous = false;
866 :
867 6922 : nsAutoString relURI(aAttributes[1]);
868 3461 : if (rdf_RequiresAbsoluteURI(relURI)) {
869 72 : nsCAutoString uri;
870 36 : rv = mDocumentURL->Resolve(NS_ConvertUTF16toUTF8(aAttributes[1]), uri);
871 36 : if (NS_FAILED(rv)) return rv;
872 :
873 : return gRDFService->GetResource(uri,
874 36 : aResource);
875 : }
876 6850 : return gRDFService->GetResource(NS_ConvertUTF16toUTF8(aAttributes[1]),
877 6850 : aResource);
878 : }
879 0 : else if (localName == kIdAtom) {
880 0 : if (aIsAnonymous)
881 0 : *aIsAnonymous = false;
882 : // In the spirit of leniency, we do not bother trying to
883 : // enforce that this be a valid "XML Name" (see
884 : // http://www.w3.org/TR/REC-xml#NT-Nmtoken), as per
885 : // 6.21. If we wanted to, this would be where to do it.
886 :
887 : // Construct an in-line resource whose URI is the
888 : // document's URI plus the XML name specified in the ID
889 : // attribute.
890 0 : nsCAutoString name;
891 0 : nsCAutoString ref('#');
892 0 : AppendUTF16toUTF8(aAttributes[1], ref);
893 :
894 0 : rv = mDocumentURL->Resolve(ref, name);
895 0 : if (NS_FAILED(rv)) return rv;
896 :
897 0 : return gRDFService->GetResource(name, aResource);
898 : }
899 0 : else if (localName == kNodeIdAtom) {
900 0 : nodeID.Assign(aAttributes[1]);
901 : }
902 0 : else if (localName == kAboutEachAtom) {
903 : // XXX we don't deal with aboutEach...
904 : //PR_LOG(gLog, PR_LOG_WARNING,
905 : // ("rdfxml: ignoring aboutEach at line %d",
906 : // aNode.GetSourceLineNumber()));
907 : }
908 : }
909 :
910 : // Otherwise, we couldn't find anything, so just gensym one...
911 36222 : if (aIsAnonymous)
912 28876 : *aIsAnonymous = true;
913 :
914 : // If nodeID is present, check if we already know about it. If we've seen
915 : // the nodeID before, use the same resource, otherwise generate a new one.
916 36222 : if (!nodeID.IsEmpty()) {
917 0 : mNodeIDMap.Get(nodeID,aResource);
918 :
919 0 : if (!*aResource) {
920 0 : rv = gRDFService->GetAnonymousResource(aResource);
921 0 : mNodeIDMap.Put(nodeID,*aResource);
922 : }
923 : }
924 : else {
925 36222 : rv = gRDFService->GetAnonymousResource(aResource);
926 : }
927 :
928 36222 : return rv;
929 : }
930 :
931 : nsresult
932 30912 : RDFContentSinkImpl::GetResourceAttribute(const PRUnichar** aAttributes,
933 : nsIRDFResource** aResource)
934 : {
935 61824 : nsCOMPtr<nsIAtom> localName;
936 :
937 61824 : nsAutoString nodeID;
938 :
939 30944 : for (; *aAttributes; aAttributes += 2) {
940 : const nsDependentSubstring& nameSpaceURI =
941 388 : SplitExpatName(aAttributes[0], getter_AddRefs(localName));
942 :
943 : // We'll accept `resource' or `rdf:resource', under the spirit
944 : // that we should be liberal towards the input that we
945 : // receive.
946 388 : if (!nameSpaceURI.IsEmpty() &&
947 194 : !nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI)) {
948 32 : continue;
949 : }
950 :
951 : // XXX you can't specify both, but we'll just pick up the
952 : // first thing that was specified and ignore the other.
953 :
954 162 : if (localName == kResourceAtom) {
955 : // XXX Take the URI and make it fully qualified by
956 : // sticking it into the document's URL. This may not be
957 : // appropriate...
958 324 : nsAutoString relURI(aAttributes[1]);
959 162 : if (rdf_RequiresAbsoluteURI(relURI)) {
960 : nsresult rv;
961 64 : nsCAutoString uri;
962 :
963 32 : rv = mDocumentURL->Resolve(NS_ConvertUTF16toUTF8(aAttributes[1]), uri);
964 32 : if (NS_FAILED(rv)) return rv;
965 :
966 32 : return gRDFService->GetResource(uri, aResource);
967 : }
968 260 : return gRDFService->GetResource(NS_ConvertUTF16toUTF8(aAttributes[1]),
969 260 : aResource);
970 : }
971 0 : else if (localName == kNodeIdAtom) {
972 0 : nodeID.Assign(aAttributes[1]);
973 : }
974 : }
975 :
976 : // If nodeID is present, check if we already know about it. If we've seen
977 : // the nodeID before, use the same resource, otherwise generate a new one.
978 30750 : if (!nodeID.IsEmpty()) {
979 0 : mNodeIDMap.Get(nodeID,aResource);
980 :
981 0 : if (!*aResource) {
982 : nsresult rv;
983 0 : rv = gRDFService->GetAnonymousResource(aResource);
984 0 : if (NS_FAILED(rv)) {
985 0 : return rv;
986 : }
987 0 : mNodeIDMap.Put(nodeID,*aResource);
988 : }
989 0 : return NS_OK;
990 : }
991 :
992 30750 : return NS_ERROR_FAILURE;
993 : }
994 :
995 : nsresult
996 39715 : RDFContentSinkImpl::AddProperties(const PRUnichar** aAttributes,
997 : nsIRDFResource* aSubject,
998 : PRInt32* aCount)
999 : {
1000 39715 : if (aCount)
1001 28908 : *aCount = 0;
1002 :
1003 79430 : nsCOMPtr<nsIAtom> localName;
1004 43677 : for (; *aAttributes; aAttributes += 2) {
1005 : const nsDependentSubstring& nameSpaceURI =
1006 7924 : SplitExpatName(aAttributes[0], getter_AddRefs(localName));
1007 :
1008 : // skip 'xmlns' directives, these are "meta" information
1009 3962 : if (nameSpaceURI.EqualsLiteral("http://www.w3.org/2000/xmlns/")) {
1010 0 : continue;
1011 : }
1012 :
1013 : // skip `about', `ID', `resource', and 'nodeID' attributes (either with or
1014 : // without the `rdf:' prefix); these are all "special" and
1015 : // should've been dealt with by the caller.
1016 4932 : if (localName == kAboutAtom || localName == kIdAtom ||
1017 970 : localName == kResourceAtom || localName == kNodeIdAtom) {
1018 3597 : if (nameSpaceURI.IsEmpty() ||
1019 104 : nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI))
1020 3493 : continue;
1021 : }
1022 :
1023 : // Skip `parseType', `RDF:parseType', and `NC:parseType'. This
1024 : // is meta-information that will be handled in SetParseMode.
1025 469 : if (localName == kParseTypeAtom) {
1026 96 : if (nameSpaceURI.IsEmpty() ||
1027 32 : nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI) ||
1028 32 : nameSpaceURI.EqualsLiteral(NC_NAMESPACE_URI)) {
1029 32 : continue;
1030 : }
1031 : }
1032 :
1033 874 : NS_ConvertUTF16toUTF8 propertyStr(nameSpaceURI);
1034 437 : propertyStr.Append(nsAtomCString(localName));
1035 :
1036 : // Add the assertion to RDF
1037 874 : nsCOMPtr<nsIRDFResource> property;
1038 437 : gRDFService->GetResource(propertyStr, getter_AddRefs(property));
1039 :
1040 874 : nsCOMPtr<nsIRDFLiteral> target;
1041 437 : gRDFService->GetLiteral(aAttributes[1],
1042 874 : getter_AddRefs(target));
1043 :
1044 437 : mDataSource->Assert(aSubject, property, target, true);
1045 : }
1046 39715 : return NS_OK;
1047 : }
1048 :
1049 : void
1050 30912 : RDFContentSinkImpl::SetParseMode(const PRUnichar **aAttributes)
1051 : {
1052 61824 : nsCOMPtr<nsIAtom> localName;
1053 31074 : for (; *aAttributes; aAttributes += 2) {
1054 : const nsDependentSubstring& nameSpaceURI =
1055 388 : SplitExpatName(aAttributes[0], getter_AddRefs(localName));
1056 :
1057 194 : if (localName == kParseTypeAtom) {
1058 64 : nsDependentString v(aAttributes[1]);
1059 :
1060 64 : if (nameSpaceURI.IsEmpty() ||
1061 32 : nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI)) {
1062 0 : if (v.EqualsLiteral("Resource"))
1063 0 : mParseMode = eRDFContentSinkParseMode_Resource;
1064 :
1065 : break;
1066 : }
1067 32 : else if (nameSpaceURI.EqualsLiteral(NC_NAMESPACE_URI)) {
1068 32 : if (v.EqualsLiteral("Date"))
1069 0 : mParseMode = eRDFContentSinkParseMode_Date;
1070 32 : else if (v.EqualsLiteral("Integer"))
1071 32 : mParseMode = eRDFContentSinkParseMode_Int;
1072 :
1073 : break;
1074 : }
1075 : }
1076 : }
1077 30912 : }
1078 :
1079 : ////////////////////////////////////////////////////////////////////////
1080 : // RDF-specific routines used to build the model
1081 :
1082 : nsresult
1083 2069 : RDFContentSinkImpl::OpenRDF(const PRUnichar* aName)
1084 : {
1085 : // ensure that we're actually reading RDF by making sure that the
1086 : // opening tag is <rdf:RDF>, where "rdf:" corresponds to whatever
1087 : // they've declared the standard RDF namespace to be.
1088 4138 : nsCOMPtr<nsIAtom> localName;
1089 : const nsDependentSubstring& nameSpaceURI =
1090 4138 : SplitExpatName(aName, getter_AddRefs(localName));
1091 :
1092 2069 : if (!nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI) || localName != kRDFAtom) {
1093 : // PR_LOG(gLog, PR_LOG_ALWAYS,
1094 : // ("rdfxml: expected RDF:RDF at line %d",
1095 : // aNode.GetSourceLineNumber()));
1096 :
1097 0 : return NS_ERROR_UNEXPECTED;
1098 : }
1099 :
1100 2069 : PushContext(nsnull, mState, mParseMode);
1101 2069 : mState = eRDFContentSinkState_InDocumentElement;
1102 2069 : return NS_OK;
1103 : }
1104 :
1105 : nsresult
1106 10807 : RDFContentSinkImpl::OpenObject(const PRUnichar* aName,
1107 : const PRUnichar** aAttributes)
1108 : {
1109 : // an "object" non-terminal is either a "description", a "typed
1110 : // node", or a "container", so this change the content sink's
1111 : // state appropriately.
1112 21614 : nsCOMPtr<nsIAtom> localName;
1113 : const nsDependentSubstring& nameSpaceURI =
1114 21614 : SplitExpatName(aName, getter_AddRefs(localName));
1115 :
1116 : // Figure out the URI of this object, and create an RDF node for it.
1117 21614 : nsCOMPtr<nsIRDFResource> source;
1118 10807 : GetIdAboutAttribute(aAttributes, getter_AddRefs(source));
1119 :
1120 : // If there is no `ID' or `about', then there's not much we can do.
1121 10807 : if (! source)
1122 0 : return NS_ERROR_FAILURE;
1123 :
1124 : // Push the element onto the context stack
1125 10807 : PushContext(source, mState, mParseMode);
1126 :
1127 : // Now figure out what kind of state transition we need to
1128 : // make. We'll either be going into a mode where we parse a
1129 : // description or a container.
1130 10807 : bool isaTypedNode = true;
1131 :
1132 10807 : if (nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI)) {
1133 10807 : isaTypedNode = false;
1134 :
1135 10807 : if (localName == kDescriptionAtom) {
1136 : // it's a description
1137 9351 : mState = eRDFContentSinkState_InDescriptionElement;
1138 : }
1139 1456 : else if (localName == kBagAtom) {
1140 : // it's a bag container
1141 0 : InitContainer(kRDF_Bag, source);
1142 0 : mState = eRDFContentSinkState_InContainerElement;
1143 : }
1144 1456 : else if (localName == kSeqAtom) {
1145 : // it's a seq container
1146 1456 : InitContainer(kRDF_Seq, source);
1147 1456 : mState = eRDFContentSinkState_InContainerElement;
1148 : }
1149 0 : else if (localName == kAltAtom) {
1150 : // it's an alt container
1151 0 : InitContainer(kRDF_Alt, source);
1152 0 : mState = eRDFContentSinkState_InContainerElement;
1153 : }
1154 : else {
1155 : // heh, that's not *in* the RDF namespace: just treat it
1156 : // like a typed node
1157 0 : isaTypedNode = true;
1158 : }
1159 : }
1160 :
1161 10807 : if (isaTypedNode) {
1162 0 : NS_ConvertUTF16toUTF8 typeStr(nameSpaceURI);
1163 0 : typeStr.Append(nsAtomCString(localName));
1164 :
1165 0 : nsCOMPtr<nsIRDFResource> type;
1166 0 : nsresult rv = gRDFService->GetResource(typeStr, getter_AddRefs(type));
1167 0 : if (NS_FAILED(rv)) return rv;
1168 :
1169 0 : rv = mDataSource->Assert(source, kRDF_type, type, true);
1170 0 : if (NS_FAILED(rv)) return rv;
1171 :
1172 0 : mState = eRDFContentSinkState_InDescriptionElement;
1173 : }
1174 :
1175 10807 : AddProperties(aAttributes, source);
1176 10807 : return NS_OK;
1177 : }
1178 :
1179 : nsresult
1180 28908 : RDFContentSinkImpl::OpenProperty(const PRUnichar* aName, const PRUnichar** aAttributes)
1181 : {
1182 : nsresult rv;
1183 :
1184 : // an "object" non-terminal is either a "description", a "typed
1185 : // node", or a "container", so this change the content sink's
1186 : // state appropriately.
1187 57816 : nsCOMPtr<nsIAtom> localName;
1188 : const nsDependentSubstring& nameSpaceURI =
1189 57816 : SplitExpatName(aName, getter_AddRefs(localName));
1190 :
1191 57816 : NS_ConvertUTF16toUTF8 propertyStr(nameSpaceURI);
1192 28908 : propertyStr.Append(nsAtomCString(localName));
1193 :
1194 57816 : nsCOMPtr<nsIRDFResource> property;
1195 28908 : rv = gRDFService->GetResource(propertyStr, getter_AddRefs(property));
1196 28908 : if (NS_FAILED(rv)) return rv;
1197 :
1198 : // See if they've specified a 'resource' attribute, in which case
1199 : // they mean *that* to be the object of this property.
1200 57816 : nsCOMPtr<nsIRDFResource> target;
1201 28908 : GetResourceAttribute(aAttributes, getter_AddRefs(target));
1202 :
1203 28908 : bool isAnonymous = false;
1204 :
1205 28908 : if (! target) {
1206 : // See if an 'ID' attribute has been specified, in which case
1207 : // this corresponds to the fourth form of [6.12].
1208 :
1209 : // XXX strictly speaking, we should reject the RDF/XML as
1210 : // invalid if they've specified both an 'ID' and a 'resource'
1211 : // attribute. Bah.
1212 :
1213 : // XXX strictly speaking, 'about=' isn't allowed here, but
1214 : // what the hell.
1215 28876 : GetIdAboutAttribute(aAttributes, getter_AddRefs(target), &isAnonymous);
1216 : }
1217 :
1218 28908 : if (target) {
1219 : // They specified an inline resource for the value of this
1220 : // property. Create an RDF resource for the inline resource
1221 : // URI, add the properties to it, and attach the inline
1222 : // resource to its parent.
1223 : PRInt32 count;
1224 28908 : rv = AddProperties(aAttributes, target, &count);
1225 28908 : NS_ASSERTION(NS_SUCCEEDED(rv), "problem adding properties");
1226 28908 : if (NS_FAILED(rv)) return rv;
1227 :
1228 28908 : if (count || !isAnonymous) {
1229 : // If the resource was "anonymous" (i.e., they hadn't
1230 : // explicitly set an ID or resource attribute), then we'll
1231 : // only assert this property from the context element *if*
1232 : // there were properties specified on the anonymous
1233 : // resource.
1234 32 : rv = mDataSource->Assert(GetContextElement(0), property, target, true);
1235 32 : if (NS_FAILED(rv)) return rv;
1236 : }
1237 :
1238 : // XXX Technically, we should _not_ fall through here and push
1239 : // the element onto the stack: this is supposed to be a closed
1240 : // node. But right now I'm lazy and the code will just Do The
1241 : // Right Thing so long as the RDF is well-formed.
1242 : }
1243 :
1244 : // Push the element onto the context stack and change state.
1245 28908 : PushContext(property, mState, mParseMode);
1246 28908 : mState = eRDFContentSinkState_InPropertyElement;
1247 28908 : SetParseMode(aAttributes);
1248 :
1249 28908 : return NS_OK;
1250 : }
1251 :
1252 : nsresult
1253 2004 : RDFContentSinkImpl::OpenMember(const PRUnichar* aName,
1254 : const PRUnichar** aAttributes)
1255 : {
1256 : // ensure that we're actually reading a member element by making
1257 : // sure that the opening tag is <rdf:li>, where "rdf:" corresponds
1258 : // to whatever they've declared the standard RDF namespace to be.
1259 : nsresult rv;
1260 :
1261 4008 : nsCOMPtr<nsIAtom> localName;
1262 : const nsDependentSubstring& nameSpaceURI =
1263 4008 : SplitExpatName(aName, getter_AddRefs(localName));
1264 :
1265 4008 : if (!nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI) ||
1266 2004 : localName != kLiAtom) {
1267 0 : PR_LOG(gLog, PR_LOG_ALWAYS,
1268 : ("rdfxml: expected RDF:li at line %d",
1269 : -1)); // XXX pass in line number
1270 :
1271 0 : return NS_ERROR_UNEXPECTED;
1272 : }
1273 :
1274 : // The parent element is the container.
1275 2004 : nsIRDFResource* container = GetContextElement(0);
1276 2004 : if (! container)
1277 0 : return NS_ERROR_NULL_POINTER;
1278 :
1279 : nsIRDFResource* resource;
1280 2004 : if (NS_SUCCEEDED(rv = GetResourceAttribute(aAttributes, &resource))) {
1281 : // Okay, this node has an RDF:resource="..." attribute. That
1282 : // means that it's a "referenced item," as covered in [6.29].
1283 260 : nsCOMPtr<nsIRDFContainer> c;
1284 130 : NS_NewRDFContainer(getter_AddRefs(c));
1285 130 : c->Init(mDataSource, container);
1286 130 : c->AppendElement(resource);
1287 :
1288 : // XXX Technically, we should _not_ fall through here and push
1289 : // the element onto the stack: this is supposed to be a closed
1290 : // node. But right now I'm lazy and the code will just Do The
1291 : // Right Thing so long as the RDF is well-formed.
1292 130 : NS_RELEASE(resource);
1293 : }
1294 :
1295 : // Change state. Pushing a null context element is a bit weird,
1296 : // but the idea is that there really is _no_ context "property".
1297 : // The contained element will use nsIRDFContainer::AppendElement() to add
1298 : // the element to the container, which requires only the container
1299 : // and the element to be added.
1300 2004 : PushContext(nsnull, mState, mParseMode);
1301 2004 : mState = eRDFContentSinkState_InMemberElement;
1302 2004 : SetParseMode(aAttributes);
1303 :
1304 2004 : return NS_OK;
1305 : }
1306 :
1307 :
1308 : nsresult
1309 7346 : RDFContentSinkImpl::OpenValue(const PRUnichar* aName, const PRUnichar** aAttributes)
1310 : {
1311 : // a "value" can either be an object or a string: we'll only get
1312 : // *here* if it's an object, as raw text is added as a leaf.
1313 7346 : return OpenObject(aName,aAttributes);
1314 : }
1315 :
1316 : ////////////////////////////////////////////////////////////////////////
1317 : // namespace resolution
1318 : void
1319 43788 : RDFContentSinkImpl::RegisterNamespaces(const PRUnichar **aAttributes)
1320 : {
1321 87576 : nsCOMPtr<nsIRDFXMLSink> sink = do_QueryInterface(mDataSource);
1322 43788 : if (!sink) {
1323 : return;
1324 : }
1325 356 : NS_NAMED_LITERAL_STRING(xmlns, "http://www.w3.org/2000/xmlns/");
1326 602 : for (; *aAttributes; aAttributes += 2) {
1327 : // check the namespace
1328 424 : const PRUnichar* attr = aAttributes[0];
1329 424 : const PRUnichar* xmlnsP = xmlns.BeginReading();
1330 6615 : while (*attr == *xmlnsP) {
1331 5767 : ++attr;
1332 5767 : ++xmlnsP;
1333 : }
1334 442 : if (*attr != 0xFFFF ||
1335 18 : xmlnsP != xmlns.EndReading()) {
1336 406 : continue;
1337 : }
1338 : // get the localname (or "xmlns" for the default namespace)
1339 18 : const PRUnichar* endLocal = ++attr;
1340 83 : while (*endLocal && *endLocal != 0xFFFF) {
1341 47 : ++endLocal;
1342 : }
1343 36 : nsDependentSubstring lname(attr, endLocal);
1344 36 : nsCOMPtr<nsIAtom> preferred = do_GetAtom(lname);
1345 18 : if (preferred == kXMLNSAtom) {
1346 0 : preferred = nsnull;
1347 : }
1348 18 : sink->AddNameSpace(preferred, nsDependentString(aAttributes[1]));
1349 : }
1350 : }
1351 :
1352 : ////////////////////////////////////////////////////////////////////////
1353 : // Qualified name resolution
1354 :
1355 : const nsDependentSubstring
1356 51833 : RDFContentSinkImpl::SplitExpatName(const PRUnichar *aExpatName,
1357 : nsIAtom **aLocalName)
1358 : {
1359 : /**
1360 : * Expat can send the following:
1361 : * localName
1362 : * namespaceURI<separator>localName
1363 : * namespaceURI<separator>localName<separator>prefix
1364 : *
1365 : * and we use 0xFFFF for the <separator>.
1366 : *
1367 : */
1368 :
1369 51833 : const PRUnichar *uriEnd = aExpatName;
1370 51833 : const PRUnichar *nameStart = aExpatName;
1371 : const PRUnichar *pos;
1372 2202220 : for (pos = aExpatName; *pos; ++pos) {
1373 2183568 : if (*pos == 0xFFFF) {
1374 78236 : if (uriEnd != aExpatName) {
1375 33181 : break;
1376 : }
1377 :
1378 45055 : uriEnd = pos;
1379 45055 : nameStart = pos + 1;
1380 : }
1381 : }
1382 :
1383 103666 : const nsDependentSubstring& nameSpaceURI = Substring(aExpatName, uriEnd);
1384 51833 : *aLocalName = NS_NewAtom(Substring(nameStart, pos));
1385 51833 : return nameSpaceURI;
1386 : }
1387 :
1388 : nsresult
1389 1456 : RDFContentSinkImpl::InitContainer(nsIRDFResource* aContainerType, nsIRDFResource* aContainer)
1390 : {
1391 : // Do the right kind of initialization based on the container
1392 : // 'type' resource, and the state of the container (i.e., 'make' a
1393 : // new container vs. 'reinitialize' the container).
1394 : nsresult rv;
1395 :
1396 : static const ContainerInfo gContainerInfo[] = {
1397 : { &RDFContentSinkImpl::kRDF_Alt, &nsIRDFContainerUtils::IsAlt, &nsIRDFContainerUtils::MakeAlt },
1398 : { &RDFContentSinkImpl::kRDF_Bag, &nsIRDFContainerUtils::IsBag, &nsIRDFContainerUtils::MakeBag },
1399 : { &RDFContentSinkImpl::kRDF_Seq, &nsIRDFContainerUtils::IsSeq, &nsIRDFContainerUtils::MakeSeq },
1400 : { 0, 0, 0 },
1401 : };
1402 :
1403 8736 : for (const ContainerInfo* info = gContainerInfo; info->mType != 0; ++info) {
1404 4368 : if (*info->mType != aContainerType)
1405 2912 : continue;
1406 :
1407 : bool isContainer;
1408 1456 : rv = (gRDFContainerUtils->*(info->mTestFn))(mDataSource, aContainer, &isContainer);
1409 1456 : if (isContainer) {
1410 0 : rv = ReinitContainer(aContainerType, aContainer);
1411 : }
1412 : else {
1413 1456 : rv = (gRDFContainerUtils->*(info->mMakeFn))(mDataSource, aContainer, nsnull);
1414 : }
1415 1456 : return rv;
1416 : }
1417 :
1418 0 : NS_NOTREACHED("not an RDF container type");
1419 0 : return NS_ERROR_FAILURE;
1420 : }
1421 :
1422 :
1423 :
1424 : nsresult
1425 0 : RDFContentSinkImpl::ReinitContainer(nsIRDFResource* aContainerType, nsIRDFResource* aContainer)
1426 : {
1427 : // Mega-kludge to deal with the fact that Make[Seq|Alt|Bag] is
1428 : // idempotent, and as such, containers will have state (e.g.,
1429 : // RDF:nextVal) maintained in the graph across loads. This
1430 : // re-initializes each container's RDF:nextVal to '1', and 'marks'
1431 : // the container as such.
1432 : nsresult rv;
1433 :
1434 0 : nsCOMPtr<nsIRDFLiteral> one;
1435 0 : rv = gRDFService->GetLiteral(NS_LITERAL_STRING("1").get(), getter_AddRefs(one));
1436 0 : if (NS_FAILED(rv)) return rv;
1437 :
1438 : // Re-initialize the 'nextval' property
1439 0 : nsCOMPtr<nsIRDFNode> nextval;
1440 0 : rv = mDataSource->GetTarget(aContainer, kRDF_nextVal, true, getter_AddRefs(nextval));
1441 0 : if (NS_FAILED(rv)) return rv;
1442 :
1443 0 : rv = mDataSource->Change(aContainer, kRDF_nextVal, nextval, one);
1444 0 : if (NS_FAILED(rv)) return rv;
1445 :
1446 : // Re-mark as a container. XXX should be kRDF_type
1447 0 : rv = mDataSource->Assert(aContainer, kRDF_instanceOf, aContainerType, true);
1448 0 : NS_ASSERTION(NS_SUCCEEDED(rv), "unable to mark container as such");
1449 0 : if (NS_FAILED(rv)) return rv;
1450 :
1451 0 : return NS_OK;
1452 : }
1453 :
1454 : ////////////////////////////////////////////////////////////////////////
1455 : // Content stack management
1456 :
1457 : nsIRDFResource*
1458 61642 : RDFContentSinkImpl::GetContextElement(PRInt32 ancestor /* = 0 */)
1459 : {
1460 123284 : if ((nsnull == mContextStack) ||
1461 61642 : (PRUint32(ancestor) >= mContextStack->Length())) {
1462 0 : return nsnull;
1463 : }
1464 :
1465 : return mContextStack->ElementAt(
1466 61642 : mContextStack->Length()-ancestor-1).mResource;
1467 : }
1468 :
1469 : PRInt32
1470 43788 : RDFContentSinkImpl::PushContext(nsIRDFResource *aResource,
1471 : RDFContentSinkState aState,
1472 : RDFContentSinkParseMode aParseMode)
1473 : {
1474 43788 : if (! mContextStack) {
1475 2069 : mContextStack = new nsAutoTArray<RDFContextStackElement, 8>();
1476 2069 : if (! mContextStack)
1477 0 : return 0;
1478 : }
1479 :
1480 43788 : RDFContextStackElement* e = mContextStack->AppendElement();
1481 43788 : if (! e)
1482 0 : return mContextStack->Length();
1483 :
1484 43788 : e->mResource = aResource;
1485 43788 : e->mState = aState;
1486 43788 : e->mParseMode = aParseMode;
1487 :
1488 43788 : return mContextStack->Length();
1489 : }
1490 :
1491 : nsresult
1492 43788 : RDFContentSinkImpl::PopContext(nsIRDFResource *&aResource,
1493 : RDFContentSinkState &aState,
1494 : RDFContentSinkParseMode &aParseMode)
1495 : {
1496 87576 : if ((nsnull == mContextStack) ||
1497 43788 : (mContextStack->IsEmpty())) {
1498 0 : return NS_ERROR_NULL_POINTER;
1499 : }
1500 :
1501 43788 : PRUint32 i = mContextStack->Length() - 1;
1502 43788 : RDFContextStackElement &e = mContextStack->ElementAt(i);
1503 :
1504 43788 : aResource = e.mResource;
1505 43788 : NS_IF_ADDREF(aResource);
1506 43788 : aState = e.mState;
1507 43788 : aParseMode = e.mParseMode;
1508 :
1509 43788 : mContextStack->RemoveElementAt(i);
1510 43788 : return NS_OK;
1511 : }
1512 :
1513 :
1514 : ////////////////////////////////////////////////////////////////////////
1515 :
1516 : nsresult
1517 2072 : NS_NewRDFContentSink(nsIRDFContentSink** aResult)
1518 : {
1519 2072 : NS_PRECONDITION(aResult != nsnull, "null ptr");
1520 2072 : if (! aResult)
1521 0 : return NS_ERROR_NULL_POINTER;
1522 :
1523 2072 : RDFContentSinkImpl* sink = new RDFContentSinkImpl();
1524 2072 : if (! sink)
1525 0 : return NS_ERROR_OUT_OF_MEMORY;
1526 :
1527 2072 : NS_ADDREF(sink);
1528 2072 : *aResult = sink;
1529 2072 : return NS_OK;
1530 : }
|