📄 rdfxmlparser.java
字号:
/* Sesame - Storage and Querying architecture for RDF and RDF Schema * Copyright (C) 2001-2005 Aduna * * Contact: * Aduna * Prinses Julianaplein 14 b * 3817 CS Amersfoort * The Netherlands * tel. +33 (0)33 465 99 87 * fax. +33 (0)33 465 99 87 * * http://aduna.biz/ * http://www.openrdf.org/ * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */package org.openrdf.rio.rdfxml;import java.io.IOException;import java.io.InputStream;import java.io.Reader;import java.util.HashMap;import java.util.HashSet;import java.util.Iterator;import java.util.Map;import java.util.Set;import java.util.Stack;import org.xml.sax.InputSource;import org.xml.sax.Locator;import org.xml.sax.SAXException;import org.xml.sax.SAXParseException;import org.xml.sax.XMLReader;import org.openrdf.util.xml.XMLReaderFactory;import org.openrdf.util.xml.XmlDatatypeUtil;import org.openrdf.util.xml.XmlUtil;import org.openrdf.vocabulary.RDF;import org.openrdf.model.BNode;import org.openrdf.model.Literal;import org.openrdf.model.Resource;import org.openrdf.model.URI;import org.openrdf.model.Value;import org.openrdf.model.ValueFactory;import org.openrdf.model.impl.ValueFactoryImpl;import org.openrdf.rio.NamespaceListener;import org.openrdf.rio.ParseErrorListener;import org.openrdf.rio.ParseException;import org.openrdf.rio.ParseLocationListener;import 
org.openrdf.rio.StatementHandler;
import org.openrdf.rio.StatementHandlerException;

/**
 * A parser for XML-serialized RDF. This parser operates directly
 * on the SAX events generated by a SAX-enabled XML parser. The XML parser
 * should be compliant with SAX2. You should specify which SAX parser should
 * be used by setting the <code>org.xml.sax.driver</code> property.
 * This parser is not thread-safe, therefore its <tt>parse</tt> methods and
 * most of its configuration methods are synchronized.
 * <p>
 * To parse a document using this parser:
 * <ul>
 * <li>Create an instance of RdfXmlParser, optionally supplying it with your
 * own ValueFactory.</li>
 * <li>Set the StatementHandler.</li>
 * <li>Optionally, set the ParseErrorListener, ParseLocationListener and/or
 * NamespaceListener.</li>
 * <li>Optionally, specify whether the parser should verify the data it
 * parses and whether it should stop immediately when it finds an error in
 * the data (both default to <tt>true</tt>).</li>
 * <li>Call the parse method.</li>
 * </ul>
 * Example code:
 * <pre>
 * // Use the SAX2-compliant Xerces parser:
 * System.setProperty(
 *     "org.xml.sax.driver",
 *     "org.apache.xerces.parsers.SAXParser");
 *
 * Parser parser = new RdfXmlParser();
 * parser.setStatementHandler(myStatementHandler);
 * parser.setParseErrorListener(myParseErrorListener);
 * parser.setVerifyData(true);
 * parser.setStopAtFirstError(false);
 *
 * // Parse the data from inputStream, resolving any
 * // relative URIs against http://foo/bar:
 * parser.parse(inputStream, "http://foo/bar");
 * </pre>
 *
 * @see org.openrdf.model.ValueFactory
 * @see org.openrdf.rio.StatementHandler
 * @see org.openrdf.rio.ParseErrorListener
 * @see org.openrdf.rio.ParseLocationListener
 * @see org.openrdf.rio.NamespaceListener
 **/
public class RdfXmlParser implements org.openrdf.rio.Parser {

/*------------------------------------------------------+
| Frequently used resources                             |
+------------------------------------------------------*/

	// The URI objects below are created in the constructor through the
	// configured ValueFactory; they are per-instance, not static constants.

	/** The rdf:type resource. **/
	private URI RDF_TYPE;

	/** The rdf:subject resource. **/
	private URI RDF_SUBJECT;

	/** The rdf:predicate resource. **/
	private URI RDF_PREDICATE;

	/** The rdf:object resource. **/
	private URI RDF_OBJECT;

	/** The rdf:Statement resource. **/
	private URI RDF_STATEMENT;

	/** The rdf:li resource. **/
	private URI RDF_LI;

	/** The rdf:first resource. **/
	private URI RDF_FIRST;

	/** The rdf:rest resource. **/
	private URI RDF_REST;

	/** The rdf:nil resource. **/
	private URI RDF_NIL;

/*------------------------------------------------------+
| Variables                                             |
+------------------------------------------------------*/

	/**
	 * A filter filtering calls to SAX methods specifically for this parser.
	 * It is installed as the content handler of the XML reader when a
	 * document is parsed.
	 **/
	private SAXFilter _saxFilter;

	/**
	 * A factory for creating resources, bNodes and literals.
	 **/
	private ValueFactory _valueFactory;

	/**
	 * Mapping from bNode ID's as used in the RDF document to the
	 * object created for it by the ValueFactory. The map is cleared after
	 * every parse.
	 **/
	private Map _bNodeIdMap;

	/**
	 * The object to report statements to.
	 **/
	private StatementHandler _statementHandler;

	/**
	 * The object to report parse errors to.
	 **/
	private ParseErrorListener _errorListener;

	/**
	 * The base URI for resolving relative URIs. This variable is set/modified
	 * by the SAXFilter during parsing such that it always represents the URI
	 * of the context in which elements are reported. It is reset to
	 * <tt>null</tt> after every parse.
	 **/
	private org.openrdf.util.uri.URI _baseURI;

	/**
	 * The base URI of the document. This variable is set when
	 * <tt>parse(inputStream, baseURI)</tt> is called and will not be changed
	 * during parsing.
	 **/
	private String _documentURI;

	/**
	 * The language of literal values as can be specified using xml:lang
	 * attributes. This variable is set/modified by the SAXFilter during
	 * parsing such that it always represents the language of the context
	 * in which elements are reported. <tt>null</tt> means that no language
	 * is in effect.
	 **/
	private String _xmlLang;

	/**
	 * A stack of node- and property elements. It is emptied after every
	 * parse.
	 **/
	private Stack _elementStack = new Stack();

	/**
	 * A set containing URIs that have been generated as a result of rdf:ID
	 * attributes. These URIs should be unique within a single document.
	 * The set is emptied after every parse.
	 **/
	private Set _usedIDs = new HashSet();

	/**
	 * Flag indicating whether the parser should check the data it parses.
	 * Declared without an access modifier (package-private); defaults to
	 * <tt>true</tt>.
	 **/
	boolean _verifyData = true;

	/**
	 * Flag indicating whether the parser should preserve bnode identifiers specified
	 * in the source. Declared without an access modifier (package-private);
	 * defaults to <tt>false</tt>.
	 */
	boolean _preserveBNodeIds = false;

	/**
	 * Indicates how datatyped literals should be handled. Legal
	 * values are <tt>DT_IGNORE</tt>, <tt>DT_VERIFY</tt> and
	 * <tt>DT_NORMALIZE</tt>. The constructor initializes this field to
	 * <tt>DT_VERIFY</tt>.
	 **/
	private int _datatypeHandling;

	/**
	 * Flag indicating whether the parser should stop parsing when it finds
	 * an error in the data. Declared without an access modifier
	 * (package-private); defaults to <tt>true</tt>.
	 **/
	boolean _stopAtFirstError = true;

/*------------------------------------------------------+
| Constructors                                          |
+------------------------------------------------------*/

	/**
	 * Creates a new RdfXmlParser that will use a <tt>ValueFactoryImpl</tt> to
	 * create objects for resources, bNodes and literals.
	 * @see org.openrdf.model.impl.ValueFactoryImpl
	 **/
	public RdfXmlParser() {
		// Delegates to the ValueFactory-taking constructor.
		this(new ValueFactoryImpl());
	}

	/**
	 * Creates a new RdfXmlParser that will use the supplied ValueFactory to
	 * create objects for resources, bNodes and literals.
	 *
	 * @param valueFactory A ValueFactory.
**/ public RdfXmlParser(ValueFactory valueFactory) { _valueFactory = valueFactory; _bNodeIdMap = new HashMap(); _datatypeHandling = DT_VERIFY; RDF_TYPE = _valueFactory.createURI(RDF.TYPE); RDF_SUBJECT = _valueFactory.createURI(RDF.SUBJECT); RDF_PREDICATE = _valueFactory.createURI(RDF.PREDICATE); RDF_OBJECT = _valueFactory.createURI(RDF.OBJECT); RDF_STATEMENT = _valueFactory.createURI(RDF.STATEMENT); RDF_LI = _valueFactory.createURI(RDF.LI); RDF_FIRST = _valueFactory.createURI(RDF.FIRST); RDF_REST = _valueFactory.createURI(RDF.REST); RDF_NIL = _valueFactory.createURI(RDF.NIL); // SAXFilter does some filtering and verifying of SAX events _saxFilter = new SAXFilter(this); }/*------------------------------------------------------+| Methods from interface Parser |+------------------------------------------------------*/ // implements Parser.setStatementHandler(StatementHandler) public synchronized void setStatementHandler(StatementHandler sh) { _statementHandler = sh; } // implements Parser.setParseErrorListener(ParseErrorListener) public synchronized void setParseErrorListener(ParseErrorListener el) { _errorListener = el; } // implements Parser.setParseLocationListener(ParseLocationListener) public synchronized void setParseLocationListener(ParseLocationListener ll) { _saxFilter.setParseLocationListener(ll); } // implements Parser.setNamespaceListener(NamespaceListener) public synchronized void setNamespaceListener(NamespaceListener nl) { _saxFilter.setNamespaceListener(nl); } // implements Parser.setVerifyData(boolean) public synchronized void setVerifyData(boolean verifyData) { _verifyData = verifyData; } // implements Parser.setPreserveBNodeIds(boolean) public synchronized void setPreserveBNodeIds(boolean preserveBNodeIds) { _preserveBNodeIds = preserveBNodeIds; } // implements Parser.setStopAtFirstError(boolean) public synchronized void setStopAtFirstError(boolean stopAtFirstError) { _stopAtFirstError = stopAtFirstError; } // implements 
// Parser.setDatatypeHandling(int)
	/**
	 * Sets how datatyped literals are handled. Legal values are
	 * <tt>DT_IGNORE</tt>, <tt>DT_VERIFY</tt> and <tt>DT_NORMALIZE</tt>.
	 * Synchronized like the other configuration methods of this class.
	 *
	 * @param datatypeHandling One of the <tt>DT_*</tt> constants.
	 **/
	public synchronized void setDatatypeHandling(int datatypeHandling) {
		_datatypeHandling = datatypeHandling;
	}

	/**
	 * Sets the parser in a mode to parse stand-alone RDF documents. In
	 * stand-alone RDF documents, the enclosing <tt>rdf:RDF</tt> root element is
	 * optional if this root element contains just one element (e.g.
	 * <tt>rdf:Description</tt>). The setting is forwarded to the SAX filter.
	 *
	 * @param standAloneDocs <tt>true</tt> to enable the stand-alone mode,
	 * <tt>false</tt> to disable it.
	 **/
	public synchronized void setParseStandAloneDocuments(boolean standAloneDocs) {
		_saxFilter.setParseStandAloneDocuments(standAloneDocs);
	}

	/**
	 * Returns whether the parser is currently in a mode to parse stand-alone
	 * RDF documents.
	 *
	 * @return The value reported by the SAX filter.
	 * @see #setParseStandAloneDocuments
	 **/
	public synchronized boolean getParseStandAloneDocuments() {
		return _saxFilter.getParseStandAloneDocuments();
	}

	/**
	 * Parses the data from the supplied InputStream, using the supplied
	 * baseURI to resolve any relative URI references.
	 *
	 * @param in The InputStream from which to read the data.
	 * @param baseURI The URI associated with the data in the InputStream.
	 * @exception IOException If an I/O error occurred while data was read
	 * from the InputStream.
	 * @exception ParseException If the parser has found an unrecoverable
	 * parse error.
	 * @exception StatementHandlerException If the configured statement handler
	 * encountered an unrecoverable error.
	 * @exception IllegalArgumentException If the supplied input stream or
	 * base URI is <tt>null</tt>.
	 **/
	public synchronized void parse(InputStream in, String baseURI)
		throws IOException, ParseException, StatementHandlerException
	{
		if (in == null) {
			throw new IllegalArgumentException("Input stream cannot be 'null'");
		}
		if (baseURI == null) {
			throw new IllegalArgumentException("Base URI cannot be 'null'");
		}

		// The base URI travels with the input source as its system ID.
		InputSource inputSource = new InputSource(in);
		inputSource.setSystemId(baseURI);

		_parse(inputSource);
	}

	/**
	 * Parses the data from the supplied Reader, using the supplied baseURI
	 * to resolve any relative URI references.
	 *
	 * @param reader The Reader from which to read the data.
	 * @param baseURI The URI associated with the data in the Reader.
	 * @exception IOException If an I/O error occurred while data was read
	 * from the Reader.
	 * @exception ParseException If the parser has found an unrecoverable
	 * parse error.
	 * @exception StatementHandlerException If the configured statement handler
	 * has encountered an unrecoverable error.
	 * @exception IllegalArgumentException If the supplied reader or base URI
	 * is <tt>null</tt>.
	 **/
	public synchronized void parse(Reader reader, String baseURI)
		throws IOException, ParseException, StatementHandlerException
	{
		if (reader == null) {
			throw new IllegalArgumentException("Reader cannot be 'null'");
		}
		if (baseURI == null) {
			throw new IllegalArgumentException("Base URI cannot be 'null'");
		}

		// The base URI travels with the input source as its system ID.
		InputSource inputSource = new InputSource(reader);
		inputSource.setSystemId(baseURI);

		_parse(inputSource);
	}

	/**
	 * Runs the actual parse. The system ID of the input source is recorded
	 * as the document URI and passed to the SAX filter, after which an
	 * XMLReader is created, the SAX filter is installed as its content
	 * handler and the input source is parsed. All per-document state is
	 * cleared afterwards, whether or not parsing succeeded.
	 *
	 * @param inputSource The source to parse; its system ID is used as the
	 * document URI.
	 * @exception IOException If an I/O error occurred while reading.
	 * @exception ParseException If a SAXParseException was thrown, or a
	 * SAXException that does not wrap a StatementHandlerException.
	 * @exception StatementHandlerException If a SAXException wrapping a
	 * StatementHandlerException was thrown.
	 **/
	private void _parse(InputSource inputSource)
		throws IOException, ParseException, StatementHandlerException
	{
		try {
			_documentURI = inputSource.getSystemId();

			//_saxFilter.clear();
			_saxFilter.setDocumentURI(_documentURI);

			XMLReader xmlReader = XMLReaderFactory.createXMLReader();
			xmlReader.setContentHandler(_saxFilter);

			xmlReader.parse(inputSource);
		}
		catch (SAXParseException e) {
			// Report the underlying cause when there is one, together with
			// the location information carried by the SAX exception.
			Exception wrappedExc = e.getException();
			if (wrappedExc == null) {
				wrappedExc = e;
			}
			throw new ParseException(wrappedExc, e.getLineNumber(), e.getColumnNumber());
		}
		catch (SAXException e) {
			Exception wrappedExc = e.getException();
			if (wrappedExc == null) {
				wrappedExc = e;
			}

			// Errors raised by the statement handler are passed on
			// unchanged; anything else becomes a ParseException without
			// location information.
			if (wrappedExc instanceof StatementHandlerException) {
				throw (StatementHandlerException)wrappedExc;
			}
			else {
				throw new ParseException(wrappedExc, -1, -1);
			}
		}
		finally {
			// Clean up
			_saxFilter.clear();
			_baseURI = null;
			_xmlLang = null;
			_elementStack.clear();
			_usedIDs.clear();
			_bNodeIdMap.clear();
		}
	}

/*------------------------------------------------------+
| Methods called by SAXFilter                           |
+------------------------------------------------------*/

	// Return type of the setBaseURI declaration that continues on the next
	// line of this file.
	void
setBaseURI(org.openrdf.util.uri.URI baseURI) {
		// Records the base URI that relative URIs are resolved against.
		_baseURI = baseURI;
	}

	/**
	 * Sets the language that is in effect for literal values. An empty
	 * string is stored as <tt>null</tt>.
	 *
	 * @param xmlLang The value of an xml:lang attribute; may be empty or
	 * <tt>null</tt>.
	 **/
	void setXmlLang(String xmlLang) {
		_xmlLang = "".equals(xmlLang) ? null : xmlLang;
	}

	/**
	 * Handles the start of an element. Depending on what
	 * <tt>_topIsProperty()</tt> reports, the element is processed either as
	 * a node element or as a property element.
	 *
	 * @param namespaceURI The namespace URI of the element.
	 * @param localName The local name of the element.
	 * @param qName The qualified name of the element.
	 * @param atts The attributes of the element.
	 * @exception SAXException If processing the element fails.
	 **/
	void startElement(String namespaceURI, String localName, String qName, Atts atts)
		throws SAXException
	{
		if (_topIsProperty()) {
			// this element represents the subject and/or object of a statement
			_processNodeElt(namespaceURI, localName, qName, atts, false);
		}
		else {
			// this element represents a property
			_processPropertyElt(namespaceURI, localName, qName, atts, false);
		}
	}

	void endElement(String namespaceURI, String localName, String qName)
		throws SAXException
	{
		Object topElement = _peekStack(0);

		if (topElement instanceof NodeElement) {
			// Check if top node is 'volatile', meaning that it doesn't have a
			// start- and end element associated with it.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -