📄 turtleparser.java
字号:
/* Sesame - Storage and Querying architecture for RDF and RDF Schema * Copyright (C) 2001-2005 Aduna * * Contact: * Aduna * Prinses Julianaplein 14 b * 3817 CS Amersfoort * The Netherlands * tel. +33 (0)33 465 99 87 * fax. +33 (0)33 465 99 87 * * http://aduna.biz/ * http://www.openrdf.org/ * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */package org.openrdf.rio.turtle;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.LineNumberReader;import java.io.PushbackReader;import java.io.Reader;import java.io.UnsupportedEncodingException;import java.util.HashMap;import java.util.Map;import org.openrdf.util.ASCIIUtil;import org.openrdf.util.xml.XmlDatatypeUtil;import org.openrdf.vocabulary.RDF;import org.openrdf.vocabulary.XmlSchema;import org.openrdf.model.BNode;import org.openrdf.model.Literal;import org.openrdf.model.Resource;import org.openrdf.model.URI;import org.openrdf.model.Value;import org.openrdf.model.ValueFactory;import org.openrdf.model.impl.ValueFactoryImpl;import org.openrdf.rio.NamespaceListener;import org.openrdf.rio.ParseErrorListener;import org.openrdf.rio.ParseException;import org.openrdf.rio.ParseLocationListener;import org.openrdf.rio.Parser;import org.openrdf.rio.StatementHandler;import org.openrdf.rio.StatementHandlerException;/** * Parser 
for Turtle files. A specification of Turtle can be found
 * <a href="http://www.ilrt.bris.ac.uk/discovery/2004/01/turtle/">in this document</a>.
 * This parser is not thread-safe, therefore its public methods are
 * synchronized.
 **/
public class TurtleParser implements Parser {

/*--------------+
| Variables     |
+--------------*/

    // ---- Callbacks registered by the client ----

    /** Receives each statement as soon as its object has been parsed. **/
    private StatementHandler _statementHandler;

    /** Notified of every prefix/namespace declaration; may be null. **/
    private NamespaceListener _nsListener;

    /** Listener for parse errors, as supplied by the client. **/
    private ParseErrorListener _errListener;

    /** Listener for parse location updates, as supplied by the client. **/
    private ParseLocationListener _locListener;

    // ---- Input state, (re)initialized for every parse ----

    /** The base URI for resolving relative URIs. **/
    private org.openrdf.util.uri.URI _baseURI;

    /** Wraps the supplied reader so that line numbers can be tracked. **/
    private LineNumberReader _lineReader;

    /** Reader that all parse methods read from; supports pushing characters back. **/
    private PushbackReader _reader;

    // ---- Value creation and lookup tables ----

    /** Factory used to create resources, bNodes and literals. **/
    private ValueFactory _valFactory;

    /**
     * Mapping from bNode ID's as used in the RDF document to the
     * object created for it by the ValueFactory.
     **/
    private Map _bNodeIdMap;

    /** Mapping from declared prefixes to their namespace strings. **/
    private Map _namespaceTable;

    // ---- Components of the statement currently being parsed ----

    private Resource _subject;

    private URI _predicate;

    private Value _object;

    // ---- Parser options ----

    /** Flag indicating whether the parser should check the data it parses. **/
    boolean _verifyData = true;

    /**
     * Flag indicating whether the parser should preserve bnode identifiers
     * specified in the source.
     */
    boolean _preserveBNodeIds = false;

    /**
     * Indicates how datatyped literals should be handled. Legal
     * values are <tt>DT_IGNORE</tt>, <tt>DT_VERIFY</tt> and
     * <tt>DT_NORMALIZE</tt>.
     **/
    private int _datatypeHandling;

/*--------------+
| Constructors  |
+--------------*/

    /**
     * Creates a new TurtleParser that delegates to
     * {@link #TurtleParser(ValueFactory)} with a fresh
     * <tt>ValueFactoryImpl</tt>, which is then used to create objects for
     * resources, bNodes and literals.
     *
     * @see org.openrdf.model.impl.ValueFactoryImpl
     **/
    public TurtleParser() {
        this(new ValueFactoryImpl());
    }

    /**
     * Creates a new TurtleParser that will use the supplied ValueFactory to
     * create objects for resources, bNodes and literals.
     *
     * @param valueFactory A ValueFactory.
**/ public TurtleParser(ValueFactory valueFactory) { _valFactory = valueFactory; _bNodeIdMap = new HashMap(); _datatypeHandling = DT_VERIFY; _namespaceTable = new HashMap(16); }/*--------------+| Methods |+--------------*/ // Implements Parser.setStatementHandler(StatementHandler) public synchronized void setStatementHandler(StatementHandler sh) { _statementHandler = sh; } // Implements Parser.setNamespaceListener(NamespaceListener) public void setNamespaceListener(NamespaceListener nl) { _nsListener = nl; } // Implements Parser.setParseErrorListener(ParseErrorListener) public synchronized void setParseErrorListener(ParseErrorListener el) { _errListener = el; } // Implements Parser.setParseLocationListener(ParseLocationListener) public synchronized void setParseLocationListener(ParseLocationListener el) { _locListener = el; } // Implements Parser.setVerifyData(boolean) public synchronized void setVerifyData(boolean verifyData) { _verifyData = verifyData; } // Implements Parser.setPreserveBNodeIds(boolean) public void setPreserveBNodeIds(boolean preserveBNodeIds) { _preserveBNodeIds = preserveBNodeIds; } // Implements Parser.setStopAtFirstError(boolean) public synchronized void setStopAtFirstError(boolean stopAtFirstError) { // ignore } // Implements Parser.setDatatypeHandling(int) public void setDatatypeHandling(int datatypeHandling) { _datatypeHandling = datatypeHandling; } /** * Implementation of the <tt>parse(InputStream, String)</tt> method defined * in the Parser interface. * * @param in The InputStream from which to read the data. The InputStream is * supposed to contain UTF-8 encoded Unicode characters, as per the Turtle * specification. * @param baseURI The URI associated with the data in the InputStream. * @exception IOException If an I/O error occurred while data was read * from the InputStream. * @exception ParseException If the parser has found an unrecoverable * parse error. 
* @exception StatementHandler If the configured statement handler * encountered an unrecoverable error. * @exception IllegalArgumentException If the supplied input stream or * base URI is <tt>null</tt>. **/ public synchronized void parse(InputStream in, String baseURI) throws IOException, ParseException, StatementHandlerException { if (in == null) { throw new IllegalArgumentException("Input stream can not be 'null'"); } // Note: baseURI will be checked in parse(Reader, String) try { parse(new InputStreamReader(in, "UTF-8"), baseURI); } catch (UnsupportedEncodingException e) { // Every platform should support the UTF-8 encoding... throw new RuntimeException(e); } } /** * Implementation of the <tt>parse(Reader, String)</tt> method defined in * the Parser interface. * * @param reader The Reader from which to read the data. * @param baseURI The URI associated with the data in the Reader. * @exception IOException If an I/O error occurred while data was read * from the InputStream. * @exception ParseException If the parser has found an unrecoverable * parse error. * @exception StatementHandler If the configured statement handler * encountered an unrecoverable error. * @exception IllegalArgumentException If the supplied reader or base URI * is <tt>null</tt>. 
**/ public synchronized void parse(Reader reader, String baseURI) throws IOException, ParseException, StatementHandlerException { if (reader == null) { throw new IllegalArgumentException("Reader can not be 'null'"); } if (baseURI == null) { throw new IllegalArgumentException("base URI can not be 'null'"); } _lineReader = new LineNumberReader(reader); // Start counting lines at 1: _lineReader.setLineNumber(1); // Allow at most 2 characters to be pushed back: _reader = new PushbackReader(_lineReader, 2); // Store normalized base URI _baseURI = new org.openrdf.util.uri.URI(baseURI); _baseURI.normalize(); _reportLocation(); try { _skipWhitespace(); int c = _peek(); while (c != -1) { if (c == '#') { // Comment, ignore line _skipLine(); } else if (c == '@') { _parsePrefix(); } else { _parseTriple(); } _skipWhitespace(); c = _peek(); } } finally { _bNodeIdMap.clear(); _namespaceTable.clear(); } } /** * Reads characters from _reader until it finds a character that is not * a space, tab, line feed or newline. **/ private void _skipWhitespace() throws IOException { int c = _reader.read(); while (TurtleUtil.isWhitespace(c)) { c = _reader.read(); } _unread(c); } /** * Reads characters from _reader until the first EOL has been read. The * first character after the EOL is returned. In case the end of the * character stream has been reached, -1 is returned. **/ private void _skipLine() throws IOException { int c = _reader.read(); while (c != -1 && c != 0xD && c != 0xA) { c = _reader.read(); } // c is equal to -1, \r or \n. // In case c is equal to \r, we should also read a following \n. 
if (c == 0xD) { c = _reader.read(); if (c != 0xA) { _unread(c); } } _reportLocation(); } private void _parsePrefix() throws IOException, ParseException { // Verify that the first characters form the string "prefix" _verifyCharacter(_reader.read(), "@"); _verifyCharacter(_reader.read(), "p"); _verifyCharacter(_reader.read(), "r"); _verifyCharacter(_reader.read(), "e"); _verifyCharacter(_reader.read(), "f"); _verifyCharacter(_reader.read(), "i"); _verifyCharacter(_reader.read(), "x"); _skipWhitespace(); // Read prefix ID (e.g. "rdf:" or ":") StringBuffer prefixID = new StringBuffer(8); int c = _reader.read(); while (c != ':') { if (c == -1) { _throwEOFException(); } prefixID.append( (char)c ); c = _reader.read(); } // c now contains the ':', ignore it _reader.read(); _skipWhitespace(); // Read the namespace URI URI namespace = _parseUriRef(); _skipWhitespace(); // Read closing dot _verifyCharacter(_reader.read(), "."); // Store namespace mapping and report it to the namespace listener (if any). String prefixStr = prefixID.toString(); String namespaceStr = namespace.getURI(); _namespaceTable.put(prefixStr, namespaceStr); if (_nsListener != null) { _nsListener.handleNamespace(prefixStr, namespaceStr); } } private void _parseTriple() throws IOException, ParseException, StatementHandlerException { _parseSubject(); _skipWhitespace(); _parsePredicateObjectList(); _skipWhitespace(); _verifyCharacter(_reader.read(), "."); _subject = null; _predicate = null; _object = null; } private void _parsePredicateObjectList() throws IOException, ParseException, StatementHandlerException { _predicate = _parsePredicate(); _skipWhitespace(); _parseObjectList(); _skipWhitespace(); int c = _peek(); while (c == ';') { _reader.read(); _skipWhitespace(); c = _peek(); if (c == '.' 
|| // end of triple c == ']') // end of predicateObjectList inside blank node { break; } _predicate = _parsePredicate(); _skipWhitespace(); _parseObjectList(); _skipWhitespace(); c = _peek(); } } private void _parseObjectList() throws IOException, ParseException, StatementHandlerException { _parseObject(); _skipWhitespace(); int c = _peek(); while (c == ',') { _reader.read(); _skipWhitespace(); _parseObject(); _skipWhitespace(); c = _peek(); } } private void _parseSubject() throws IOException, ParseException, StatementHandlerException { int c = _peek(); if (c == '<' || c == ':' || TurtleUtil.isPrefixStartChar(c)) { // subject is an uriref or a qname _subject = _parseResource(); } else if (c == '_' || c == '[' || c == '(') { // subject is a blank node (or an empty list) _subject = _parseBlank(); } else if (c == -1) { _throwEOFException(); } else { _throwParseException("Expected an uriref, qname or node ID, found '" + (char)c + "'"); } } private URI _parsePredicate() throws IOException, ParseException { // Check the short-cut 'a' is used int c1 = _reader.read(); if (c1 == 'a') { int c2 = _reader.read(); if (TurtleUtil.isWhitespace(c2)) { // Short-cut is used, return the rdf:type URI return _createURI(RDF.TYPE); } // Short-cut is not used, unread all characters _unread(c2); } _unread(c1); // Predicate is a normal resource return _parseResource(); } private void _parseObject() throws IOException, ParseException, StatementHandlerException { int c = _peek(); if (c == '<' || c == ':' || TurtleUtil.isPrefixStartChar(c)) { // object is an uriref or a qname _object = _parseResource(); } else if (c == '_' || c == '[' || c == '(') { // object is a blank node (or an empty list) _object = _parseBlank(); } else if (c == '"') { // object is a literal _object = _parseLiteral(); } else if (ASCIIUtil.isNumber(c)) { // object is an integer _object = _parseInteger(); } else if (c == -1) { _throwEOFException(); } else { _throwParseException("Expected resource, node ID, literal or 
integer, found '" + (char)c + "'"); } _statementHandler.handleStatement(_subject, _predicate, _object); } private Literal _parseLiteral() throws IOException, ParseException { // First character should be '"' _verifyCharacter(_reader.read(), "\""); // Read up to the next '"' character StringBuffer label = new StringBuffer(32); int c = _reader.read(); while (c != '"') { if (c == -1) { _throwEOFException(); } label.append( (char)c );
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -