turtleparser.java

这是国外的一个开源推理机(Sesame)中的 Turtle 解析器源码。
JAVA
第 1 页 / 共 2 页
字号:
12 下一页
/*  Sesame - Storage and Querying architecture for RDF and RDF Schema *  Copyright (C) 2001-2005 Aduna * *  Contact:  *  	Aduna *  	Prinses Julianaplein 14 b *  	3817 CS Amersfoort *  	The Netherlands *  	tel. +33 (0)33 465 99 87 *  	fax. +33 (0)33 465 99 87 * *  	http://aduna.biz/ *  	http://www.openrdf.org/ *   *  This library is free software; you can redistribute it and/or *  modify it under the terms of the GNU Lesser General Public *  License as published by the Free Software Foundation; either *  version 2.1 of the License, or (at your option) any later version. * *  This library is distributed in the hope that it will be useful, *  but WITHOUT ANY WARRANTY; without even the implied warranty of *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU *  Lesser General Public License for more details. * *  You should have received a copy of the GNU Lesser General Public *  License along with this library; if not, write to the Free Software *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */package org.openrdf.rio.turtle;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.LineNumberReader;import java.io.PushbackReader;import java.io.Reader;import java.io.UnsupportedEncodingException;import java.util.HashMap;import java.util.Map;import org.openrdf.util.ASCIIUtil;import org.openrdf.util.xml.XmlDatatypeUtil;import org.openrdf.vocabulary.RDF;import org.openrdf.vocabulary.XmlSchema;import org.openrdf.model.BNode;import org.openrdf.model.Literal;import org.openrdf.model.Resource;import org.openrdf.model.URI;import org.openrdf.model.Value;import org.openrdf.model.ValueFactory;import org.openrdf.model.impl.ValueFactoryImpl;import org.openrdf.rio.NamespaceListener;import org.openrdf.rio.ParseErrorListener;import org.openrdf.rio.ParseException;import org.openrdf.rio.ParseLocationListener;import org.openrdf.rio.Parser;import org.openrdf.rio.StatementHandler;import 
org.openrdf.rio.StatementHandlerException;/** * Parser for Turtle files. A specification of Turtle can be found * <a href="http://www.ilrt.bris.ac.uk/discovery/2004/01/turtle/">in this document</a>. * This parser is not thread-safe, therefore its public methods are * synchronized. **/public class TurtleParser implements Parser {/*--------------+| Variables     |+--------------*/	private StatementHandler _statementHandler;	private NamespaceListener _nsListener;	private ParseErrorListener _errListener;	private ParseLocationListener _locListener;	/** The base URI for resolving relative URIs. **/	private org.openrdf.util.uri.URI _baseURI;	private LineNumberReader _lineReader;	private PushbackReader _reader;		private ValueFactory _valFactory;	/**	 * Mapping from bNode ID's as used in the RDF document to the	 * object created for it by the ValueFactory.	 **/	private Map _bNodeIdMap;	private Map _namespaceTable;	private Resource _subject;	private URI _predicate;	private Value _object;	/** Flag indicating whether the parser should check the data it parses. **/	boolean _verifyData = true;	/** 	 * Flag indicating whether the parser should preserve bnode identifiers	 * specified in the source.	 */	boolean _preserveBNodeIds = false;		/**	 * Indicates how datatyped literals should be handled. Legal	 * values are <tt>DT_IGNORE</tt>, <tt>DT_VERIFY</tt> and	 * <tt>DT_NORMALIZE</tt>.	 **/	private int _datatypeHandling;/*--------------+| Constructors  |+--------------*/	/**	 * Creates a new TurtleParser that will use a <tt>ValueFactoryImpl</tt> to	 * create object for resources, bNodes and literals.	 * @see org.openrdf.model.impl.ValueFactoryImpl	 **/	public TurtleParser() {		this(new ValueFactoryImpl());	}	/**	 * Creates a new TurtleParser that will use the supplied ValueFactory to	 * create objects for resources, bNodes and literals.	 *	 * @param valueFactory A ValueFactory.	 
**/	public TurtleParser(ValueFactory valueFactory) {		_valFactory = valueFactory;		_bNodeIdMap = new HashMap();		_datatypeHandling = DT_VERIFY;		_namespaceTable = new HashMap(16);	}/*--------------+| Methods       |+--------------*/	// Implements Parser.setStatementHandler(StatementHandler)	public synchronized void setStatementHandler(StatementHandler sh) {		_statementHandler = sh;	}	// Implements Parser.setNamespaceListener(NamespaceListener)	public void setNamespaceListener(NamespaceListener nl) {		_nsListener = nl;	}	// Implements Parser.setParseErrorListener(ParseErrorListener)	public synchronized void setParseErrorListener(ParseErrorListener el) {		_errListener = el;	}	// Implements Parser.setParseLocationListener(ParseLocationListener)	public synchronized void setParseLocationListener(ParseLocationListener el) {		_locListener = el;	}	// Implements Parser.setVerifyData(boolean)	public synchronized void setVerifyData(boolean verifyData) {		_verifyData = verifyData;	}	// Implements Parser.setPreserveBNodeIds(boolean)	public void setPreserveBNodeIds(boolean preserveBNodeIds) {		_preserveBNodeIds = preserveBNodeIds;	}	// Implements Parser.setStopAtFirstError(boolean)	public synchronized void setStopAtFirstError(boolean stopAtFirstError) {		// ignore	}	// Implements Parser.setDatatypeHandling(int)	public void setDatatypeHandling(int datatypeHandling) {		_datatypeHandling = datatypeHandling;	}	/**	 * Implementation of the <tt>parse(InputStream, String)</tt> method defined	 * in the Parser interface. 		 * 	 * @param in The InputStream from which to read the data. The InputStream is	 * supposed to contain UTF-8 encoded Unicode characters, as per the Turtle	 * specification.	 * @param baseURI The URI associated with the data in the InputStream.	 * @exception IOException If an I/O error occurred while data was read	 * from the InputStream.	 * @exception ParseException If the parser has found an unrecoverable	 * parse error.	 
* @exception StatementHandler If the configured statement handler	 * encountered an unrecoverable error.	 * @exception IllegalArgumentException If the supplied input stream or	 * base URI is <tt>null</tt>.	 **/	public synchronized void parse(InputStream in, String baseURI)		throws IOException, ParseException, StatementHandlerException	{		if (in == null) {			throw new IllegalArgumentException("Input stream can not be 'null'");		}		// Note: baseURI will be checked in parse(Reader, String)		try {			parse(new InputStreamReader(in, "UTF-8"), baseURI);		}		catch (UnsupportedEncodingException e) {			// Every platform should support the UTF-8 encoding...			throw new RuntimeException(e);		}	}	/**	 * Implementation of the <tt>parse(Reader, String)</tt> method defined in	 * the Parser interface. 		 * 	 * @param reader The Reader from which to read the data.	 * @param baseURI The URI associated with the data in the Reader.	 * @exception IOException If an I/O error occurred while data was read	 * from the InputStream.	 * @exception ParseException If the parser has found an unrecoverable	 * parse error.	 * @exception StatementHandler If the configured statement handler	 * encountered an unrecoverable error.	 * @exception IllegalArgumentException If the supplied reader or base URI	 * is <tt>null</tt>.	 
**/	public synchronized void parse(Reader reader, String baseURI)		throws IOException, ParseException, StatementHandlerException	{		if (reader == null) {			throw new IllegalArgumentException("Reader can not be 'null'");		}		if (baseURI == null) {			throw new IllegalArgumentException("base URI can not be 'null'");		}		_lineReader = new LineNumberReader(reader);		// Start counting lines at 1:		_lineReader.setLineNumber(1);		// Allow at most 2 characters to be pushed back:		_reader = new PushbackReader(_lineReader, 2);		// Store normalized base URI		_baseURI = new org.openrdf.util.uri.URI(baseURI);		_baseURI.normalize();		_reportLocation();		try {			_skipWhitespace();			int c = _peek();			while (c != -1) {				if (c == '#') {					// Comment, ignore line				 	_skipLine();				}				else if (c == '@') {					_parsePrefix();				}				else {					_parseTriple();				}				_skipWhitespace();				c = _peek();			}		}		finally {			_bNodeIdMap.clear();			_namespaceTable.clear();		}	}	/**	 * Reads characters from _reader until it finds a character that is not	 * a space, tab, line feed or newline.	 **/	private void _skipWhitespace()		throws IOException	{		int c = _reader.read();		while (TurtleUtil.isWhitespace(c)) {			c = _reader.read();		}		_unread(c);	}	/**	 * Reads characters from _reader until the first EOL has been read. The	 * first character after the EOL is returned. In case the end of the	 * character stream has been reached, -1 is returned.	 **/	private void _skipLine()		throws IOException	{		int c = _reader.read();		while (c != -1 && c != 0xD && c != 0xA) {			c = _reader.read();		}		// c is equal to -1, \r or \n.		// In case c is equal to \r, we should also read a following \n.		
if (c == 0xD) {			c = _reader.read();			if (c != 0xA) {				_unread(c);			}		}		_reportLocation();	}	private void _parsePrefix()		throws IOException, ParseException	{		// Verify that the first characters form the string "prefix"		_verifyCharacter(_reader.read(), "@");		_verifyCharacter(_reader.read(), "p");		_verifyCharacter(_reader.read(), "r");		_verifyCharacter(_reader.read(), "e");		_verifyCharacter(_reader.read(), "f");		_verifyCharacter(_reader.read(), "i");		_verifyCharacter(_reader.read(), "x");		_skipWhitespace();		// Read prefix ID (e.g. "rdf:" or ":")		StringBuffer prefixID = new StringBuffer(8);		int c = _reader.read();		while (c != ':') {			if (c == -1) {				_throwEOFException();			}			prefixID.append( (char)c );			c = _reader.read();		}		// c now contains the ':', ignore it		_reader.read();		_skipWhitespace();		// Read the namespace URI		URI namespace = _parseUriRef();		_skipWhitespace();		// Read closing dot		_verifyCharacter(_reader.read(), ".");		// Store namespace mapping and report it to the namespace listener (if any).		String prefixStr = prefixID.toString();		String namespaceStr = namespace.getURI();		_namespaceTable.put(prefixStr, namespaceStr);		if (_nsListener != null) {			_nsListener.handleNamespace(prefixStr, namespaceStr);		}	}	private void _parseTriple()		throws IOException, ParseException, StatementHandlerException	{		_parseSubject();		_skipWhitespace();		_parsePredicateObjectList();		_skipWhitespace();		_verifyCharacter(_reader.read(), ".");		_subject = null;		_predicate = null;		_object = null;	}	private void _parsePredicateObjectList()		throws IOException, ParseException, StatementHandlerException	{		_predicate = _parsePredicate();		_skipWhitespace();		_parseObjectList();		_skipWhitespace();		int c = _peek();		while (c == ';') {			_reader.read();			_skipWhitespace();			c = _peek();			if (c == '.' 
|| // end of triple				c == ']') // end of predicateObjectList inside blank node			{				break;			}			_predicate = _parsePredicate();			_skipWhitespace();			_parseObjectList();			_skipWhitespace();			c = _peek();		}	}	private void _parseObjectList()		throws IOException, ParseException, StatementHandlerException	{		_parseObject();				_skipWhitespace();		int c = _peek();		while (c == ',') {			_reader.read();			_skipWhitespace();			_parseObject();			_skipWhitespace();			c = _peek();		}	}	private void _parseSubject()		throws IOException, ParseException, StatementHandlerException	{		int c = _peek();		if (c == '<' || c == ':' || TurtleUtil.isPrefixStartChar(c)) {			// subject is an uriref or a qname			_subject = _parseResource();		}		else if (c == '_' || c == '[' || c == '(') {			// subject is a blank node (or an empty list)			_subject = _parseBlank();		}		else if (c == -1) {			_throwEOFException();		}		else {			_throwParseException("Expected an uriref, qname or node ID, found '" + (char)c + "'");		}	}	private URI _parsePredicate()		throws IOException, ParseException	{		// Check the short-cut 'a' is used		int c1 = _reader.read();		if (c1 == 'a') {			int c2 = _reader.read();			if (TurtleUtil.isWhitespace(c2)) {				// Short-cut is used, return the rdf:type URI				return _createURI(RDF.TYPE);			}			// Short-cut is not used, unread all characters			_unread(c2);		}		_unread(c1);		// Predicate is a normal resource		return _parseResource();	}	private void _parseObject()		throws IOException, ParseException, StatementHandlerException	{		int c = _peek();		if (c == '<' || c == ':' || TurtleUtil.isPrefixStartChar(c)) {			// object is an uriref or a qname			_object = _parseResource();		}		else if (c == '_' || c == '[' || c == '(') {			// object is a blank node (or an empty list)			_object = _parseBlank();		}		else if (c == '"') {			// object is a literal			_object = _parseLiteral();		}		else if (ASCIIUtil.isNumber(c)) {			// object is an integer			_object = _parseInteger();		}		else 
if (c == -1) {			_throwEOFException();		}		else {			_throwParseException("Expected resource, node ID, literal or integer, found '" + (char)c + "'");		}		_statementHandler.handleStatement(_subject, _predicate, _object);	}	private Literal _parseLiteral()		throws IOException, ParseException	{		// First character should be '"'		_verifyCharacter(_reader.read(), "\"");		// Read up to the next '"' character		StringBuffer label = new StringBuffer(32);		int c = _reader.read();		while (c != '"') {			if (c == -1) {				_throwEOFException();			}			label.append( (char)c );
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -