n3.g

Jena推理机
第 1 页 / 共 2 页
字号:
12 下一页
//-*- mode: antlr -*-

/* This is part of the Jena RDF Framework.
 * (c) Copyright 2002-2003, Hewlett-Packard Company, all rights reserved.
 * [See end of file for details]
 */

/* This N3 grammar is based on:
 *     http://www.w3.org/DesignIssues/Notation3.html
 *       The grammar uses the rule names in this (as of August 2002)
 *     Tests in http://www.w3.org/2000/10/swap/test/syntax
 *     http://www.w3.org/2000/10/swap/rdfn3.g
 *     http://www.w3.org/2000/10/swap/rdfn3-gram.html
 *
 * For information about N3:
 *     http://infomesh.net/2002/notation3/
 *       Has references to other grammar for N3
 *     http://notabug.com/2002/n3/
 *
 * Grammar notes: 
 *         http://infomesh.net/2002/notation3/#deprecated
 *  N3 is UTF-8
 * 
 * N-Triples is defined in 
 *   http://www.w3.org/TR/rdf-testcases/
 * which is where the lang tag / datatype syntax comes from
 */


header
{
package com.hp.hpl.jena.n3 ;
import java.io.* ;
import antlr.TokenStreamRecognitionException ;
}


// Parser section of the grammar.
class N3AntlrParser extends Parser ;
options
{
	// We stop parsing on any error: no default handlers are generated,
	// so only rules with an explicit "exception" clause catch anything.
	defaultErrorHandler = false ;

	// One token of lookahead.
	k = 1 ;

	// Tree construction is on; rules and elements suffixed with '!' opt out.
	buildAST = true ;
}

// Token types declared for the parser.  The declaration order is kept as is.
tokens
{
	// Node types created by the anonnode rule
	ANON ;
	FORMULA ;

	QNAME ;
	SEP ;
	KEYWORD ;
	NAME_OP ;

	// Keyword tokens
	KW_THIS ;
	KW_OF ;
	KW_HAS ;
	KW_A ;
	KW_IS ;

	// Tokens for lists : next stage chooses the namespace for lists
	TK_LIST ;
	TK_LIST_FIRST ;
	TK_LIST_REST ;
	TK_LIST_NIL ;

	AT_PREFIX ;
	AT_LANG ;

	STRING ;
	LITERAL ;
}

// Parser operations for emitting results.
//
// This is a streaming parser: the AST is only built for statements
// and directives, not for the whole file.  Triples and directives are
// output as soon as they are found.
//
// The event handler will need to do some work to maintain the bNode
// references (_:xxxx in the file and generated =:NNNN) and to maintain
// the prefix mapping in the generated quads.
//
// The event handler needs to filter for non-RDF-isms.

{
	// Extra code for the parser.

	// When set, the list rule emits an additional type quad for each list cell.
	boolean emitListTypeQuad = false ;

	// Lexer consulted for line/column positions.  It stays null until
	// setLexer() is called.
	N3AntlrLexer lexer = null ;
	void setLexer(N3AntlrLexer _lexer) { lexer = _lexer ; }

	// Current lexer position, or -1 when no lexer has been supplied, so that
	// position lookups never fail with a NullPointerException.
	private int lexerLine()   { return ( lexer == null ) ? -1 : lexer.getLine() ; }
	private int lexerColumn() { return ( lexer == null ) ? -1 : lexer.getColumn() ; }

	// Internally generated anon id.  Avoid clash with _:xxx
	private int anonId = 0 ;
	private String genAnonId() { return "=:"+(anonId++) ; }

	// Formula zero is the outer context.  Avoid clash with other labels.
	private int formulaId = 1 ;
	private String genFormulaId() { return "{}:"+(formulaId++) ; }

	// Receiver of all parse events; checked for null in startDocument().
	private N3ParserEventHandler handler = null ;

	public void setEventHandler(N3ParserEventHandler h) { this.handler = h ; }

	// Signals the start of a parse.  Fails fast if no handler was installed.
	private void startDocument()
	{
		if ( handler == null )
			throw new RuntimeException("N3AntlrParser: No sink specified") ;
		handler.startDocument() ;
	}

	// Signals the end of a parse.
	private void endDocument() { handler.endDocument() ; }


	// Tells the handler that the formula labelled 'context' is being entered.
	private void startFormula(String context)
	{
		handler.startFormula(lexerLine(), context) ;
	}

	// Tells the handler that the formula labelled 'context' is being left.
	private void endFormula(String context)
	{
		handler.endFormula(lexerLine(), context) ;
	}

	// Label of the formula being parsed; null outside any { } formula.
	private String currentFormula = null ;

	// Sends one (subject, property, object) statement to the handler,
	// tagged with the current line and the current formula label.
	private void emitQuad(AST subj, AST prop, AST obj)
	{
		handler.quad(lexerLine(), subj, prop, obj, currentFormula ) ;
	}

	// Sends a one-argument directive to the handler.
	private void directive(AST directive, AST arg)
	{
		handler.directive(lexerLine(),
						  directive, new AST[]{arg},
						  currentFormula) ;
	}

	// Sends a two-argument directive to the handler.
	private void directive(AST directive, AST arg1, AST arg2)
	{
		handler.directive(lexerLine(),
						  directive, new AST[]{arg1, arg2},
						  currentFormula) ;
	}

	// Routes a recognition error to the event handler.  Without a handler
	// the superclass implementation is used instead of failing.
	public void reportError(RecognitionException ex)
	{
		if ( handler == null )
		{
			super.reportError(ex) ;
			return ;
		}
		handler.error(ex, "N3 error: ["+ex.line+":"+ex.column+"] "+ex.getMessage());
	}

	/** Parser error-reporting function can be overridden in subclass */
	public void reportError(String s)
	{
		// Without a handler the superclass implementation is used instead of failing.
		if ( handler == null )
		{
			super.reportError(s) ;
			return ;
		}
		handler.error(null, "N3AntlrParser(s): ["+lexerLine()+":"+lexerColumn()+"] "+s) ;
	}
}

// The top level rule
// Entry rule: a sequence of directives and statements up to end of input.
// startDocument() is called before anything is parsed.  endDocument() is
// called only after EOF has been matched, so the handler is never told the
// document is complete when trailing input turns out to be a syntax error.
// Recognition errors are reported through reportError() and then rethrown.
document!:
		{ startDocument() ; }
		(n3Directive | statement)*	// Not a statementList: must end in a SEP
		EOF
		{ endDocument() ; }
		;

		exception
		catch [RecognitionException ex]
		{ reportError(ex) ; throw ex ; }
		catch [TokenStreamRecognitionException ex]
		{ reportError(ex.recog) ; throw ex.recog ; }


// A directive as written at document level: the directive itself
// followed by the separator token.
n3Directive!
	: n3Directive0 SEP!
	;

// Prefix directive without its terminating separator.  The keyword token,
// the namespace prefix and the URI reference are passed to directive().
n3Directive0!
	: kw:AT_PREFIX pfx:nsprefix ref:uriref
		{ directive(#kw, #pfx, #ref) ; }
	;

// A statement is "item verb item." with various
// syntactic sugar for multiple properties and objects.
// "verb" is a node and also the shorthand forms: 'a', => = etc
// "item" is just a node presently.

// A statement body followed by the separator token.
statement!: statement0 SEP! ;

// Statement body without the separator: a subject whose AST is handed
// to propertyList as the subject of the properties that follow.
statement0!
	: s:subject
	  propertyList[#s]
	;

// List of statements and directives that does not necessarily end in a SEP.
// Possibly empty.
formulaList!
	:	// One statement or directive, optionally followed by SEP and a further list
		( statement0 | n3Directive0 )
		( SEP formulaList )?
	|	// Empty list
	;

// The subject of a statement is simply an item.
subject: item ;

// Properties attached to 'subj'.
propertyList![AST subj]
	:	// NAME_OP: the anonnode that follows is labelled with the subject,
		// then further properties of the same subject may follow.
		NAME_OP! anonnode[subj] propertyList[subj]
	|	// One verb with its values, optionally followed by SEMI and more properties.
		propValue[subj]
		( SEMI propertyList[subj] )?
	|	// void : allows for [ :a :b ] and empty list "; .".
	;

// One verb together with the values it relates to 'subj'.
propValue [AST subj]
	:	// Forward sense: subj verb object(s)
		fwd:verb objectList[subj, #fwd]
	|	// Reverse the subject and object
		rev:verbReverse subjectList[subj, #rev]
	;

// Comma-separated items for a reversed verb: each item becomes the subject
// of a quad whose object is 'oldSub'.
subjectList![AST oldSub, AST prop]
	: s:item
		{ emitQuad(#s, prop, oldSub) ; }
	  ( COMMA subjectList[oldSub, prop] )?
	;

// Comma-separated objects of (subj, prop): one quad is emitted per item.
objectList! [AST subj, AST prop]
	: o:item
		{ emitQuad(subj, prop, #o) ; }
	  ( COMMA objectList[subj, prop] )?
	| // Allows for empty list ", ."
	;


// Node, or path which evaluates to a node.
// A node optionally followed by path steps.  Each step emits one quad that
// links the node reached so far to a fresh anonymous node, which then
// becomes the current node.  The rule's AST is the last current node.
item
	: n:node
	(
		// Forward step: (current, n1, fresh)
		PATH! n1:node
		{
			AST fwdAnon = #([ANON, genAnonId()]) ;
			emitQuad(#n, #n1, fwdAnon) ;
			#n = fwdAnon ;
		}
	|
		// Backward step: (fresh, n2, current)
		RPATH! n2:node
		{
			AST revAnon = #([ANON, genAnonId()]) ;
			emitQuad(revAnon, #n2, #n) ;
			#n = revAnon ;
		}
	)*
	{ #item = #n ; }
	;

	//:	node ;

// Test entry point: parses one verb and hands its AST, together with
// System.out, to AntlrUtils.ast().
testPoint!
	: v:verb
		{ AntlrUtils.ast(System.out, #v) ; }
	;


// The alternatives that can stand as a node.
node
	: qname
	| uriref
	| anonnode[null]	// no label supplied: anonnode creates one itself
	| literal
	| kwTHIS
	| variableDT
	;

// Keywords: do not use parser literals as things like URIREFs
// get misclassified.

// One rule per keyword token.  The rules marked '!' build no AST.
kwTHIS
	: KW_THIS
	;

kwOF!
	: KW_OF
	;

kwHAS!
	: KW_HAS
	;

kwA
	: KW_A
	;

kwIS!
	: KW_IS
	;



// A verb: an item, the keyword 'a', one of the shorthand operator tokens,
// the deprecated bracketed-node form, or 'has' followed by an item.
verb
	: item
	| kwA
	| EQUAL
	| ARROW_R
	| ARROW_L
	| ARROW_PATH_L! node ARROW_PATH_R!	// Deprecated
	| kwHAS! item
	;

// Verbs that reverse the sense of subject and object
// "is <node> of": the two keyword rules are suppressed from the AST,
// so only the node remains.
verbReverse
	: kwIS!
	  n:node
	  kwOF!
	;

// Label is set if we have seen a :- in a propertyList
// Parses one of three bracketed forms: [ ... ] (blank node), { ... } (formula)
// or ( ... ) (list).
//   label : AST node to use for the construct, or null to have one generated here.
// In the first two forms the rule's AST is set to the label node.
anonnode[AST label]
    { String oldCxt = null ; String cxt = null ; }
		// BNode
		// A missing label is replaced by a fresh ANON node; the enclosed
		// property list is parsed with that node as its subject.
	: LBRACK!
		{ if ( label == null )
	          label = #([ANON, genAnonId()]) ;
		  #anonnode = label ;
		}
		propertyList[label]
	  RBRACK!

		// Formula.
		// Push old formula context, generate new one.
		// The label text becomes currentFormula while the body is parsed and
		// the previous value is put back before RCURLY is matched.
		// NOTE(review): the previous context is not restored if formulaList
		// throws; the parser options say parsing stops on any error.
	| LCURLY!
		{ oldCxt = currentFormula ;
		  if ( label == null )
	          label = #([FORMULA, genFormulaId()]) ;
	      cxt = label.getText() ;
		  currentFormula = cxt ;
		  startFormula(cxt) ;
		  #anonnode = label ;
		}
		formulaList
		{ endFormula(cxt) ; currentFormula = oldCxt ;}
	  RCURLY!

		// List syntax
		// The label (possibly null) is passed on to the list rule.
	| LPAREN!
		list[label]
	  RPAREN!
	;

// Parses the items of a list and emits the quads describing it.
//   label : AST node for the head cell, or null to have an ANON node generated.
// The rule's AST is the head cell, or a TK_LIST_NIL node for the empty list.
list[AST label]
	: i:item
	  {
	  	// Choose the node for this cell before recursing into the tail.
	  	if ( label == null )
	          label = #([ANON, genAnonId()]) ;
		#list = label ;
	  }
	  // NB The list is generated from tail to head
	  // because we recurse, then generate quads
	  n:list[null]	
	  {
	  	// Optional type quad, then the first/rest quads for this cell.
	  	if ( emitListTypeQuad )
	  	    emitQuad(label, #([KW_A, "list"]), #([TK_LIST, "List"]) );
	    emitQuad(label,  #([TK_LIST_FIRST, "first"]),   #i);
		emitQuad(label,  #([TK_LIST_REST, "rest"]),     #n) ;
	  }
	| { #list = #([TK_LIST_NIL, "nil"]); } // void - generate list:nil
	;


	// Extract from the N-Triples syntax
	//    literal ::= langString | datatypeString  
	//    langString ::= '"' string '"' ( '@' language )?  
	//    datatypeString ::= langString '^^' uriref 
	//    language ::= [a-z0-9]+ ('-' [a-z0-9]+ )? 
	// This is a permissive parse and allows the
	// lang and the datatype to be reversed.
	// Actually, the grammar allows two lang tags or two datatype
	// specifications.
	// N3 adds numbers (XSD integers, XSD floats)

// A number, or a string with its modifiers.  In the string case the node
// referenced by #literal is retyped to LITERAL.
literal
	: n:NUMBER
	| s:STRING literalModifier
		{ #literal.setType(LITERAL) ; }
	;
	
// Two modifier slots in a row; each slot may be empty.
literalModifier
	: literalModifier1
	  literalModifier1
	;
	
// A single literal modifier: a language tag, a datatype, or nothing.
// Each non-empty alternative is guarded by a syntactic predicate on its
// first token.
literalModifier1
	: (AT_LANG) => AT_LANG
	| (DATATYPE) => DATATYPE dt:datatype
		// Rebuild the result as a DATATYPE node with the datatype AST as its child.
		{ #literalModifier1 = #([DATATYPE], #dt) ; }
	|
	;

// What may follow the datatype marker of a literal.
datatype
	: qname
	| uriref
	| variableNoDT
	// Allowing a literal here is merely symmetry.
	// We allow literals everywhere else.
	| literal
	;

// Restricted case for nsprefix.
nsprefix: ns:QNAME { ns.getText().endsWith(":") }? ;
	exception
	catch [SemanticException ex]
	{ 
		RecognitionException rEx =
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -