📄 n3.g
字号:
//-*- mode: antlr -*-
/* This is part of the Jena RDF Framework.
* (c) Copyright 2002-2003, Hewlett-Packard Company, all rights reserved.
* [See end of file for details]
*/
/* This N3 grammar is based on:
* http://www.w3.org/DesignIssues/Notation3.html
* The grammar uses the rule names in this (as of August 2002)
* Tests in http://www.w3.org/2000/10/swap/test/syntax
* http://www.w3.org/2000/10/swap/rdfn3.g
* http://www.w3.org/2000/10/swap/rdfn3-gram.html
*
* For information about N3:
* http://infomesh.net/2002/notation3/
* Has references to other grammar for N3
* http://notabug.com/2002/n3/
*
* Grammar notes:
* http://infomesh.net/2002/notation3/#deprecated
* N3 is UTF-8
*
* N-Triples is defined in
* http://www.w3.org/TR/rdf-testcases/
* which is where the lang tag / datatype syntax comes from
*/
header
{
package com.hp.hpl.jena.n3 ;
import java.io.* ;
import antlr.TokenStreamRecognitionException ;
}
// Parser section of the grammar: the rules below are generated into the
// class N3AntlrParser.
class N3AntlrParser extends Parser ;
options
{
// We stop parsing on any error
// (the "document" rule declares its own exception clauses to report
// the error and rethrow it).
defaultErrorHandler = false ;
// One token of lookahead.
k = 1 ;
// Rules construct AST fragments; the actions below hand these to the
// event handler as subject/property/object and directive arguments.
buildAST = true;
}
// Token types added to the parser's vocabulary.
// In the visible rules ANON, FORMULA, KW_A, TK_LIST, TK_LIST_FIRST,
// TK_LIST_REST and TK_LIST_NIL are used to build synthetic AST nodes inside
// actions, and LITERAL is applied to string literals via setType().
// The other types are matched directly by rules (or not referenced in the
// visible part of the grammar); presumably the lexer assigns them - confirm
// against the lexer section.
tokens
{
ANON ; FORMULA ;
QNAME ; SEP ; KEYWORD ; NAME_OP ;
KW_THIS ; KW_OF ; KW_HAS ; KW_A ; KW_IS ;
// Tokens for lists : next stage chooses the namespace for lists
TK_LIST ; TK_LIST_FIRST ; TK_LIST_REST ; TK_LIST_NIL ;
AT_PREFIX ; AT_LANG ;
STRING ; LITERAL ;
}
// Parser operations for emitting results.
//
// This is a streaming parser: the AST is only built for statements
// and directives, not for the whole file. Triples and directives are
// output as soon as they are found.
//
// The event handler will need to do some work to maintain the bNode
// references (_:xxxx in the file and generated =:NNNN) and to maintain
// the prefix mapping in the generated quads.
//
// The event handler needs to filter for non-RDF-isms.
{
    // Extra code for the parser.

    // When true, the "list" rule also emits a type quad for every list cell.
    boolean emitListTypeQuad = false ;

    // Lexer that supplies the positions attached to events and error messages.
    N3AntlrLexer lexer = null ;
    void setLexer(N3AntlrLexer _lexer) { lexer = _lexer ; }

    // Current lexer line, or -1 when no lexer has been attached with setLexer().
    // Guarding here keeps a missing lexer from turning an event or an error
    // report into a NullPointerException that hides the real problem.
    private int currentLine()
    {
        return (lexer == null) ? -1 : lexer.getLine() ;
    }

    // Current lexer column, or -1 when no lexer has been attached with setLexer().
    private int currentColumn()
    {
        return (lexer == null) ? -1 : lexer.getColumn() ;
    }

    // Internally generated anon id. Avoid clash with _:xxx
    private int anonId = 0 ;
    private String genAnonId() { return "=:"+(anonId++) ; }

    // Formula zero is the outer context. Avoid clash with other labels.
    private int formulaId = 1 ;
    private String genFormulaId() { return "{}:"+(formulaId++) ; }

    // Receiver of all parse events; must be set before the "document" rule runs.
    private N3ParserEventHandler handler = null ;
    public void setEventHandler(N3ParserEventHandler h) { this.handler = h ; }

    // Signals the start of the document; fails fast if no handler was installed.
    private void startDocument()
    {
        if ( handler == null )
            throw new RuntimeException("N3AntlrParser: No sink specified") ;
        handler.startDocument() ;
    }

    // Signals the end of the document.
    private void endDocument() { handler.endDocument() ; }

    // Signals entry into the formula identified by context.
    private void startFormula(String context)
    {
        handler.startFormula(currentLine(), context) ;
    }

    // Signals exit from the formula identified by context.
    private void endFormula(String context)
    {
        handler.endFormula(currentLine(), context) ;
    }

    // Id of the formula currently being parsed; null for the outer context.
    private String currentFormula = null ;

    // Sends one (subject, property, object) statement, tagged with the
    // current formula, to the handler.
    private void emitQuad(AST subj, AST prop, AST obj)
    {
        handler.quad(currentLine(), subj, prop, obj, currentFormula ) ;
    }

    // Sends a one-argument directive to the handler.
    private void directive(AST directive, AST arg)
    {
        handler.directive(currentLine(),
                          directive, new AST[]{arg},
                          currentFormula) ;
    }

    // Sends a two-argument directive to the handler.
    private void directive(AST directive, AST arg1, AST arg2)
    {
        handler.directive(currentLine(),
                          directive, new AST[]{arg1, arg2},
                          currentFormula) ;
    }

    // Forwards a recognition error, with its own line/column, to the handler.
    public void reportError(RecognitionException ex)
    {
        handler.error(ex, "N3 error: ["+ex.line+":"+ex.column+"] "+ex.getMessage());
    }

    /** Parser error-reporting function can be overridden in subclass */
    public void reportError(String s)
    {
        // No exception is available here, so the position comes from the lexer.
        handler.error(null, "N3AntlrParser(s): ["+currentLine()+":"+currentColumn()+"] "+s) ;
    }
}
// The top level rule
// Brackets the parse with startDocument()/endDocument() and accepts any
// number of directives and statements, each of which ends in a SEP.
// The "!" means the rule itself returns no AST.
document!:
{ startDocument() ; }
(n3Directive | statement)* // Not a statementList: must end in a SEP
// NOTE(review): endDocument() fires before EOF is matched, so the handler is
// told the document has ended even when unexpected input remains and the EOF
// match then fails - confirm this ordering is intended.
{ endDocument() ; }
EOF ;
// Errors are passed to the handler through reportError() and then rethrown,
// so the caller still sees the failure.
exception
catch [RecognitionException ex]
{ reportError(ex) ; throw ex ; }
// This exception carries a RecognitionException in its "recog" field; that
// inner exception is what gets reported and rethrown.
catch [TokenStreamRecognitionException ex]
{ reportError(ex.recog) ; throw ex.recog ; }
// A directive followed by its terminating SEP.
n3Directive!
    : n3Directive0 SEP!
    ;
// Directive body without the terminator: AT_PREFIX, a namespace prefix and
// a URI reference, passed to the handler as a two-argument directive.
n3Directive0!
    : at:AT_PREFIX prefix:nsprefix uri:uriref
        { directive(#at, #prefix, #uri) ; }
    ;
// A statement has the shape "subject verb object" followed by SEP, with
// shorthand for several properties (property lists) and several objects
// (object lists) on the same subject.
// The verb position also accepts the shorthand forms listed in "verb".
statement!
    : statement0 SEP!
    ;
// Statement body without the terminator: the subject's AST is handed down
// to the property list, which emits the quads.
statement0!
    : s:subject propertyList[#s]
    ;
// Statements and directives separated by SEP; a trailing SEP is not
// required and the list may be empty.
formulaList!
    : ( statement0 | n3Directive0 )
      ( SEP formulaList )?
    | // empty
    ;
// The subject of a statement is any item.
subject
    : item
    ;
// Properties attached to subj, separated by SEMI.
// The NAME_OP form passes subj to anonnode as its label before continuing
// with further properties of the same subject.
propertyList! [AST subj]
    : NAME_OP! anonnode[subj] propertyList[subj]
    | propValue[subj]
      ( SEMI propertyList[subj] )?
    | // void : allows for [ :a :b ] and empty list "; .".
    ;
// One property of subj together with its values.
propValue [AST subj]
    : // Ordinary direction: subj verb object, ...
      fwd:verb objectList[subj, #fwd]
    | // Reversed direction: the listed items become subjects and subj the object
      rev:verbReverse subjectList[subj, #rev]
    ;
// COMMA-separated items used with a reversed verb: every item is emitted
// as the subject of a quad whose object is oldSub.
subjectList! [AST oldSub, AST prop]
    : newSubj:item
        { emitQuad(#newSubj, prop, oldSub) ; }
      ( COMMA subjectList[oldSub, prop] )?
    ;
// COMMA-separated objects for one subject/property pair; a quad is emitted
// for each object as soon as it has been parsed.
objectList! [AST subj, AST prop]
    : value:item
        { emitQuad(subj, prop, #value) ; }
      ( COMMA objectList[subj, prop] )?
    | // Allows for empty list ", ."
    ;
// A node, optionally followed by any number of path steps.
// Every step allocates a fresh anonymous node, emits the quad that links it
// to the item parsed so far, and then carries on from that anonymous node.
// The rule's result is the node reached after the last step.
item
    : base:node
      (
          // Forward step: (item so far) (step node) (fresh node)
          PATH! fwd:node
          {
              AST fwdAnon = #([ANON, genAnonId()]) ;
              emitQuad(#base, #fwd, fwdAnon) ;
              #base = fwdAnon ;
          }
      |
          // Backward step: (fresh node) (step node) (item so far)
          RPATH! bwd:node
          {
              AST bwdAnon = #([ANON, genAnonId()]) ;
              emitQuad(bwdAnon, #bwd, #base) ;
              #base = bwdAnon ;
          }
      )*
      { #item = #base ; }
    ;
// Debugging entry point: parses one verb and prints its AST to System.out.
testPoint!
    : parsed:verb
        { AntlrUtils.ast(System.out, #parsed) ; }
    ;
// The basic terms of the language. Bracketed forms are parsed by anonnode
// with no label supplied.
node
    : qname
    | uriref
    | anonnode[null]
    | literal
    | kwTHIS
    | variableDT
    ;
// Keywords are matched as dedicated token types instead of parser string
// literals, because with literals things like URIREFs get misclassified.
// The rules marked "!" return no AST.
kwTHIS : KW_THIS ;
kwOF!  : KW_OF ;
kwHAS! : KW_HAS ;
kwA    : KW_A ;
kwIS!  : KW_IS ;
// The property position of a statement: an item, or one of the shorthand
// forms below.
verb
    : item
    | kwA
    | EQUAL
    | ARROW_R
    | ARROW_L
    | ARROW_PATH_L! node ARROW_PATH_R!      // Deprecated
    | kwHAS! item
    ;
// The "is <node> of" form: a verb whose subject and object are swapped
// by the caller (see propValue).
verbReverse
    : kwIS! pred:node kwOF!
    ;
// Label is set if we have seen a :- in a propertyList
// Parses one of three bracketed forms and yields the node standing for it:
//   LBRACK ... RBRACK : anonymous node carrying a property list
//   LCURLY ... RCURLY : formula
//   LPAREN ... RPAREN : list
// When label is null, a fresh ANON or FORMULA node is generated for the
// first two forms; for a list the label is passed on to the list rule.
anonnode[AST label]
// oldCxt: formula id in force before entering a formula, restored on exit.
// cxt:    id of the formula being entered.
{ String oldCxt = null ; String cxt = null ; }
// BNode
: LBRACK!
{ if ( label == null )
label = #([ANON, genAnonId()]) ;
// The result is fixed before the properties are parsed; they are emitted
// with this node as their subject.
#anonnode = label ;
}
propertyList[label]
RBRACK!
// Formula.
// Push old formula context, generate new one.
| LCURLY!
{ oldCxt = currentFormula ;
if ( label == null )
label = #([FORMULA, genFormulaId()]) ;
// The text of the label node serves as the formula id.
cxt = label.getText() ;
// Quads emitted while inside the braces are tagged with this id.
currentFormula = cxt ;
startFormula(cxt) ;
#anonnode = label ;
}
formulaList
// Pop: signal the end of the formula and restore the enclosing context.
{ endFormula(cxt) ; currentFormula = oldCxt ;}
RCURLY!
// List syntax
| LPAREN!
list[label]
RPAREN!
;
// Contents of a list written between parentheses.
// A non-empty list yields a cell node (label, or a fresh ANON node when
// label is null) and emits "first" and "rest" quads for it; an empty list
// yields the nil node.
list [AST label]
    : head:item
        {
            if ( label == null )
                label = #([ANON, genAnonId()]) ;
            #list = label ;
        }
      // NB The quads come out from tail to head: the recursive call for
      // the remainder finishes before this cell's quads are emitted.
      tail:list[null]
        {
            // Optional type quad for the cell
            if ( emitListTypeQuad )
                emitQuad(label, #([KW_A, "list"]), #([TK_LIST, "List"]) );
            emitQuad(label, #([TK_LIST_FIRST, "first"]), #head);
            emitQuad(label, #([TK_LIST_REST, "rest"]), #tail) ;
        }
    | // void - generate list:nil
        { #list = #([TK_LIST_NIL, "nil"]); }
    ;
// Extract from the N-Triples syntax
// literal ::= langString | datatypeString
// langString ::= '"' string '"' ( '@' language )?
// datatypeString ::= langString '^^' uriref
// language ::= [a-z0-9]+ ('-' [a-z0-9]+ )?
// This is a permissive parse and allows the
// lang and the datatype to be reversed.
// Actually, the grammar allows two lang tags or two datatype
// specifications.
// N3 adds numbers (XSD integers, XSD floats)
// A number, or a string with optional modifiers. For the string form the
// resulting AST is retyped as LITERAL.
literal
    : num:NUMBER
    | str:STRING literalModifier
        { #literal.setType(LITERAL) ; }
    ;
// Two modifier slots in a row; each slot may be empty.
literalModifier
    : literalModifier1 literalModifier1
    ;
// A single modifier slot: a language tag, a datatype, or nothing.
// A datatype is returned as a DATATYPE node with the type as its child.
literalModifier1
    : (AT_LANG) => AT_LANG
    | (DATATYPE) => DATATYPE dtype:datatype
        { #literalModifier1 = #([DATATYPE], #dtype) ; }
    | // no modifier
    ;
// What may follow the datatype marker.
// Allowing a literal here is merely symmetry: literals are allowed
// everywhere else.
datatype
    : qname
    | uriref
    | variableNoDT
    | literal
    ;
// Restricted case for nsprefix.
nsprefix: ns:QNAME { ns.getText().endsWith(":") }? ;
exception
catch [SemanticException ex]
{
RecognitionException rEx =
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -