n3.g

Jena推理机
第 1 页 / 共 2 页
字号:
上一页 12
            new RecognitionException("Illegal prefix: '"+ns.getText()+"'") ; 
		rEx.line = lexer.getLine() ; rEx.column = lexer.getColumn() ; 
		throw rEx ;
	}

// A qualified name is exactly one QNAME token.
// (The QNAME_ANON alternative is commented out.)
qname
	: QNAME		// | QNAME_ANON
	;

// A URI reference is exactly one URIREF token.
uriref
	: URIREF
	;

// There are two types of variable: one where it can
// be followed by a datatype and one where it can't
// The only place where it can't is in a datatype slot itself
//  i.e. "111"^^?x

// Variable that may be followed by "^^" and a datatype.
// The resulting tree is rooted at a UVAR node carrying the variable's text,
// with the datatype subtree (when one was matched) as its child.
variableDT
	: uv:UVAR
	  ( DATATYPE dtype:datatype )?
	  { #variableDT = #( #[UVAR, uv.getText()], dtype ) ; }
	;

// Variable that cannot be followed by a datatype: a single UVAR token.
variableNoDT
	: uv:UVAR
	;


// --------------------------------------------------------

// Lexer class declaration and its options.
class N3AntlrLexer extends Lexer ;
options {
	k=3;		// Because of """ and '''

	// UTF-8 expanded to Java chars
	// NB: antlr 2.7.1 \uFFFF is the EOF char
	// Not fixed in antlr 2.7.2
	// Hence the vocabulary stops at \uFFFE.
	charVocabulary= '\u0000'..'\uFFFE' ;
}


// One of QNAME, KEYWORD, NAME_OP, or a NUMBER
// Keywords are a little strange: (the letters for) a keyword
// could be part of a qname, either NS prefix or the local name.
// NSNAMEs can be digits.

// Catch-all rule: every alternative is guarded by a syntactic predicate and
// re-types the matched text with $setType to QNAME, NAME_OP, NUMBER or one
// of the KW_* keyword types.
THING
	// Order of syntactic predicates matters here

	// A qname (including the prefix used in @prefix)
	// and bNodes, using "_:"

	// Forms, in the order tried:  prefix:local   :local   prefix:   :
	:	(NSNAME COLON LNAME)=>	NSNAME COLON LNAME	{ $setType(QNAME) ; }
	|	(COLON LNAME)=>			COLON LNAME			{ $setType(QNAME) ; }
	|	(NSNAME COLON )=>	    NSNAME COLON		{ $setType(QNAME) ; } 
	|	(COLON)=>			    COLON      			{ $setType(QNAME) ; } 
		// Named anon node
		// NOTE(review): the QNAME alternatives above are tried first, and
		// their predicates also appear to succeed on ":-" (NSNAME and LNAME
		// are XNAME, which may be empty and whose character set includes
		// '-'), so this alternative looks unreachable -- confirm.
	|	(COLON '-') =>	":-"						{ $setType(NAME_OP) ; }
	|	(NUMBER) => NUMBER							{ $setType(NUMBER) ; }

		// Keywords: relies on the fact that a keyword cannot be the last
		// thing in a file (a '.' must follow), so there is always a next
		// character for the NON_ANC test.  NON_ANC rejects a following
		// letter, digit or ':', i.e. the keyword letters must stand alone.
	|   ("has"    NON_ANC)=>	"has"				{ $setType(KW_HAS) ; }
	|   ("of"     NON_ANC)=>	"of"				{ $setType(KW_OF) ; }
	|   ("this"   NON_ANC)=>	"this"				{ $setType(KW_THIS) ; }
	|   ("a"      NON_ANC)=>	"a"					{ $setType(KW_A) ; }
	|   ("is"     NON_ANC)=>	"is"				{ $setType(KW_IS) ; }
	;

// Need to check against RFC2396 (code from Xerces?)
// Need to differentiate from "<=" 

// Tokens that start with '<': the arrows "<=>" and "<=", or a <...> URI
// reference.  The matched text is re-typed to ARROW_MEANS, ARROW_L or URIREF.
//
// The longer arrow must be tried first.  The predicate for "<=" also succeeds
// on the first two characters of "<=>", so if ARROW_L were tested first the
// input "<=>" would be split into ARROW_L followed by '>' and ARROW_MEANS
// could never be produced.
URI_OR_IMPLIES
		: (ARROW_MEANS) =>  ARROW_MEANS { $setType(ARROW_MEANS) ; }
		| (ARROW_L) => ARROW_L { $setType(ARROW_L) ; }
		| URIREF  { $setType(URIREF) ; }
		;
		
// Needs to be protected ... or the antlr compiler loops ...
// '<', then any characters other than line breaks (matched non-greedily),
// then '>'.  The '!' suffix drops both angle brackets from the token text.
protected
URIREF
	: LANGLE!
	  ( options { greedy=false; } : ~( '\n' | '\r' ) )*
	  RANGLE!
	;

// RFC 2396 + chars for limited IRI compatibility
// processing to check URIref syntax and character sets

// Single-character class for URI references, grouped by RFC 2396 category.
protected
URICHAR
	: ALPHANUMERIC
	// RFC 2396 unreserved marks
	| '-' | '_' | '.' | '!' | '~' | '*' | "'" | '(' | ')'
	// RFC 2396 reserved
	| ';' | '/' | '?' | ':' | '@' | '&' | '=' | '+' | '$' | ','
	// RFC 2396 "unwise"
	| '{' | '}' | '|' | '\\' | '^' | '[' | ']' | '`'
	// Delimiters: escape, fragment reference and double quote
	| '%' | '#' | '"'
	// Not in RFC 2396; accepted here to help IRI compliance
	| ' '
	;

// A variable token: '?' followed by one or more letters or digits.
UVAR
	: QUESTION ( ALPHANUMERIC )+
	;


// Two cases of @word: directives (@prefix) and language tags.
// Can't have a language of "prefix".

// "@prefix" is re-typed to AT_PREFIX.  Otherwise '@' followed by letters,
// optionally followed by '-' and further letters, is re-typed to AT_LANG.
// "@prefix" is tested first, which is why it cannot be a language tag.
AT_WORD
	: ( AT "prefix" ) =>
		AT "prefix"
		{ $setType(AT_PREFIX) ; }
	| ( AT ( ALPHA ) ) =>
		AT tag:( ALPHA )+ ( "-" ( ALPHA )* )?
		{ $setType(AT_LANG) ; }
	;

// Align with XML 1.1 -- http://www.w3.org/TR/xml11/
// NameStartChar 
// NameChar
// Name ::= NameStartChar NameChar*

// Namespaces 1.1 -- http://www.w3.org/TR/xml-names11/
// NCName ::= NCNameStartChar NCNameChar*
// NCNameChar       ::=    NameChar - ':' 
// NCNameStartChar  ::=    NameStartChar - ':'
// QName    ::=    PrefixedName 
//               | UnprefixedName 
// PrefixedName     ::=    Prefix ':' LocalPart  
// UnprefixedName   ::=    LocalPart  
// Prefix           ::=    NCName (does not start with numbers)
// LocalPart        ::=    NCName (does not start with numbers) 

//     NameChar and NameStartChar defined in XML 1.1
//     NameStartChar := ":" | [A-Z] | "_" | [a-z] |
//                      [#xC0-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] |
//                      [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] |
//                      [#x3001-#xD7FF] | [#xF900-#xEFFFF]
//     NameChar      := NameStartChar | "-" | "." | [0-9] | #xB7 |
//                         [#x0300-#x036F] | [#x203F-#x2040]


// This is NameChar except:
//   No dot - path separator.
//   Only up to FFFE, not EFFFF.
// One name character.  The alternatives are listed in code-point order;
// the set itself is unchanged.
protected
XNAMECHAR
	: ( '-' | ('0'..'9') | ('A'..'Z') | '_' | ('a'..'z')
	  | '\u00B7'
	  | ('\u00C0'..'\u02FF') | ('\u0300'..'\u036F')
	  | ('\u0370'..'\u037D') | ('\u037F'..'\u1FFF')
	  | ('\u200C'..'\u200D') | ('\u203F'..'\u2040')
	  | ('\u2070'..'\u218F') | ('\u2C00'..'\u2FEF')
	  | ('\u3001'..'\uD7FF') | ('\uF900'..'\uFFFE')
	  )
	;

// Zero or more name characters, so an XNAME may be empty.
protected
XNAME
	: ( XNAMECHAR )*
	;

// Namespace prefix name: include bNode ids.
// Prefix part of a qname; currently just an XNAME.
// Earlier definition, kept for reference:
//   NSNAME: (ALPHANUMERIC|'_') (ALPHANUMERIC|'_'|'-')* ;
protected
NSNAME
	: XNAME
	;

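// LNAME is meant not to start with '-' because that would be confused with
// the name operator ":-".
// NOTE(review): LNAME is currently defined as XNAME, whose character set
// includes '-', so a leading '-' is not actually rejected by the rule -- confirm.
// Also, N3 does not allow '.' in the localname part of a qname (although
// that is XML-legal) because N3 uses '.' as the end-of-statement separator,
// as a path separator, and as the decimal point in numbers.
// See N3JenaWriter, which avoids outputting qnames with a '.' in them.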

// Local part of a qname; currently just an XNAME.
// Earlier definition, kept for reference:
//   LNAME: (ALPHANUMERIC|'_') (ALPHANUMERIC|'_'|'-')* ;
protected
LNAME
	: XNAME
	;

// Use lookahead as the DOT character is also the statement separator/terminator
// protected so parser uses THING to get these items, having checked that
// it isn't a QNAME
// Maybe split into INTEGER and DOUBLE using lookahead.
// Optional sign, digits, optional fraction, optional exponent.
// The fraction is only entered when the DOT is immediately followed by a
// digit, because DOT is also the statement separator.
protected
NUMBER
	: ( '+' | '-' )?
	  ( '0'..'9' )+
	  ( ( DOT ( '0'..'9' ) ) => DOT ( '0'..'9' )+ )?
	  ( ( 'e' | 'E' ) ( '+' | '-' )? ( '0'..'9' )+ )?
	;

// A string literal: the single-quoted forms (STRING1) or the double-quoted
// forms (STRING2).
STRING
	: ( STRING1
	  | STRING2
	  )
	;


// A '.' is re-typed either to PATH or to SEP.
SEP_OR_PATH
	// PATH when the very next character could start a property:
	// a letter, '_' or ':' (qname forms "a:p" / ":p"), or '<' (a URI).
	: ( DOT ( ALPHA | '_' | COLON | LANGLE ) ) =>
		DOT
		{ $setType(PATH) ; }
	// Every other case, including end of file, is a separator.
	| DOT
		{ $setType(SEP) ; }
	;


// protected means the token or rule is not exposed to the parser 
// Named characters
// Single-character tokens.  DOT is protected; the others are ordinary tokens.
protected
DOT     : '.' ;
// SEP  : '.' ;

AT      : '@' ;
LPAREN  : '(' ;
RPAREN  : ')' ;
LBRACK  : '[' ;
RBRACK  : ']' ;
LCURLY  : '{' ;
RCURLY  : '}' ;
LANGLE  : '<' ;
RANGLE  : '>' ;
SEMI    : ';' ;
COMMA   : ',' ;
PATH    : '!' ;
RPATH   : '^' ;
//USCORE		:	'_' ;

// Operator tokens.  NAME_IT, QUESTION, ARROW_L and ARROW_MEANS are protected;
// DATATYPE, ARROW_R, ARROW_PATH_L, ARROW_PATH_R and EQUAL are ordinary tokens.
DATATYPE      : "^^" ;

protected
NAME_IT       : ":-" ;

protected
QUESTION      : '?' ;

ARROW_R       : "=>" ;

protected
ARROW_L       : "<=" ;

protected
ARROW_MEANS   : "<=>" ;

ARROW_PATH_L  : ">-" ;
ARROW_PATH_R  : "->" ;

EQUAL         : "=" ;

// Protected so that it does not conflict with the rule that tests for a
// leading COLON.  The original comment calls that rule
// QNAME_OR_PREFIX_OR_KEYWORD_OR_NAME_OP; presumably this is the THING rule
// in this file -- confirm.
protected
COLON
	: ':'
	;


// Single line comment.
// '#' up to the end of the line; the token is skipped.
SL_COMMENT
	: "#"
	  // The loop is greedy and consumes every non-newline character, so the
	  // trailing line break can be optional; that also covers a comment
	  // that ends at end of file.
	  ( ~( '\n' | '\r' ) )*
	  ( NL )?
	  { $setType(Token.SKIP) ; }
	;

// Windows: \r\n
// Unix:    \n
// Mac:     \r

// The three line-terminator spellings; each one calls newline().
protected
NL1 : "\r\n" { newline() ; } ;

protected
NL2 : "\n" { newline() ; } ;

protected
NL3 : "\r" { newline() ; } ;

// Hard work!  Dispatching through predicates is what makes line breaks
// inside ''' and """ strings work.
protected
NL
	: ( NL1 ) => NL1
	| ( NL2 ) => NL2
	| ( NL3 ) => NL3
	;

// Ignore whitespace.  Not protected as SKIP is passed to parser.
// One whitespace character or line terminator; the token is skipped.
WS
	: ( ' '
	  | '\t'
	  | '\f'
	  | NL
	  )
	  { $setType(Token.SKIP) ; }
	;
	
// Any single character that is not a space, tab, form feed, CR or LF.
protected
NWS
	: ~( ' ' | '\t' | '\f' | '\r' | '\n' )
	;


// ASCII letter.
protected
ALPHA
	: ( 'A'..'Z' )
	| ( 'a'..'z' )
	;

// ASCII digit.
protected
NUMERIC
	: ( '0'..'9' )
	;

// ASCII letter or digit.
protected
ALPHANUMERIC
	: ( ALPHA | NUMERIC )
	;

// Any character that is not an ASCII letter, an ASCII digit or ':'.
protected
NON_ANC
	: ~( 'A'..'Z' | 'a'..'z' | '0'..'9' | ':' )
	;

// Single-quoted strings.  The ''' form is tried first; otherwise a plain
// '...' string is matched.  The opening and closing quotes are dropped ('!').
protected
STRING1
	: ( QUOTE3S ) =>
		// Needs k=3: if k less a lexer is generated but fails : see antlr doc
		QUOTE3S!
		( options { greedy=false; } :
			  ( NL ) => NL
			| ESCAPE
			| ~( '\\' )
		)*
		QUOTE3S!
	| '\''!
		( options { greedy=false; } :
			  ESCAPE
			| ~'\\'
		)*
		'\''!
	;

// Double-quoted strings.  The """ form is tried first; otherwise a plain
// "..." string is matched.  The opening and closing quotes are dropped ('!').
protected
STRING2
	: ( QUOTE3D ) =>
		QUOTE3D!
		( options { greedy=false; } :
			  ( NL ) => NL
			| ESCAPE
			| ~( '\\' )
		)*
		QUOTE3D!
	| '"'!
		( options { greedy=false; } :
			  ESCAPE
			| ~'\\'
		)*
		'"'!
	;

// Delimiter made of three single quotes.
protected
QUOTE3S
	: "'''"
	;

// Delimiter made of three double quotes.
protected
QUOTE3D
	: '"' '"' '"'
	;

// @@Needs work
// A backslash escape.  The backslash itself is dropped ('!').  Escapes known
// to ESC_CHAR are translated there; for any other character the text is set
// to a backslash followed by that character.
protected
ESCAPE
	: '\\'!
	  ( ( ESC_CHAR ) => ESC_CHAR
	  | other:.
		{ $setText("\\"+other) ; }
	  )
	;


// The character that follows a backslash in a string escape.  Each
// alternative replaces the matched text with the character it stands for.
protected
ESC_CHAR:
		( 'n'  { $setText("\n") ; }
		| 'r'  { $setText("\r") ; }
		| 'b'  { $setText("\b") ; }
		| 't'  { $setText("\t") ; }
		| 'f'  { $setText("\f") ; }
			// Vertical tab (U+000B).  Java has no "\v" escape, so the octal
			// form is used, as for "\007" below.  This was previously "\f",
			// which made \v produce a form feed.
		| 'v'  { $setText("\013") ; }
		| 'a'  { $setText("\007") ; }
			// UNICODE escape: exactly four hex digits, converted to one char
			// @@TODO
		| 'u'  h:HEX4
			   {
				char ch = (char)Integer.parseInt(h.getText(), 16) ;
				$setText(ch) ;
				}
		| '"'  { $setText("\"") ; }
		| '\\' { $setText("\\") ; }
		| '\'' { $setText("'") ; }
		)
		; 

// One hexadecimal digit, either case.
protected
HEX_DIGIT
	: ( '0'..'9' | 'A'..'F' | 'a'..'f' )
	;

// Exactly four hexadecimal digits.
protected
HEX4
	: HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
	;
/*
 *  (c) Copyright Hewlett-Packard Company 2002-2003
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -