⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cpp_parser.g

📁 C Preprocessor,antlr的grammar语言描述,用于学习词法分析,语法分析
💻 G
📖 第 1 页 / 共 5 页
字号:
	:	
		(	"goto" ID SEMICOLON {end_of_stmt();}
		|	"continue" SEMICOLON {end_of_stmt();}
		|	"break" SEMICOLON {end_of_stmt();}
		|	// DW 16/05/03 May be problem here if return is followed by a cast expression 
			"return" {in_return = true;}
			(	options{warnWhenFollowAmbig = false;}:
				(LPAREN {(qualifiedItemIsOneOf(qiType) )}? ID RPAREN)=> 
				LPAREN ID RPAREN (expression)?	// This is an unsatisfactory fix for problem in xstring re "return (allocator);"
												//  and in xlocale re return (_E)(_Tolower((unsigned char)_C, &_Ctype));
				//{printf("%d CPP_parser.g jump_statement Return fix used\n",lineNo);}
			|	expression 
			)?	SEMICOLON {in_return = false,end_of_stmt();} 
		)
	;

// try_block: the keyword "try", a compound statement, then any number of
// catch handlers (this rule also accepts zero handlers).
try_block
	:	"try"
		compound_statement
		( handler )*
	;

// handler: a single "catch" clause.
// The semantic actions bracket the parse in this order:
//   exceptionBeginHandler(), declaratorParameterList(1)   -- before LPAREN
//   declaratorEndParameterList(1)                         -- after RPAREN
//   exceptionEndHandler()                                 -- after the body
handler
	:	"catch"
		{
		exceptionBeginHandler();
		declaratorParameterList(1);
		}
		LPAREN exception_declaration RPAREN
		{ declaratorEndParameterList(1); }
		compound_statement
		{ exceptionEndHandler(); }
	;

// exception_declaration: the contents of a catch clause's parentheses,
// delegated entirely to parameter_declaration_list.
exception_declaration
	:	parameter_declaration_list
	;

/* This is an expression of type void according to the ARM, which
 * to me means "statement"; it removes some ambiguity to put it in
 * as a statement also.
 */
// throw_statement: "throw" with an optional assignment_expression operand,
// terminated by SEMICOLON.  end_of_stmt() runs once the semicolon is matched.
throw_statement
	:	"throw"
		( assignment_expression )?
		SEMICOLON
		{ end_of_stmt(); }
	;

// using_statement: "using" followed by either
//   "namespace" qualified_id       (using-directive), or
//   ["typename"] qualified_id      (using-declaration),
// then SEMICOLON.  The qualified name is captured in qid, which this rule
// does not use further; end_of_stmt() runs after the semicolon.
using_statement
	{ char *qid; }
	:	"using"
		(	"namespace" qid = qualified_id		// using-directive
		|	( "typename" )? qid = qualified_id	// using-declaration
		)
		SEMICOLON
		{ end_of_stmt(); }
	;

///////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////
//////////////////////////////  EXPRESSIONS ///////////////////////////
///////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////

// expression: one or more assignment_expressions separated by COMMA
// (the original author notes this is the same as expression_list).
// The init action records the line of the first lookahead token in lineNo.
expression
	{ lineNo = LT(1)->getLine(); }
	:	assignment_expression
		( COMMA assignment_expression )*
	;

// assignment_expression: a conditional_expression, optionally followed by one
// assignment operator and its right-hand side.  The right-hand side is parsed
// by remainder_expression, which recurses back into assignment_expression;
// that recursion is what makes assignment group right-to-left.
assignment_expression
	:	conditional_expression
		(	(	ASSIGNEQUAL
			|	TIMESEQUAL
			|	DIVIDEEQUAL
			|	MINUSEQUAL
			|	PLUSEQUAL
			|	MODEQUAL
			|	SHIFTLEFTEQUAL
			|	SHIFTRIGHTEQUAL
			|	BITWISEANDEQUAL
			|	BITWISEXOREQUAL
			|	BITWISEOREQUAL
			)
			remainder_expression
		)?
	;

//remainder_expression
// Right-hand side of an assignment operator (used by assignment_expression).
// Both alternatives parse an assignment_expression.  The first is chosen when
// the syntactic predicate sees a conditional_expression directly followed by
// COMMA, SEMICOLON or RPAREN; in that case assign_stmt_RHS_found is
// incremented before the operand is parsed and decremented afterwards.
// If the counter is not positive when it is about to be decremented, a
// warning is written to stderr and parsing continues.  The parser no longer
// stops to wait for a key press on stdin, which would hang unattended runs.
remainder_expression
	:
		(	
			(conditional_expression (COMMA|SEMICOLON|RPAREN) )=>
			{assign_stmt_RHS_found += 1;}
			assignment_expression
			{
			if (assign_stmt_RHS_found > 0)
				assign_stmt_RHS_found -= 1;
			else
				{
				// Counter underflow: report it and carry on without blocking.
				fprintf(stderr,"%d warning Error in assign_stmt_RHS_found = %d\n",
					lineNo,assign_stmt_RHS_found);
				}
			}
		|	
			assignment_expression
		)
	;

// conditional_expression: a logical_or_expression, optionally extended with
// the ternary form  ? expression : conditional_expression.
conditional_expression
	:	logical_or_expression
		(	QUESTIONMARK expression
			COLON conditional_expression
		)?
	;

// constant_expression: syntactically just a conditional_expression; this rule
// performs no additional checking.
constant_expression
	:	conditional_expression
	;

// logical_or_expression: one or more logical_and_expressions joined by OR.
logical_or_expression
	:	logical_and_expression
		(	OR logical_and_expression
		)*
	;

// logical_and_expression: one or more inclusive_or_expressions joined by AND.
logical_and_expression
	:	inclusive_or_expression
		(	AND inclusive_or_expression
		)*
	;

// inclusive_or_expression: one or more exclusive_or_expressions joined by
// BITWISEOR.
inclusive_or_expression
	:	exclusive_or_expression
		(	BITWISEOR exclusive_or_expression
		)*
	;

// exclusive_or_expression: one or more and_expressions joined by BITWISEXOR.
exclusive_or_expression
	:	and_expression
		(	BITWISEXOR and_expression
		)*
	;

// and_expression: one or more equality_expressions joined by AMPERSAND.
and_expression
	:	equality_expression
		(	AMPERSAND equality_expression
		)*
	;

// equality_expression: one or more relational_expressions joined by NOTEQUAL
// or EQUAL.
equality_expression
	:	relational_expression
		(	( NOTEQUAL | EQUAL )
			relational_expression
		)*
	;

//relational_expression
// One or more shift_expressions joined by LESSTHAN, GREATERTHAN,
// LESSTHANOREQUALTO or GREATERTHANOREQUALTO.
// warnWhenFollowAmbig is turned off for the loop so ANTLR does not report an
// ambiguity between taking another iteration and leaving the rule.
// NOTE(review): presumably the ambiguity comes from '<' / '>' also being used
// elsewhere in the grammar (e.g. template brackets) -- confirm.
relational_expression
	:	
		shift_expression
		(options {warnWhenFollowAmbig = false;}:
			(	LESSTHAN
			|	GREATERTHAN
			|	LESSTHANOREQUALTO
			|	GREATERTHANOREQUALTO
			)
		 shift_expression
		)*
	;

// shift_expression: one or more additive_expressions joined by SHIFTLEFT or
// SHIFTRIGHT.
shift_expression
	:	additive_expression
		(	( SHIFTLEFT | SHIFTRIGHT )
			additive_expression
		)*
	;

// See comment for multiplicative_expression regarding #pragma
// additive_expression: one or more multiplicative_expressions joined by PLUS
// or MINUS.  warnWhenFollowAmbig is turned off for the loop because PLUS and
// MINUS are also unary operators (see unary_operator), which ANTLR would
// otherwise report as ambiguous.
additive_expression
	:	
		multiplicative_expression
		(options{warnWhenFollowAmbig = false;}:
			(PLUS | MINUS) multiplicative_expression
		)*
	;

// ANTLR has trouble dealing with the analysis of the confusing unary/binary
// operators such as STAR, AMPERSAND, PLUS, etc...  
// With the #pragma (now "(options{warnWhenFollowAmbig = false;}:" etc.)
// we simply tell ANTLR to use the "quick-to-analyze" approximate lookahead
// as full LL(k) lookahead will not resolve the ambiguity anyway.  Might
// as well not bother.  This has the side-benefit that ANTLR doesn't go
// off to lunch here (take infinite time to read grammar).
// multiplicative_expression: one or more pm_expressions joined by STAR,
// DIVIDE or MOD.  The loop has warnWhenFollowAmbig turned off for the
// unary/binary operator confusion described in the comment preceding this rule.
multiplicative_expression
	:	
		pm_expression
		(options{warnWhenFollowAmbig = false;}:
			(STAR|DIVIDE|MOD) pm_expression
		)*
	;

// pm_expression: one or more cast_expressions joined by the pointer-to-member
// operator tokens DOTMBR or POINTERTOMBR.
pm_expression
	:	cast_expression
		(	( DOTMBR | POINTERTOMBR )
			cast_expression
		)*
	;

/* The string "( ID" can be either the start of a cast or
 * the start of a unary_expression.  However, the ID must
 * be a type name for it to be a cast.  Since ANTLR can only hoist
 * semantic predicates that are visible without consuming a token,
 * the semantic predicate in rule type_name is not hoisted--hence, the
 * rule is reported to be ambiguous.  I am manually putting in the
 * correctly hoisted predicate.
 *
 * Ack! Actually "( ID" might be the start of "(T(expr))" which makes
 * the first parens just an ordinary expression grouping.  The solution
 * is to look at what follows the type, T.  Note, this could be a
 * qualified type.  Yucko.  I believe that "(T(" can only imply
 * function-style type cast in an expression (...) grouping.
 *
 * We DO NOT handle the following situation correctly at the moment:
 * Suppose you have
 *    struct rusage rusage;
 *    return (rusage.fp);
 *    return (rusage*)p;
 * Now essentially there is an ambiguity here. If rusage is followed by any
 * postfix operators then it is an identifier else it is a type name. This
 * problem does not occur in C because, unless the tag struct is attached,
 * rusage is not a type name. However in C++ that restriction is removed.
 * No *real* programmer would do this, but it's in the C++ standard just for
 * fun..
 *
 * Another fun one (from an LL standpoint):
 *
 *   (A::B::T *)v;      // that's a cast of v to type A::B::T
 *   (A::B::foo);    // that's a simple member access
 *
 * The qualifiedItemIs(1) function scans ahead to what follows the
 * final "::" and returns qiType if the item is a type.  The offset of
 * '1' makes it ignore the initial LPAREN; normally, the offset is 0.
 */
//cast_expression
// Three alternatives, tried in this order:
//   1. "(type_id) unary_expression"  -- taken only if the whole form can be
//      matched speculatively by the syntactic predicate.
//   2. "(type_id) cast_expression"   -- same, for chained casts.
//   3. a plain unary_expression      -- the fallback when neither cast form
//      matches, including a parenthesised expression.
// The order matters: the predicates are evaluated top to bottom.
cast_expression 
	:
		// Cast whose operand is a unary_expression
		(LPAREN type_id RPAREN unary_expression)=>
		 LPAREN type_id RPAREN unary_expression
	|
		// Believe it or not, you can get more than one cast expression in sequence
		(LPAREN type_id RPAREN cast_expression)=>
		 LPAREN type_id RPAREN cast_expression
	|  
		unary_expression	// handles outer (...) of "(T(expr))"
	;

//unary_expression
// Alternatives, tried in order:
//   - a postfix_expression, if one can be matched speculatively;
//   - prefix PLUSPLUS / MINUSMINUS applied to a unary_expression;
//   - a unary_operator applied to a cast_expression;
//   - "sizeof" or "__alignof__" applied either to a unary_expression (tried
//     first via a syntactic predicate) or to a parenthesised type_id;
//   - an optionally SCOPE-prefixed new_expression or delete_expression.
unary_expression
	:
		(	
			// Try the postfix form first; fall through only if it cannot match.
			(postfix_expression)=> 
			postfix_expression
		|	
			PLUSPLUS unary_expression
		|	
			MINUSMINUS unary_expression
		|	
			unary_operator cast_expression
		|	
			("sizeof"
			|"__alignof__" 	//Zhaojz 02/02/05 to fix bug 29 (GNU)
			)
			// Operand: prefer an expression; otherwise it must be "(type_id)".
			(	(unary_expression)=>
				 unary_expression
			|
				LPAREN type_id RPAREN
			)
		|   
			// Optional leading SCOPE token before new / delete
			(SCOPE)?
			(new_expression
			|delete_expression
			)
		)
	;

//postfix_expression
// Alternatives, tried in order:
//   1. simple_type_specifier "()" "(" [expression_list] ")"
//   2. simple_type_specifier "(" [expression_list] ")" [ "." postfix_expression ]
//      (function-style cast or constructor call)
//   3. primary_expression followed by any number of postfix operators:
//      subscript, call, member access via DOT / POINTERTO (with optional
//      "template"), post-increment, post-decrement
//   4. dynamic_cast / static_cast / reinterpret_cast / const_cast
//   5. typeid(...) optionally followed by member access
// Alternatives 1 and 2 are additionally guarded by a semantic predicate that
// rejects them when the next token is LPAREN.
// ts receives the value returned by simple_type_specifier / type_specifier;
// it is not used further inside this rule.
postfix_expression
	{
	 TypeSpecifier ts;
	}
	:
	(	
		options {warnWhenFollowAmbig = false;}:
		// Function-style cast must have a leading type
		{!(LA(1)==LPAREN)}?
		(ts = simple_type_specifier LPAREN RPAREN LPAREN)=>	// DW 01/08/03 To cope with problem in xtree (see test10.i)
		 ts = simple_type_specifier LPAREN RPAREN LPAREN (expression_list)? RPAREN
	|
		{!(LA(1)==LPAREN)}?
		(ts = simple_type_specifier LPAREN)=>
		 ts = simple_type_specifier LPAREN (expression_list)? RPAREN
		// Following put in to allow for the above being a constructor as shown in test_constructors_destructors.cpp
		(DOT postfix_expression)?
	|  
		primary_expression
		// Zero or more postfix operators applied left to right
		(options {warnWhenFollowAmbig = false;}:
        	LSQUARE expression RSQUARE
		|	
			LPAREN (expression_list)? RPAREN 
		|	(DOT|POINTERTO) ("template")? id_expression
		|	PLUSPLUS 
		|	MINUSMINUS
		)*
	|
		// Named casts: the target type is an optional "const", a type_specifier
		// and at most one ptr_operator between LESSTHAN and GREATERTHAN.
		("dynamic_cast"|"static_cast"|"reinterpret_cast"|"const_cast")
		LESSTHAN ("const")? ts = type_specifier (ptr_operator)? GREATERTHAN
		LPAREN expression RPAREN
	|
		// typeid: the operand is parsed as a type_id if that succeeds
		// speculatively, otherwise as an expression.
		"typeid" 
		LPAREN ((type_id)=>type_id|expression) RPAREN
		( (DOT|POINTERTO) postfix_expression)?
	)
	;

// primary_expression: the atoms of the expression grammar -- an
// id_expression, a literal, the keyword "this", or a parenthesised expression.
primary_expression
	:
		id_expression
	|
		literal
	|
		"this"
	|
		LPAREN expression RPAREN	// parenthesised expression
	;

// id_expression: a qualified_id.  The value returned by qualified_id is
// stored in the local s, which this rule does not use further.
id_expression
	{ char *s; }
	:	s = qualified_id
	;

// literal: any single constant token, or a run of one or more adjacent
// string-literal tokens (narrow and wide may be mixed).
literal
	:	// integer constants
		OCTALINT | DECIMALINT | HEXADECIMALINT
	|	// character constants
		CharLiteral | WCharLiteral
	|	// one or more string tokens in a row
		( StringLiteral | WStringLiteral )+
	|	// floating-point constants
		FLOATONE | FLOATTWO
	|	// boolean constants
		"true" | "false"
	;

// unary_operator: the six prefix operator tokens that unary_expression
// applies to a cast_expression.
unary_operator
	:	AMPERSAND | STAR
	|	PLUS | MINUS
	|	TILDE | NOT
	;

/* JEL The first ()? is used to resolve "new (expr) (type)" because both
 * (expr) and (type) look identical until you've seen the whole thing.
 *
 * new_initializer appears to be conflicting with function arguments as
 * function arguments can follow a primary_expression.  [This is a full
 * LL(k) versus LALL(k) problem.  Enhancing context by duplication of
 * some rules might handle this.]
 */
//new_expression
// "new", then in order:
//   - an optional parenthesised expression_list, taken only when the
//     syntactic predicate can match it in full;
//   - the type, either a new_type_id or a parenthesised type_id;
//   - an optional new_initializer, taken only when it matches speculatively
//     (warnWhenFollowAmbig is off for this optional part).
new_expression
	:
	(  
		"new"
		(	(LPAREN expression_list RPAREN)=> 
			 LPAREN expression_list RPAREN)?
		(new_type_id|LPAREN type_id RPAREN)
		(options{warnWhenFollowAmbig = false;}:	
		(new_initializer)=> new_initializer)?
	)
	;

// new_initializer: a parenthesised, possibly empty, expression_list.
new_initializer
	:	LPAREN
		( expression_list )?
		RPAREN
	;

//new_type_id
// The type operand of "new" when it is not parenthesised:
// declaration_specifiers followed by an optional new_declarator.
// warnWhenFollowAmbig is off for the optional part, so ANTLR does not warn
// about the choice between matching a new_declarator and leaving the rule.
new_type_id
	:	
		declaration_specifiers 
		(options {warnWhenFollowAmbig = false;}:
			new_declarator 
		)?
	;

//new_declarator
// Either a ptr_operator optionally followed by another new_declarator
// (right-recursive, so several ptr_operators may be chained), or a
// direct_new_declarator.  warnWhenFollowAmbig is off for the recursive part.
new_declarator
	:	 
		ptr_operator
		(options {warnWhenFollowAmbig = false;}:
		new_declarator)?
	|	
		direct_new_declarator
	;

/* The "[expression]" construct conflicts with the "new []" construct
 * (and possibly others).  We used approximate lookahead for the "new []"
 * construct so that it would not try to compute full LL(2) lookahead.
 * Here, we use #pragma approx again because anytime we see a [ followed
 * by token that can begin an expression, we always want to loop.
 * Approximate lookahead handles this correctly.  In fact, approximate
 * lookahead is the same as full lookahead when all but the last lookahead
 * depth are singleton sets; e.g., {"["} followed by FIRST(expression).
 */
//direct_new_declarator
// One or more "[ expression ]" groups.  warnWhenFollowAmbig is off for the
// loop; see the comment preceding this rule for the lookahead rationale.
direct_new_declarator
	:
		(options {warnWhenFollowAmbig = false;}:
			LSQUARE expression RSQUARE
		)+
	;

//ptr_operator
// Matches exactly one of:
//   - AMPERSAND                     (sets is_address = true)
//   - one of the vendor modifier keywords listed below (no action)
//   - scope_override STAR cv_qualifier_seq, taken only when the syntactic
//     predicate matches (sets is_pointer = true after STAR)
// The value returned by scope_override is stored in s and not used further
// in this rule.
// NOTE(review): a plain "*" is presumably covered by the last alternative
// with an empty scope_override -- confirm against the scope_override rule.
ptr_operator
	{char *s;}
	:	
		(	AMPERSAND 	{is_address = true;}
		|	("_cdecl"|"__cdecl") 
		|	("_near"|"__near") 
		|	("_far"|"__far") 
		|	"__interrupt" 
		|	("pascal"|"_pascal"|"__pascal") 
		|	("_stdcall"|"__stdcall") 
		|	(s = scope_override STAR cv_qualifier_seq)=>
			 s = scope_override STAR {is_pointer = true;} cv_qualifier_seq
		)	
   ;

// ptr_to_member: matches a pointer-to-member prefix such as "A::B::*"
// followed by cv_qualifier_seq.  is_pointer is set to true once STAR has been
// matched.  The value returned by scope_override is stored in s and not used
// further here.
// Original author's notes: may be redundant (14/06/06); in grammar.txt this
// is part of ptr_operator.
ptr_to_member
	{ char *s; }
	:	s = scope_override
		STAR { is_pointer = true; }
		cv_qualifier_seq
	;

// JEL note:  does not use (const|volatile)*

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -