antlr3baserecognizer.c

来自「antlr最新版本V3源代码」· C语言代码 · 共 1,501 行 · 第 1/3 页
1,501 行
 *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. *  In this case, for input "[]", LA(1) is in this set so we would *  not consume anything and after printing an error rule c would *  return normally.  It would not find the required '^' though. *  At this point, it gets a mismatched token error and throws an *  exception (since LA(1) is not in the viable following token *  set).  The rule exception handler tries to recover, but finds *  the same recovery set and doesn't consume anything.  Rule b *  exits normally returning to rule a.  Now it finds the ']' (and *  with the successful match exits errorRecovery mode). * *  So, you cna see that the parser walks up call chain looking *  for the token that was a member of the recovery set. * *  Errors are not generated in errorRecovery mode. * *  ANTLR's error recovery mechanism is based upon original ideas: * *  "Algorithms + Data Structures = Programs" by Niklaus Wirth * *  and * *  "A note on error recovery in recursive descent parsers": *  http://portal.acm.org/citation.cfm?id=947902.947905 * *  Later, Josef Grosch had some good ideas: * *  "Efficient and Comfortable Error Recovery in Recursive Descent *  Parsers": *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip * *  Like Grosch I implemented local FOLLOW sets that are combined *  at run-time upon error to avoid overhead during parsing. */static pANTLR3_BITSET		computeErrorRecoverySet	    (pANTLR3_BASE_RECOGNIZER recognizer){    return   recognizer->combineFollows(recognizer, ANTLR3_FALSE);}/** Compute the context-sensitive FOLLOW set for current rule. *  This is set of token types that can follow a specific rule *  reference given a specific call chain.  You get the set of *  viable tokens that can possibly come next (lookahead depth 1) *  given the current call chain.  
Contrast this with the
 *  definition of plain FOLLOW for rule r:
 *
 *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
 *
 *  where x in T* and alpha, beta in V*; T is set of terminals and
 *  V is the set of terminals and nonterminals.  In other words,
 *  FOLLOW(r) is the set of all tokens that can possibly follow
 *  references to r in *any* sentential form (context).  At
 *  runtime, however, we know precisely which context applies as
 *  we have the call chain.  We may compute the exact (rather
 *  than covering superset) set of following tokens.
 *
 *  For example, consider grammar:
 *
 *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
 *       | "return" expr '.'
 *       ;
 *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
 *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
 *       | '(' expr ')'
 *       ;
 *
 *  The FOLLOW sets are all inclusive whereas context-sensitive
 *  FOLLOW sets are precisely what could follow a rule reference.
 *  For input "i=(3);", here is the derivation:
 *
 *  stat => ID '=' expr ';'
 *       => ID '=' atom ('+' atom)* ';'
 *       => ID '=' '(' expr ')' ('+' atom)* ';'
 *       => ID '=' '(' atom ')' ('+' atom)* ';'
 *       => ID '=' '(' INT ')' ('+' atom)* ';'
 *       => ID '=' '(' INT ')' ';'
 *
 *  At the "3" token, you'd have a call chain of
 *
 *    stat -> expr -> atom -> expr -> atom
 *
 *  What can follow that specific nested ref to atom?  Exactly ')'
 *  as you can see by looking at the derivation of this specific
 *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
 *
 *  You want the exact viable token set when recovering from a
 *  token mismatch.  Upon token mismatch, if LA(1) is member of
 *  the viable next token set, then you know there is most likely
 *  a missing token in the input stream.  "Insert" one by just not
 *  throwing an exception. 
*/static pANTLR3_BITSET		computeCSRuleFollow	    (pANTLR3_BASE_RECOGNIZER recognizer){    return   recognizer->combineFollows(recognizer, ANTLR3_FALSE);}static pANTLR3_BITSET		combineFollows		    (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact){    pANTLR3_BITSET	followSet;    pANTLR3_BITSET	localFollowSet;    ANTLR3_UINT64	top;    ANTLR3_UINT64	i;    top	= recognizer->following->size(recognizer->following);    followSet	    = antlr3BitsetNew(0);    for (i = top; i>0; i--)    {	localFollowSet = (pANTLR3_BITSET) recognizer->following->get(recognizer->following, i);	if  (localFollowSet != NULL)	{	    followSet->orInPlace(followSet, localFollowSet);	}	if	(      exact == ANTLR3_TRUE		    && localFollowSet->isMember(localFollowSet, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_FALSE		)	{	    break;	}    }    followSet->remove(followSet, ANTLR3_EOR_TOKEN_TYPE);    return  followSet;}#ifdef	WIN32#pragma warning( disable : 4100 )#endifstatic void			displayRecognitionError	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames){    pANTLR3_PARSER	    parser;    pANTLR3_TREE_PARSER	    tparser;    pANTLR3_INT_STREAM	    is;    pANTLR3_COMMON_TOKEN    theToken;    pANTLR3_BASE_TREE	    theBaseTree;    pANTLR3_COMMON_TREE	    theCommonTree;    /* Indicate this recognizer had an error while processing.     */    recognizer->errorCount++;    theToken	= NULL;		/* Assume there is no token to use  */    fprintf(stderr, "%s(", (char *)(recognizer->exception->streamName));#ifdef WIN32    /* shanzzle fraazzle Dick Dastardly */    fprintf(stderr, "%I64d) ", recognizer->exception->line);#else    fprintf(stderr, "%lld) ", recognizer->exception->type);#endif    fprintf(stderr, ": error %d : %s", 					    recognizer->exception->type,		    (pANTLR3_UINT8)	   (recognizer->exception->message));					        /* How we determine the next piece is dependent on which thign raised the     * error.     
*/    switch	(recognizer->type)    {    case	ANTLR3_TYPE_PARSER:	parser	    = (pANTLR3_PARSER) (recognizer->super);	tparser	    = NULL;	is	    = parser->tstream->istream;	theToken    = (pANTLR3_COMMON_TOKEN)(recognizer->exception->token);	fprintf(stderr, ", at offset %d", recognizer->exception->charPositionInLine);	if  (theToken != NULL)	{	    if (theToken->type == ANTLR3_TOKEN_EOF)	    {		fprintf(stderr, ", at <EOF>");	    }	    else	    {		fprintf(stderr, ", near %s", theToken->toString(theToken)->chars);	    }	}	break;    case	ANTLR3_TYPE_TREE_PARSER:	tparser		= (pANTLR3_TREE_PARSER) (recognizer->super);	parser		= NULL;	is		= tparser->ctnstream->tnstream->istream;	theBaseTree	= (pANTLR3_BASE_TREE)(recognizer->exception->token);	if  (theBaseTree != NULL)	{	    theCommonTree	= (pANTLR3_COMMON_TREE)	    theBaseTree->super;	    if	(theCommonTree != NULL)	    {		theToken	= (pANTLR3_COMMON_TOKEN)    theCommonTree->getToken(theBaseTree);	    }	    fprintf(stderr, ", at offset %d", theBaseTree->getCharPositionInLine(theBaseTree));	}	break;    default:	    	fprintf(stderr, "Base recognizerfunction displayRecognitionError called by unknown parser type - provide override for this function\n");	return;	break;    }         fprintf(stderr, "\n");    /* TODO: Improve error output acccording to the exception type, though generally     *       the implementor will want their own function to replace this.     */}/** Recover from an error found on the input stream.  Mostly this is *  NoViableAlt exceptions, but could be a mismatched token that *  the match() routine could not recover from. 
*/
static void
recover(pANTLR3_BASE_RECOGNIZER recognizer)
{
    pANTLR3_INT_STREAM  input;      /* Stream the recognizer is reading from       */
    pANTLR3_BITSET      resyncSet;  /* Token types we are allowed to resync on     */

    /* Locate the underlying int stream; where it lives depends on whether
     * the recognizer is wrapped by a parser or by a tree parser.
     */
    if (recognizer->type == ANTLR3_TYPE_PARSER)
    {
        input = ((pANTLR3_PARSER) (recognizer->super))->tstream->istream;
    }
    else if (recognizer->type == ANTLR3_TYPE_TREE_PARSER)
    {
        input = ((pANTLR3_TREE_PARSER) (recognizer->super))->ctnstream->tnstream->istream;
    }
    else
    {
        fprintf(stderr, "Base recognizerfunction recover called by unknown paresr type - provide override for this function\n");
        return;
    }

    /* Failsafe: if the previous error was raised at this very index then
     * nothing was consumed last time round, so eat one token to guarantee
     * forward progress and avoid looping forever.
     */
    if (recognizer->lastErrorIndex == input->index(input))
    {
        input->consume(input);
    }

    /* Remember where this error happened for the check above.
     */
    recognizer->lastErrorIndex = input->index(input);

    /* Skip tokens until one from the recovery set turns up, bracketing the
     * skip with the resync hooks (for debuggers and so on).
     */
    resyncSet = recognizer->computeErrorRecoverySet(recognizer);

    recognizer->beginResync(recognizer);
    recognizer->consumeUntilSet(recognizer, resyncSet);
    recognizer->endResync(recognizer);

    /* The recovery set was built just for this call, so release it.
     */
    resyncSet->free(resyncSet);

    /* Clear the error flag so the exception is not reported a second time.
     */
    recognizer->error = ANTLR3_FALSE;
}

/** Attempt to recover from a single missing or extra token.
 *
 *  EXTRA TOKEN
 *
 *  LA(1) is not what we are looking for.  If LA(2) has the right token,
 *  however, then assume LA(1) is some extra spurious token.  Delete it
 *  and LA(2) as if we were doing a normal match(), which advances the
 *  input.
 *
 *  MISSING TOKEN
 *
 *  If current token is consistent with what could come after
 *  ttype then it is ok to "insert" the missing token, else throw
 *  exception.  For example, Input "i=(3;" is clearly missing the
 *  ')'.  When the parser returns from the nested call to expr, it
 *  will have call chain:
 *
 *    stat -> expr -> atom
 *
 *  and it will be trying to match the ')' at this point in the
 *  derivation:
 *
 *       => ID '=' '(' INT ')' ('+' atom)* ';'
 *                          ^
 *  match() will see that ';' doesn't match ')' and report a
 *  mismatched token error.  To recover, it sees that LA(1)==';'
 *  is in the set of tokens that can follow the ')' token
 *  reference in rule atom.  It can assume that you forgot the ')'.
 *
 * May need to come back and look at the exception stuff here, I am assuming 
 * that the exception that was passed in in the java implementation is
 * stored in the recognizer exception stack. To 'throw' it we set the
 * error flag and rules can cascade back when this is set. 
*/
/* Parameters:
 *   recognizer - recognizer that hit the mismatch
 *   ttype      - token type the caller was trying to match
 *   follow     - follow set handed on to recoverFromMismatchedElement()
 *
 * On return recognizer->error is ANTLR3_FALSE if the extra token was
 * deleted; if neither deletion nor insertion worked, both
 * recognizer->error and recognizer->failed are ANTLR3_TRUE.
 */
static void
recoverFromMismatchedToken(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET follow)
{
    pANTLR3_PARSER          parser;
    pANTLR3_TREE_PARSER     tparser;
    pANTLR3_INT_STREAM      is;

    /* Find the input stream, which hangs off a different structure
     * depending on the kind of recognizer we are.
     */
    switch (recognizer->type)
    {
    case ANTLR3_TYPE_PARSER:

        parser  = (pANTLR3_PARSER) (recognizer->super);
        is      = parser->tstream->istream;
        break;

    case ANTLR3_TYPE_TREE_PARSER:

        tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
        is      = tparser->ctnstream->tnstream->istream;
        break;

    default:

        fprintf(stderr, "Base recognizerfunction recoverFromMismatchedToken called by unknown paresr type - provide override for this function\n");
        return;
    }

    /* If the next token after the one we are looking at in the input stream
     * is what we are looking for then we remove the one we have discovered
     * from the stream by consuming it, then consume this next one along too as
     * if nothing had happened.
     */
    if (is->_LA(is, 2) == ttype)
    {
        /* Print out the error
         */
        recognizer->reportError(recognizer);

        /* Call resync hook (for debuggers and so on)
         */
        recognizer->beginResync(recognizer);

        /* "delete" the extra token
         */
        is->consume(is);

        /* End resync hook
         */
        recognizer->endResync(recognizer);

        /* consume the token that the rule actually expected to get
         */
        is->consume(is);

        recognizer->error = ANTLR3_FALSE;   /* Exception is not outstanding any more */

        /* Recovery by deletion succeeded, so we are finished.  Without this
         * return control fell through into the insertion attempt below,
         * which could raise the error flags again after a good recovery.
         */
        return;
    }

    /* The next token (after the one that is current) is not the one
     * that we were expecting, so the input is in more of an error state
     * than we hoped.
     * If we are able to recover from the error using the follow set, then
     * we are hunky dory again and can move on, if we cannot, then we resort
     * to throwing the exception.
     */
    if (recognizer->recoverFromMismatchedElement(recognizer, follow) == ANTLR3_FALSE)
    {
        recognizer->error   = ANTLR3_TRUE;
        recognizer->failed  = ANTLR3_TRUE;
    }
}

/** Recover from a failed set match.  Only the "single token insertion"
 *  path is attempted, by way of recoverFromMismatchedElement(); when that
 *  reports ANTLR3_FALSE both recognizer->error and recognizer->failed are
 *  set to ANTLR3_TRUE.
 *
 *  @param recognizer  Recognizer that hit the mismatch.
 *  @param follow      Follow set passed on to recoverFromMismatchedElement().
 */
static void
recoverFromMismatchedSet(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET follow)
{
    /* Nothing here reads the input stream yet, so the recognizer type is
     * only checked in order to refuse recognizers we do not know about.
     */
    switch (recognizer->type)
    {
    case ANTLR3_TYPE_PARSER:
    case ANTLR3_TYPE_TREE_PARSER:

        break;

    default:

        fprintf(stderr, "Base recognizerfunction recoverFromMismatchedSet called by unknown paresr type - provide override for this function\n");
        return;
    }

    /* TODO - Single token deletion like in recoverFromMismatchedToken()
     */
    if (recognizer->recoverFromMismatchedElement(recognizer, follow) == ANTLR3_FALSE)
    {
        recognizer->error   = ANTLR3_TRUE;
        recognizer->failed  = ANTLR3_TRUE;
    }
}

/** This code is factored out from mismatched token and mismatched set
 *  recovery.  It handles "single token insertion" error recovery for
 *  both.  No tokens are consumed to recover from insertions.  Return
 *  true if recovery was possible else return false. 
*/static ANTLR3_BOOLEAN	recoverFromMismatchedElement	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET follow){    pANTLR3_BITSET	    viableToksFollowingRule;    pANTLR3_BITSET	    newFollow;    pANTLR3_PARSER	    parser;    pANTLR3_TREE_PARSER	    tparser;    pANTLR3_INT_STREAM	    is;    switch	(recognizer->type)    {    case	ANTLR3_TYPE_PARSER:	parser  = (pANTLR3_PARSER) (recognizer->super);	tparser	= NULL;	is	= parser->tstream->istream;	break;    case	ANTLR3_TYPE_TREE_PARSER:	tparser = (pANTLR3_TREE_PARSER) (recognizer->super);	parser	= NULL;	is	= tparser->ctnstream->tnstream->istream;	break;    default:	    	fprintf(stderr, "Base recognizerfunction recover called by unknown paresr type - provide override for this function\n");	return ANTLR3_FALSE;
antlr3baserecognizer.c - 源码说明

本页面展示了「antlr最新版本V3源代码」中的 antlr3baserecognizer.c 源码文件，采用 C语言编程语言编写，共 1,501 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与antlr相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?