📄 antlr3baserecognizer.c
字号:
/** \file
 * Contains the base functions that all recognizers start out with.
 * Any function can be overridden by a lexer/parser/tree parser or by the
 * ANTLR3 programmer.
 *
 */
#include    <antlr3baserecognizer.h>

#ifdef WIN32
#pragma warning( disable : 4100 )
#endif

/* Interface functions - standard implementations cover parser and treeparser
 * almost completely but are overridden by parser or tree parser as needed. Lexer overrides
 * most of these functions.
 */
static void                 beginResync                 (pANTLR3_BASE_RECOGNIZER recognizer);
static pANTLR3_BITSET       computeErrorRecoverySet     (pANTLR3_BASE_RECOGNIZER recognizer);
static void                 endResync                   (pANTLR3_BASE_RECOGNIZER recognizer);
static ANTLR3_BOOLEAN       match                       (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET follow);
static void                 matchAny                    (pANTLR3_BASE_RECOGNIZER recognizer);
static void                 mismatch                    (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET follow);
static void                 reportError                 (pANTLR3_BASE_RECOGNIZER recognizer);
static pANTLR3_BITSET       computeCSRuleFollow         (pANTLR3_BASE_RECOGNIZER recognizer);
static pANTLR3_BITSET       combineFollows              (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact);
static void                 displayRecognitionError     (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames);
static void                 recover                     (pANTLR3_BASE_RECOGNIZER recognizer);
static void                 recoverFromMismatchedToken  (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET follow);
static void                 recoverFromMismatchedSet    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET follow);
static ANTLR3_BOOLEAN       recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET follow);
static void                 consumeUntil                (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType);
static void                 consumeUntilSet             (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set);
static pANTLR3_STACK        getRuleInvocationStack      (pANTLR3_BASE_RECOGNIZER recognizer);
static pANTLR3_STACK        getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name);
static pANTLR3_HASH_TABLE   toStrings                   (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE);
static ANTLR3_UINT64        getRuleMemoization          (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ruleIndex, ANTLR3_UINT64 ruleParseStart);
static ANTLR3_BOOLEAN       alreadyParsedRule           (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ruleIndex);
static void                 memoize                     (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ruleIndex, ANTLR3_UINT64 ruleParseStart);
static ANTLR3_BOOLEAN       synpred                     (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx));
static void                 reset                       (pANTLR3_BASE_RECOGNIZER recognizer);
static void                 freeBR                      (pANTLR3_BASE_RECOGNIZER recognizer);

/**
 * \brief
 * Allocates a base recognizer, installs the default implementation of every
 * interface function and resets its state variables.
 *
 * \param type
 * Recognizer type tag, stored in the structure as given.
 * \param sizeHint
 * Size hint, stored in the structure as given.
 *
 * \return
 * Pointer to the new recognizer, or ANTLR3_ERR_NOMEM cast to the pointer
 * type if the allocation fails.
 */
ANTLR3_API pANTLR3_BASE_RECOGNIZER
antlr3BaseRecognizerNew(ANTLR3_UINT32 type, ANTLR3_UINT32 sizeHint)
{
    pANTLR3_BASE_RECOGNIZER recognizer;

    /* Allocate memory for the structure
     */
    recognizer = (pANTLR3_BASE_RECOGNIZER) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BASE_RECOGNIZER));

    if (recognizer == NULL)
    {
        /* Allocation failed
         */
        return (pANTLR3_BASE_RECOGNIZER) ANTLR3_ERR_NOMEM;
    }

    /* Install the BR API
     */
    recognizer->alreadyParsedRule           = alreadyParsedRule;
    recognizer->beginResync                 = beginResync;
    recognizer->combineFollows              = combineFollows;
    recognizer->computeCSRuleFollow         = computeCSRuleFollow;
    recognizer->computeErrorRecoverySet     = computeErrorRecoverySet;
    recognizer->consumeUntil                = consumeUntil;
    recognizer->consumeUntilSet             = consumeUntilSet;
    recognizer->displayRecognitionError     = displayRecognitionError;
    recognizer->endResync                   = endResync;
    recognizer->exConstruct                 = antlr3MTExceptionNew;
    recognizer->getRuleInvocationStack      = getRuleInvocationStack;
    recognizer->getRuleInvocationStackNamed = getRuleInvocationStackNamed;
    recognizer->getRuleMemoization          = getRuleMemoization;
    recognizer->match                       = match;
    recognizer->matchAny                    = matchAny;
    recognizer->memoize                     = memoize;
    recognizer->mismatch                    = mismatch;
    recognizer->recover                     = recover;
    recognizer->recoverFromMismatchedElement= recoverFromMismatchedElement;
    recognizer->recoverFromMismatchedSet    = recoverFromMismatchedSet;
    recognizer->recoverFromMismatchedToken  = recoverFromMismatchedToken;
    recognizer->reportError                 = reportError;
    recognizer->reset                       = reset;
    recognizer->synpred                     = synpred;
    recognizer->toStrings                   = toStrings;
    recognizer->free                        = freeBR;

    /* Initialize variables
     */
    recognizer->type            = type;
    recognizer->errorRecovery   = ANTLR3_FALSE;
    recognizer->lastErrorIndex  = -1;
    recognizer->failed          = ANTLR3_FALSE;
    recognizer->errorCount      = 0;
    recognizer->backtracking    = 0;
    recognizer->following       = NULL;
    recognizer->_fsp            = -1;
    recognizer->ruleMemo        = NULL;
    recognizer->tokenNames      = NULL;
    recognizer->sizeHint        = sizeHint;

    /* The structure comes from a plain allocation, so the exception chain head
     * and the outstanding-error flag must be reset explicitly: the free routine
     * and the exception constructors read them before anything else writes them.
     */
    recognizer->exception       = NULL;
    recognizer->error           = ANTLR3_FALSE;

    return recognizer;
}

/**
 * \brief
 * Releases a base recognizer: the rule memoization table (if any), the
 * exception at the head of the exception chain (if any) and then the
 * recognizer structure itself.
 *
 * \param recognizer
 * Recognizer to release; a NULL pointer is ignored.
 */
static void
freeBR (pANTLR3_BASE_RECOGNIZER recognizer)
{
    pANTLR3_EXCEPTION thisE;

    if (recognizer == NULL)
    {
        return;
    }

    if (recognizer->ruleMemo != NULL)
    {
        recognizer->ruleMemo->free(recognizer->ruleMemo);
    }

    /* NOTE(review): only the head exception is handed to freeEx here;
     * presumably freeEx walks nextException itself - confirm.
     */
    thisE = recognizer->exception;
    if (thisE != NULL)
    {
        thisE->freeEx(thisE);
    }

    ANTLR3_FREE(recognizer);
}

/**
 * \brief
 * Creates a new Mismatched Token Exception and inserts in the recognizer
 * exception stack.
* * \param recognizer * Context pointer for this recognizer * */ANTLR3_API voidantlr3MTExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer){ /* Create a basic recognition exception strucuture */ antlr3RecognitionExceptionNew(recognizer); /* Now update it to indicate this is a Mismatched token exception */ recognizer->exception->name = ANTLR3_MISMATCHED_EX_NAME; recognizer->exception->type = ANTLR3_MISMATCHED_TOKEN_EXCEPTION; return;}ANTLR3_API voidantlr3RecognitionExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer){ pANTLR3_EXCEPTION ex; pANTLR3_LEXER lexer; pANTLR3_PARSER parser; pANTLR3_TREE_PARSER tparser; pANTLR3_INPUT_STREAM ins; pANTLR3_INT_STREAM is; pANTLR3_COMMON_TOKEN_STREAM cts; pANTLR3_TREE_NODE_STREAM tns; ins = NULL; cts = NULL; tns = NULL; is = NULL; lexer = NULL; parser = NULL; tparser = NULL; switch (recognizer->type) { case ANTLR3_TYPE_LEXER: lexer = (pANTLR3_LEXER) (recognizer->super); ins = lexer->input; is = ins->istream; break; case ANTLR3_TYPE_PARSER: parser = (pANTLR3_PARSER) (recognizer->super); cts = (pANTLR3_COMMON_TOKEN_STREAM)(parser->tstream->super); is = parser->tstream->istream; break; case ANTLR3_TYPE_TREE_PARSER: tparser = (pANTLR3_TREE_PARSER) (recognizer->super); tns = tparser->ctnstream->tnstream; is = tns->istream; break; default: fprintf(stderr, "Base recognizerfunction antlr3RecognitionExceptionNew called by unknown paresr type - provide override for this function\n"); return; break; } /* Create a basic exception strucuture */ ex = antlr3ExceptionNew(ANTLR3_RECOGNITION_EXCEPTION, (void *)ANTLR3_RECOGNITION_EX_NAME, NULL, ANTLR3_FALSE); /* Rest of information depends on the base type of the * input stream. 
*/ switch (is->type & ANTLR3_INPUT_MASK) { case ANTLR3_CHARSTREAM: ex->c = is->_LA (is, 1); /* Current input character */ ex->line = ins->getLine (ins); /* Line number comes from stream */ ex->charPositionInLine = ins->getCharPositionInLine (ins); /* Line offset also comes from the stream */ ex->index = is->index (is); ex->streamName = ins->getSourceName (ins); ex->message = "Unexpected character"; break; case ANTLR3_TOKENSTREAM: ex->token = cts->tstream->_LT (cts->tstream, 1); /* Current input token */ ex->line = ((pANTLR3_COMMON_TOKEN)(ex->token))->getLine (ex->token); ex->charPositionInLine = ((pANTLR3_COMMON_TOKEN)(ex->token))->getCharPositionInLine (ex->token); ex->index = cts->tstream->istream->index (cts->tstream->istream); ex->streamName = "Token stream: fix this Jim, pick p name from input stream into token stream!"; ex->message = "Unexpected token"; break; case ANTLR3_COMMONTREENODE: ex->token = tns->_LT (tns, 1); /* Current input tree node */ ex->line = ((pANTLR3_BASE_TREE)(ex->token))->getLine (ex->token); ex->charPositionInLine = ((pANTLR3_BASE_TREE)(ex->token))->getCharPositionInLine (ex->token); ex->index = tns->istream->index (tns->istream); ex->streamName = "Treenode stream: fix this Jim, pick p name from input stream into token stream!"; ex->message = "Unexpected node"; break; } ex->input = is; ex->nextException = recognizer->exception; /* So we don't leak the memory */ recognizer->exception = ex; recognizer->error = ANTLR3_TRUE; /* Exception is outstanding */ return;}/** Match current input symbol against ttype. Upon error, do one token * insertion or deletion if possible. You can override to not recover * here and bail out of the current production to the normal error * exception catch (at the end of the method) by just throwing * MismatchedTokenException upon input._LA(1)!=ttype. 
*/
/* recognizer : recognizer context pointer
 * ttype      : token type the caller expects next in the input
 * follow     : follow set handed on to the mismatch handler
 *
 * Returns ANTLR3_TRUE when the expected token was found and consumed,
 * ANTLR3_FALSE in every other case.
 */
static ANTLR3_BOOLEAN
match(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET follow)
{
    pANTLR3_INT_STREAM  input;

    /* Only parsers and tree parsers are handled here; find their int stream.
     */
    if (recognizer->type == ANTLR3_TYPE_PARSER)
    {
        input = ((pANTLR3_PARSER) (recognizer->super))->tstream->istream;
    }
    else if (recognizer->type == ANTLR3_TYPE_TREE_PARSER)
    {
        input = ((pANTLR3_TREE_PARSER) (recognizer->super))->ctnstream->tnstream->istream;
    }
    else
    {
        fprintf(stderr, "Base recognizerfunction 'match' called by unknown paresr type - provide override for this function\n");
        return ANTLR3_FALSE;
    }

    if (input->_LA(input, 1) != ttype)
    {
        /* Not the token we were told to expect. While backtracking we only
         * flag the failure and leave.
         */
        if (recognizer->backtracking > 0)
        {
            recognizer->failed = ANTLR3_TRUE;
            return ANTLR3_FALSE;
        }

        /* No backtracking going on, so hand over to the mismatch handler,
         * which creates an exception in the recognizer exception stack.
         */
        recognizer->mismatch(recognizer, ttype, follow);
        return ANTLR3_FALSE;
    }

    /* Found what we expected: take it off the stream and clear any
     * error recovery / failure state.
     */
    input->consume(input);
    recognizer->errorRecovery   = ANTLR3_FALSE;
    recognizer->failed          = ANTLR3_FALSE;

    return ANTLR3_TRUE;
}

/**
 * \brief
 * Consumes the next token whatever it is and resets the recognizer state
 * so that it is not in error.
* * \param recognizer * Recognizer context pointer */static voidmatchAny(pANTLR3_BASE_RECOGNIZER recognizer){ pANTLR3_PARSER parser; pANTLR3_TREE_PARSER tparser; pANTLR3_INT_STREAM is; switch (recognizer->type) { case ANTLR3_TYPE_PARSER: parser = (pANTLR3_PARSER) (recognizer->super); tparser = NULL; is = parser->tstream->istream; break; case ANTLR3_TYPE_TREE_PARSER: tparser = (pANTLR3_TREE_PARSER) (recognizer->super); parser = NULL; is = tparser->ctnstream->tnstream->istream; break; default: fprintf(stderr, "Base recognizerfunction 'matchAny' called by unknown paresr type - provide override for this function\n"); return; break; } recognizer->errorRecovery = ANTLR3_FALSE; recognizer->failed = ANTLR3_FALSE; is->consume(is); return;}/** * \remark Mismatch only works for parsers and must be overridden for anything else. */static voidmismatch(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET follow){ pANTLR3_PARSER parser; pANTLR3_TREE_PARSER tparser; pANTLR3_INT_STREAM is; /* Install a mismatched token exception in the exception stack */ antlr3MTExceptionNew(recognizer); recognizer->exception->expecting = ttype; switch (recognizer->type) { case ANTLR3_TYPE_PARSER: parser = (pANTLR3_PARSER) (recognizer->super); tparser = NULL; is = parser->tstream->istream; break; default: fprintf(stderr, "Base recognizerfunction 'mismatch' called by unknown parser type - provide override for this function\n"); return; break; } /* Enter error recovery mode */ recognizer->recoverFromMismatchedToken(recognizer, ttype, follow); return;}static void reportError (pANTLR3_BASE_RECOGNIZER recognizer){ if (recognizer->errorRecovery == ANTLR3_TRUE) { /* In error recovery so don't display another error while doing so */ return; } /* Signal we are in error recovery now */ recognizer->errorRecovery = ANTLR3_TRUE; recognizer->displayRecognitionError(recognizer, recognizer->tokenNames);}static void beginResync (pANTLR3_BASE_RECOGNIZER recognizer){}static void endResync 
(pANTLR3_BASE_RECOGNIZER recognizer){}/** * Documentation below is from the Java implementation. * * Compute the error recovery set for the current rule. During * rule invocation, the parser pushes the set of tokens that can * follow that rule reference on the stack; this amounts to * computing FIRST of what follows the rule reference in the * enclosing rule. This local follow set only includes tokens * from within the rule; i.e., the FIRST computation done by * ANTLR stops at the end of a rule. * * EXAMPLE * * When you find a "no viable alt exception", the input is not * consistent with any of the alternatives for rule r. The best * thing to do is to consume tokens until you see something that * can legally follow a call to r *or* any rule that called r. * You don't want the exact set of viable next tokens because the * input might just be missing a token--you might consume the * rest of the input looking for one of the missing tokens. * * Consider grammar: * * a : '[' b ']' * | '(' b ')' * ; * b : c '^' INT ; * c : ID * | INT * ; * * At each rule invocation, the set of tokens that could follow * that rule is pushed on a stack. Here are the various "local" * follow sets: * * FOLLOW(b1_in_a) = FIRST(']') = ']' * FOLLOW(b2_in_a) = FIRST(')') = ')' * FOLLOW(c_in_b) = FIRST('^') = '^' * * Upon erroneous input "[]", the call chain is * * a -> b -> c * * and, hence, the follow context stack is: * * depth local follow set after call to rule * 0 <EOF> a (from main()) * 1 ']' b * 3 '^' c * * Notice that ')' is not included, because b would have to have * been called from a different context in rule a for ')' to be * included. * * For error recovery, we cannot consider FOLLOW(c) * (context-sensitive or otherwise). We need the combined set of * all context-sensitive FOLLOW sets--the set of all tokens that * could follow any reference in the call chain. We need to * resync to one of those tokens. 
Note that FOLLOW(c)='^' and if * we resync'd to that token, we'd consume until EOF. We need to
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -