📄 yyrecover.c
字号:
/*- * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */#ifndef lintstatic char sccsid[] = "@(#)yyrecover.c 8.1 (Berkeley) 6/6/93";#endif /* not lint */#include "whoami.h"#include "0.h"#include "tree_ty.h" /* must be included for yy.h */#include "yy.h"/* * Very simplified version of Graham-Rhodes error recovery * method for LALR parsers. Backward move is embodied in * default reductions of the yacc parser until an error condition * is reached. Forward move is over a small number of input tokens * and cannot "condense". The basic corrections are: * * 1) Delete the input token. * * 2) Replace the current input with a legal input. * * 3) Insert a legal token. * * All corrections are weighted, considered only if they allow * at least two shifts, and the cost of a correction increases if * it allows shifting over only a part of the lookahead. * * Another error situation is that which occurs when an identifier "fails" * a reduction because it is not the required "class". * In this case, we also consider replacing this identifier, which has * already been shifted over, with an identifier of the correct class. * * Another correction performed here is unique symbol insertion. * If the current state admits only one input, and no other alternative * correction presents itself, then that symbol will be inserted. * There is a danger in this of looping, and it is handled * by counting true shifts over input (see below). * * * A final class of corrections, considered only when the error * occurred immediately after a shift over a terminal, involves * the three basic corrections above, but with the point of error * considered to be before this terminal was shifted over, effectively * "unreading" this terminal. This is a feeble attempt at elimination * of the left-right bias and because "if" has a low weight and some * statements are quite simple i.e. * * cse ch of ... * * we can get a small number of errors. The major deficiency of * this is that we back up only one token, and that the forward * move is over a small number of tokens, often not enough to really * tell what the input should be, e.g. in * * a[i] > a[i - 1] ... * * In such cases a bad identifier (misspelled keyword) or omitted * keyword will be change or inserted as "if" as it has the lowest cost. * This is not terribly bad, as "if"s are most common. * This also allows the correction of other errors. * * This recovery depends on the default reductions which delay * noticing the error until the parse reaches a state where the * relevant "alternatives" are visible. Note that it does not * consider tokens which will cause reductions before being * shifted over. This requires the grammar to be written in a * certain way for the recovery to work correctly. * In some sense, also, the recovery suffers because we have * LALR(1) tables rather than LR(1) tables, e.g. in * * if rec.field < rec2,field2 then *//* * Definitions of possible corrective actions */#define CPANIC 0#define CDELETE 1#define CREPLACE 2#define CINSERT 3#define CUNIQUE 4#define CCHIDENT 5/* * Multiplicative cost factors for corrective actions. * * When an error occurs we take YCSIZ - 1 look-ahead tokens. * If a correction being considered will shift over only part of * that look-ahead, it is not completely discarded, but rather * "weighted", its cost being multiplied by a weighting factor. * For a correction to be considered its weighted cost must be less * than CLIMIT. * * Non-weighted costs are considered: * * LOW <= 3 * MEDIUM 4,5 * HIGH >= 6 * * CURRENT WEIGHTING STRATEGY: Aug 20, 1977 * * For all kinds of corrections we demand shifts over two symbols. * Corrections have high weight even after two symbol * shifts because the costs for deleting and inserting symbols are actually * quite low; we do not want to change weighty symbols * on inconclusive evidence. * * The weights are the same after the third look ahead. * This prevents later, unrelated errors from causing "funny" * biases of the weights toward one type of correction. * * Current look ahead is 5 symbols. *//*** CLIMIT is defined in yy.h for yycosts ***/#define CPRLIMIT 50#define CCHIDCOST 3char insmult[8] = {INFINITY, INFINITY, INFINITY, 15, 8, 6, 3, 1};char repmult[7] = {INFINITY, INFINITY, INFINITY, 8, 6, 3, 1};char delmult[6] = {INFINITY, INFINITY, INFINITY, 6, 3, 1};#define NOCHAR -1#define Eprintf if (errtrace) printf#define Tprintf if (testtrace) printf/* * Action arrays of the parser needed here */union semstack *yypv;int yyact[], yypact[];/* * Yytips is the tip of the stack when using * the function loccor to check for local * syntactic correctness. As we don't want * to copy the whole parser stack, but want * to simulate parser moves, we "split" * the parser stack and keep the tip here. */#define YYTIPSIZ 16int yytips[YYTIPSIZ], yytipct;int yytipv[YYTIPSIZ];/* * The array YC saves the lookahead tokens for the * forward moves. * Yccnt is the number of tokens in the YC array. */#define YCSIZ 6int yCcnt;struct yytok YC0[YCSIZ + 1];struct yytok *YC;/* * YCps gives the top of stack at * the point of error. */bool yyunique = TRUE;STATIC unsigned yyTshifts;/* * Cact is the corrective action we have decided on * so far, ccost its cost, and cchar the associated token. * Cflag tells if the correction is over the previous input token. */int cact, ccost, cchar, cflag;/* * ACtok holds the token under * consideration when examining * the lookaheads in a state. */struct yytok ACtok;#define acchar ACtok.Yychar#define aclval ACtok.Yylval/* * Make a correction to the current stack which has * top of stack pointer Ps. */yyrecover(Ps0, idfail) int *Ps0, idfail;{ register int c, i; int yyrwant, yyrhave;#ifdef PI Recovery = TRUE;#endif YC = &YC0[1];#ifdef DEBUG if (errtrace) { setpfx('p'); yerror("Point of error"); printf("States %d %d ...", Ps0[0], Ps0[-1]); if (idfail) printf(" [Idfail]");#ifdef PXP putchar('\n');#else pchr('\n');#endif printf("Input %s%s", tokname(&Y , 0) , tokname(&Y , 1)); }#endif /* * We first save the current input token * and its associated semantic information. */ if (yychar < 0) yychar = yylex(); copy((char *) (&YC[0]), (char *) (&Y), sizeof Y); /* * Set the default action and cost */ cact = CPANIC, ccost = CLIMIT, cflag = 0; /* * Peek ahead */ for (yCcnt = 1; yCcnt < YCSIZ; yCcnt++) { yychar = yylex(); copy((char *) (&YC[yCcnt]), (char *) (&Y), sizeof YC[0]);#ifdef DEBUG Eprintf(" | %s%s", tokname(&YC[yCcnt] , 0 ) , tokname(&YC[yCcnt] , 1 ));#endif }#ifdef DEBUG Eprintf("\n");#endif /* * If we are here because a reduction failed, try * correcting that. */ if (idfail) { /* * Save the particulars about * the kind of identifier we want/have. */ yyrwant = yyidwant; yyrhave = yyidhave;#ifdef DEBUG Tprintf(" Try Replace %s identifier with %s identifier cost=%d\n", classes[yyidhave], classes[yyidwant], CCHIDCOST);#endif /* * Save the semantics of the ID on the * stack, and null them out to free * up the reduction in question. */ i = yypv[0].i_entry; yypv[0].i_entry = nullsem(YID); c = correct(NOCHAR, 0, CCHIDCOST, &repmult[2], Ps0, (int *) yypv); yypv[0].i_entry = i;#ifdef DEBUG if (c < CPRLIMIT || fulltrace) Eprintf("Cost %2d Replace %s identifier with %s identifier\n", c, classes[yyrhave], classes[yyrwant]);#endif if (c < ccost) cact = CCHIDENT, ccost = c, cchar = YID; } /* * First try correcting the state we are in */ trystate(Ps0, (int *) yypv, 0, &insmult[1], &delmult[1], &repmult[1]); /* * Now, if we just shifted over a terminal, try * correcting it. */ if (OY.Yychar != -1 && OY.Yylval != nullsem(OY.Yychar)) { YC--; copy((char *) (&YC[0]), (char *) (&OY), sizeof YC[0]); trystate(Ps0 - 1, (int *) (yypv - 1), 1, insmult, delmult, repmult); if (cflag == 0) YC++; else { yypv--;#ifdef PXP yypw--;#endif Ps0--; yCcnt++; } } /* * Restoring the first look ahead into * the scanner token allows the error message * routine to print the error message with the text * of the correct line. */ copy((char *) (&Y), (char *) (&YC[0]), sizeof Y); /* * Unique symbol insertion. * * If there was no reasonable correction found, * but only one input to the parser is acceptable * we report that, and try it. * * Special precautions here to prevent looping. * The number of true inputs shifted over at the point * of the last unique insertion is recorded in the * variable yyTshifts. If this is not less than * the current number in yytshifts, we do not insert. * Thus, after one unique insertion, no more unique * insertions will be made until an input is shifted * over. This guarantees termination. */ if (cact == CPANIC && !idfail) { register int *ap; ap = &yyact[yypact[*Ps0 + 1]]; if (*ap == -ERROR) ap += 2; if (ap[0] <= 0 && ap[2] > 0) { cchar = -ap[0]; if (cchar == YEOF) yyexeof(); if (cchar != ERROR && yyTshifts < yytshifts) { cact = CUNIQUE;#ifdef DEBUG Eprintf("Unique symbol %s%s\n" , charname(cchar , 0 ) , charname(cchar , 1 ));#endif /* * Note that the inserted symbol * will not be counted as a true input * (i.e. the "yytshifts--" below) * so that a true shift will be needed * to make yytshifts > yyTshifts. */ yyTshifts = yytshifts; } } } /* * Set up to perform the correction. * Build a token appropriate for replacement * or insertion in the yytok structure ACchar * having the attributes of the input at the * point of error. */ copy((char *) (&ACtok), (char *) (&YC[0]), sizeof ACtok); acchar = cchar; aclval = nullsem(acchar); if (aclval != NIL) recovered(); switch (cact) { /* * Panic, just restore the * lookahead and return. */ case CPANIC: setpfx('E'); if (idfail) { copy((char *) (&Y), (char *) (&OY), sizeof Y); if (yyrhave == NIL) {#ifdef PI if (yybaduse(yypv[0].cptr, yyeline, ISUNDEF) == NIL)#endif yerror("Undefined identifier"); } else { yerror("Improper %s identifier", classes[yyrhave]);#ifdef PI (void) yybaduse(yypv[0].cptr, yyeline, NIL);#endif } /* * Suppress message from panic routine */ yyshifts = 1; } i = 0; /* Note that on one path we dont touch yyshifts ! */ break; /* * Delete the input. * Mark this as a shift over true input. * Restore the lookahead starting at * the second token. */ case CDELETE: if (ccost != 0) yerror("Deleted %s%s", tokname(&YC[0] , 0 ) , tokname(&YC[0] , 1 )); yytshifts++; i = 1; yyshifts = 0; break; /* * Replace the input with a new token. */ case CREPLACE: if (acchar == YEOF) yyexeof(); if (acchar == YEND) aclval = NIL; yerror("Replaced %s%s with a %s%s", tokname(&YC[0] , 0 ), tokname(&YC[0] , 1 ),
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -