📄 yacc.c
字号:
changes = 1; while(changes) { changes = 0; NTLOOP(i) { t = pres[i+1]; for(s = pres[i]; s < t; ++s) for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) { changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset); if(!pempty[ch]) break; } } } NTLOOP(i) pfirst[i] = flset(&wsets[i].ws); if(!indebug) return; if(foutput != 0) NTLOOP(i) { Bprint(foutput, "\n%s: ", nontrst[i].name); prlook(pfirst[i]); Bprint(foutput, " %d\n", pempty[i]); }}/* * sorts last state,and sees if it equals earlier ones. returns state number */intstate(int c){ Item *p1, *p2, *k, *l, *q1, *q2; int size1, size2, i; p1 = pstate[nstate]; p2 = pstate[nstate+1]; if(p1 == p2) return 0; /* null state */ /* sort the items */ for(k = p2-1; k > p1; k--) /* make k the biggest */ for(l = k-1; l >= p1; --l) if(l->pitem > k->pitem) { int *s; Lkset *ss; s = k->pitem; k->pitem = l->pitem; l->pitem = s; ss = k->look; k->look = l->look; l->look = ss; } size1 = p2 - p1; /* size of state */ for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) { /* get ith state */ q1 = pstate[i]; q2 = pstate[i+1]; size2 = q2 - q1; if(size1 != size2) continue; k = p1; for(l = q1; l < q2; l++) { if(l->pitem != k->pitem) break; k++; } if(l != q2) continue; /* found it */ pstate[nstate+1] = pstate[nstate]; /* delete last state */ /* fix up lookaheads */ if(nolook) return i; for(l = q1, k = p1; l < q2; ++l, ++k ) { int s; SETLOOP(s) clset.lset[s] = l->look->lset[s]; if(setunion(clset.lset, k->look->lset)) { tystate[i] = MUSTDO; /* register the new set */ l->look = flset( &clset ); } } return i; } /* state is new */ if(nolook) error("yacc state/nolook error"); pstate[nstate+2] = p2; if(nstate+1 >= NSTATES) error("too many states"); if(c >= NTBASE) { mstates[nstate] = ntstates[c-NTBASE]; ntstates[c-NTBASE] = nstate; } else { mstates[nstate] = tstates[c]; tstates[c] = nstate; } tystate[nstate] = MUSTDO; return nstate++;}voidputitem(int *ptr, Lkset *lptr){ Item *j; if(pidebug && foutput != 0) Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate); j = pstate[nstate+1]; j->pitem = ptr; if(!nolook) j->look = flset(lptr); pstate[nstate+1] = ++j; if((int*)j > zzmemsz) { zzmemsz = (int*)j; if(zzmemsz >= &mem0[MEMSIZE]) error("out of state space"); }}/* * mark nonterminals which derive the empty string * also, look for nonterminals which don't derive any token strings */voidcempty(void){ int i, *p; /* first, use the array pempty to detect productions that can never be reduced */ /* set pempty to WHONOWS */ aryfil(pempty, nnonter+1, WHOKNOWS); /* now, look at productions, marking nonterminals which derive something */more: PLOOP(0, i) { if(pempty[*prdptr[i] - NTBASE]) continue; for(p = prdptr[i]+1; *p >= 0; ++p) if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS) break; /* production can be derived */ if(*p < 0) { pempty[*prdptr[i]-NTBASE] = OK; goto more; } } /* now, look at the nonterminals, to see if they are all OK */ NTLOOP(i) { /* the added production rises or falls as the start symbol ... */ if(i == 0) continue; if(pempty[i] != OK) { fatfl = 0; error("nonterminal %s never derives any token string", nontrst[i].name); } } if(nerrors) { summary(); cleantmp(); exits("error"); } /* now, compute the pempty array, to see which nonterminals derive the empty string */ /* set pempty to WHOKNOWS */ aryfil( pempty, nnonter+1, WHOKNOWS); /* loop as long as we keep finding empty nonterminals */again: PLOOP(1, i) { /* not known to be empty */ if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) { for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p) ; /* we have a nontrivially empty nonterminal */ if(*p < 0) { pempty[*prdptr[i]-NTBASE] = EMPTY; /* got one ... try for another */ goto again; } } }}/* * generate the states */voidstagen(void){ int c, i, j, more; Wset *p, *q; /* initialize */ nstate = 0; /* THIS IS FUNNY from the standpoint of portability * it represents the magic moment when the mem0 array, which has * been holding the productions, starts to hold item pointers, of a * different type... * someday, alloc should be used to allocate all this stuff... for now, we * accept that if pointers don't fit in integers, there is a problem... */ pstate[0] = pstate[1] = (Item*)mem; aryfil(clset.lset, tbitset, 0); putitem(prdptr[0]+1, &clset); tystate[0] = MUSTDO; nstate = 1; pstate[2] = pstate[1]; aryfil(amem, ACTSIZE, 0); /* now, the main state generation loop */ for(more=1; more;) { more = 0; SLOOP(i) { if(tystate[i] != MUSTDO) continue; tystate[i] = DONE; aryfil(temp1, nnonter+1, 0); /* take state i, close it, and do gotos */ closure(i); /* generate goto's */ WSLOOP(wsets, p) { if(p->flag) continue; p->flag = 1; c = *(p->pitem); if(c <= 1) { if(pstate[i+1]-pstate[i] <= p-wsets) tystate[i] = MUSTLOOKAHEAD; continue; } /* do a goto on c */ WSLOOP(p, q) /* this item contributes to the goto */ if(c == *(q->pitem)) { putitem(q->pitem+1, &q->ws); q->flag = 1; } if(c < NTBASE) state(c); /* register new state */ else temp1[c-NTBASE] = state(c); } if(gsdebug && foutput != 0) { Bprint(foutput, "%d: ", i); NTLOOP(j) if(temp1[j]) Bprint(foutput, "%s %d, ", nontrst[j].name, temp1[j]); Bprint(foutput, "\n"); } indgo[i] = apack(&temp1[1], nnonter-1) - 1; /* do some more */ more = 1; } }}/* * generate the closure of state i */voidclosure(int i){ Wset *u, *v; Item *p, *q; int c, ch, work, k, *pi, **s, **t; zzclose++; /* first, copy kernel of state i to wsets */ cwp = wsets; ITMLOOP(i, p, q) { cwp->pitem = p->pitem; cwp->flag = 1; /* this item must get closed */ SETLOOP(k) cwp->ws.lset[k] = p->look->lset[k]; WSBUMP(cwp); } /* now, go through the loop, closing each item */ work = 1; while(work) { work = 0; WSLOOP(wsets, u) { if(u->flag == 0) continue; /* dot is before c */ c = *(u->pitem); if(c < NTBASE) { u->flag = 0; /* only interesting case is where . is before nonterminal */ continue; } /* compute the lookahead */ aryfil(clset.lset, tbitset, 0); /* find items involving c */ WSLOOP(u, v) if(v->flag == 1 && *(pi=v->pitem) == c) { v->flag = 0; if(nolook) continue; while((ch = *++pi) > 0) { /* terminal symbol */ if(ch < NTBASE) { SETBIT(clset.lset, ch); break; } /* nonterminal symbol */ setunion(clset.lset, pfirst[ch-NTBASE]->lset); if(!pempty[ch-NTBASE]) break; } if(ch <= 0) setunion(clset.lset, v->ws.lset); } /* * now loop over productions derived from c * c is now nonterminal number */ c -= NTBASE; t = pres[c+1]; for(s = pres[c]; s < t; ++s) { /* * put these items into the closure * is the item there */ WSLOOP(wsets, v) /* yes, it is there */ if(v->pitem == *s) { if(nolook) goto nexts; if(setunion(v->ws.lset, clset.lset)) v->flag = work = 1; goto nexts; } /* not there; make a new entry */ if(cwp-wsets+1 >= WSETSIZE) error( "working set overflow"); cwp->pitem = *s; cwp->flag = 1; if(!nolook) { work = 1; SETLOOP(k) cwp->ws.lset[k] = clset.lset[k]; } WSBUMP(cwp); nexts:; } } } /* have computed closure; flags are reset; return */ if(cwp > zzcwp) zzcwp = cwp; if(cldebug && foutput != 0) { Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook); WSLOOP(wsets, u) { if(u->flag) Bprint(foutput, "flag set!\n"); u->flag = 0; Bprint(foutput, "\t%s", writem(u->pitem)); prlook(&u->ws); Bprint(foutput, "\n"); } }}/* * decide if the lookahead set pointed to by p is known * return pointer to a perminent location for the set */Lkset*flset(Lkset *p){ Lkset *q; int *u, *v, *w, j; for(q = &lkst[nlset]; q-- > lkst;) { u = p->lset; v = q->lset; w = &v[tbitset]; while(v < w) if(*u++ != *v++) goto more; /* we have matched */ return q; more:; } /* add a new one */ q = &lkst[nlset++]; if(nlset >= LSETSIZE) error("too many lookahead sets"); SETLOOP(j) q->lset[j] = p->lset[j]; return q;}voidcleantmp(void){ ZAPFILE(actname); ZAPFILE(tempname);}voidintr(void){ cleantmp(); exits("interrupted");}voidusage(void){ fprint(2, "usage: yacc [-Dn] [-vdS] [-o outputfile] [-s stem] grammar\n"); exits("usage");}voidsetup(int argc, char *argv[]){ long c, t; int i, j, lev, ty, ytab, *p; int vflag, dflag, stem; char actnm[8], *stemc, *s, dirbuf[128]; ytab = 0; vflag = 0; dflag = 0; stem = 0; stemc = "y"; foutput = 0; fdefine = 0; fdebug = 0; ARGBEGIN{ case 'v': case 'V': vflag++; break; case 'D': yydebug = EARGF(usage()); break; case 'd': dflag++; break; case 'o': ytab++; ytabc = EARGF(usage()); break; case 's': stem++; stemc = ARGF(); break; case 'S': parser = PARSERS; break; default: error("illegal option: %c", ARGC()); }ARGEND openup(stemc, dflag, vflag, ytab, ytabc); ftemp = Bopen(tempname = mktemp(ttempname), OWRITE); faction = Bopen(actname = mktemp(tactname), OWRITE); if(ftemp == 0 || faction == 0) error("cannot open temp file"); if(argc < 1) error("no input file"); infile = argv[0]; if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){ i = strlen(infile)+1+strlen(dirbuf)+1+10; s = malloc(i); if(s != nil){ snprint(s, i, "%s/%s", dirbuf, infile); cleanname(s); infile = s; } } finput = Bopen(infile, OREAD); if(finput == 0) error("cannot open '%s'", argv[0]); cnamp = cnames; defin(0, "$end"); extval = PRIVATE; /* tokens start in unicode 'private use' */ defin(0, "error"); defin(1, "$accept"); defin(0, "$unk"); mem = mem0; i = 0; for(t = gettok(); t != MARK && t != ENDFILE;) switch(t) { case ';': t = gettok(); break; case START: if(gettok() != IDENTIFIER) error("bad %%start construction"); start = chfind(1, tokname); t = gettok(); continue; case TYPEDEF: if(gettok() != TYPENAME) error("bad syntax in %%type"); ty = numbval; for(;;) { t = gettok(); switch(t) { case IDENTIFIER: if((t=chfind(1, tokname)) < NTBASE) { j = TYPE(toklev[t]); if(j != 0 && j != ty) error("type redeclaration of token %s", tokset[t].name); else SETTYPE(toklev[t], ty); } else { j = nontrst[t-NTBASE].value; if(j != 0 && j != ty) error("type redeclaration of nonterminal %s", nontrst[t-NTBASE].name ); else nontrst[t-NTBASE].value = ty; } case ',': continue; case ';': t = gettok(); default: break; } break; } continue; case UNION: /* copy the union declaration to the output */ cpyunion(); t = gettok(); continue; case LEFT: case BINARY: case RIGHT: i++; case TERM: /* nonzero means new prec. and assoc. */ lev = t-TERM; ty = 0; /* get identifiers so defined */ t = gettok(); /* there is a type defined */ if(t == TYPENAME) { ty = numbval; t = gettok(); } for(;;) { switch(t) { case ',': t = gettok(); continue; case ';': break; case IDENTIFIER: j = chfind(0, tokname); if(j >= NTBASE) error("%s defined earlier as nonterminal", tokname); if(lev) { if(ASSOC(toklev[j])) error("redeclaration of precedence of %s", tokname); SETASC(toklev[j], lev); SETPLEV(toklev[j], i); } if(ty) { if(TYPE(toklev[j])) error("redeclaration of type of %s", tokname); SETTYPE(toklev[j],ty); } t = gettok(); if(t == NUMBER) { tokset[j].value = numbval; if(j < ndefout && j > 3) error("please define type number of %s earlier", tokset[j].name); t = gettok(); } continue; } break; } continue; case LCURLY: defout(0); cpycode(); t = gettok(); continue; default: error("syntax error"); } if(t == ENDFILE) error("unexpected EOF before %%"); /* t is MARK */ Bprint(ftable, "extern int yyerrflag;\n"); Bprint(ftable, "#ifndef YYMAXDEPTH\n"); Bprint(ftable, "#define YYMAXDEPTH 150\n"); Bprint(ftable, "#endif\n" ); if(!ntypes) { Bprint(ftable, "#ifndef YYSTYPE\n"); Bprint(ftable, "#define YYSTYPE int\n"); Bprint(ftable, "#endif\n"); } Bprint(ftable, "YYSTYPE yylval;\n"); Bprint(ftable, "YYSTYPE yyval;\n"); prdptr[0] = mem; /* added production */ *mem++ = NTBASE; /* if start is 0, we will overwrite with the lhs of the first rule */ *mem++ = start; *mem++ = 1; *mem++ = 0; prdptr[1] = mem; while((t=gettok()) == LCURLY) cpycode(); if(t != IDENTCOLON) error("bad syntax on first rule"); if(!start) prdptr[0][1] = chfind(1, tokname); /* read rules */ while(t != MARK && t != ENDFILE) { /* process a rule */ rlines[nprod] = lineno; if(t == '|') *mem++ = *prdptr[nprod-1]; else if(t == IDENTCOLON) { *mem = chfind(1, tokname); if(*mem < NTBASE) error("token illegal on LHS of grammar rule"); mem++; } else error("illegal rule: missing semicolon or | ?"); /* read rule body */ t = gettok(); more_rule: while(t == IDENTIFIER) { *mem = chfind(1, tokname); if(*mem < NTBASE) levprd[nprod] = toklev[*mem]; mem++; t = gettok(); } if(t == PREC) { if(gettok() != IDENTIFIER) error("illegal %%prec syntax"); j = chfind(2, tokname); if(j >= NTBASE) error("nonterminal %s illegal after %%prec", nontrst[j-NTBASE].name); levprd[nprod] = toklev[j]; t = gettok(); } if(t == '=') { levprd[nprod] |= ACTFLAG; Bprint(faction, "\ncase %d:", nprod); cpyact(mem-prdptr[nprod]-1); Bprint(faction, " break;"); if((t=gettok()) == IDENTIFIER) { /* action within rule... */ sprint(actnm, "$$%d", nprod); /* make it a nonterminal */ j = chfind(1, actnm); /* * the current rule will become rule number nprod+1 * move the contents down, and make room for the null */ for(p = mem; p >= prdptr[nprod]; --p) p[2] = *p; mem += 2; /* enter null production for action */ p = prdptr[nprod]; *p++ = j; *p++ = -nprod; /* update the production information */ levprd[nprod+1] = levprd[nprod] & ~ACTFLAG; levprd[nprod] = ACTFLAG; if(++nprod >= NPROD) error("more than %d rules", NPROD); prdptr[nprod] = p; /* make the action appear in the original rule */ *mem++ = j; /* get some more of the rule */ goto more_rule; } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -