// parser.c
// From the source code of the open-source compiler Open Watcom 1.6.0 · C-language code · 670 lines in total · page 1 of 2
// Language: C
// 670 lines
/*
 * Consume one (possibly backslash-escaped) character from the front of `s`
 * and return its code.
 *
 * `s.str` is advanced and `s.len` is reduced by the number of bytes consumed.
 * Plain characters and the named escapes (\n \t \v \b \r \f \a) are passed
 * through the `xlat` table.  An octal escape yields the accumulated numeric
 * value directly, without going through `xlat`.  A backslash that is the
 * last byte of `s` is treated as a plain character, and any other escaped
 * character stands for itself.
 */
uchar unescape(SubString &s){
    uchar ch = *s.str++;
    s.len--;
    if(ch != '\\' || s.len == 0)
        return xlat[ch];

    // Escape sequence: fetch the character that follows the backslash.
    ch = *s.str++;
    s.len--;

    if('0' <= ch && ch <= '7'){
        // Octal escape: keep folding in digits for as long as they last.
        uchar value = ch - '0';
        while(s.len != 0){
            uchar digit = *s.str;
            if(digit < '0' || digit > '7')
                break;
            value = value*8 + (digit - '0');
            s.len--;
            s.str++;
        }
        return value;
    }

    // Named escapes are replaced by the control character they denote;
    // everything else keeps its own code.
    switch(ch){
    case 'n': ch = '\n'; break;
    case 't': ch = '\t'; break;
    case 'v': ch = '\v'; break;
    case 'b': ch = '\b'; break;
    case 'r': ch = '\r'; break;
    case 'f': ch = '\f'; break;
    case 'a': ch = '\a'; break;
    default:
        break;
    }
    return xlat[ch];
}
/*
 * Read one character-class element from the front of `s`: either a single
 * (possibly escaped) character, or a `lo-hi` pair separated by '-'.
 *
 * A '-' is only treated as a range separator when at least two bytes remain
 * (the '-' itself plus an upper bound).  Reversed bounds are swapped.
 * Returns a newly allocated Range built from (lower, upper + 1).
 */
Range *getRange(SubString &s){
    uchar lo = unescape(s);
    uchar hi = lo;

    if(s.len >= 2 && *s.str == '-'){
        // Skip the '-' and read the other end of the range.
        s.str++;
        s.len--;
        hi = unescape(s);
        if(hi < lo){
            uchar held = lo;
            lo = hi;
            hi = held;
        }
    }
    return new Range(lo, hi+1);
}
/*
 * Build a MatchOp over a Range constructed from (c, c + 1),
 * i.e. a match for the single character code `c`.
 */
RegExp *matchChar(uint c){
    Range *single = new Range(c, c+1);
    return new MatchOp(single);
}
/*
 * Parse a primary expression and consume its final token.
 *
 *   tIdent   - reference to a previously defined symbol; it is a fatal
 *              error if the symbol has no expression attached
 *   tLParen  - parenthesised sub-expression, must be closed by tRParen
 *   tString  - token text with its first and last byte stripped; each
 *              remaining (unescaped) character becomes a single-character
 *              match and the matches are concatenated left to right
 *   tCClass  - token text with its first and last byte stripped; the
 *              remaining ranges are united into one MatchOp
 *
 * An empty string or class yields a NullOp.  Any other token is reported
 * through in.fatal().
 */
RegExp *primary(Scanner &in){
    RegExp *re;
    switch(in.peek()){
    case tIdent: {
        Symbol *named = Symbol::find(in.tokStr());
        if(!named->re)
            in.fatal("can't find symbol");
        re = named->re;
        break;
    }
    case tLParen: {
        in.bump();
        re = expr(in);
        if(in.peek() != tRParen)
            in.fatal("expecting ')'");
        break;
    }
    case tString: {
        SubString text = in.tokStr();
        // Drop the delimiter at each end of the token.
        text.str += 1;
        text.len -= 2;
        if(text.len == 0){
            re = new NullOp;
            break;
        }
        re = matchChar(unescape(text));
        while(text.len > 0){
            RegExp *next = matchChar(unescape(text));
            re = new CatOp(re, next);
        }
        break;
    }
    case tCClass: {
        SubString text = in.tokStr();
        // Drop the delimiter at each end of the token.
        text.str += 1;
        text.len -= 2;
        if(text.len == 0){
            re = new NullOp;
            break;
        }
        Range *ranges = getRange(text);
        while(text.len > 0)
            ranges = doUnion(ranges, getRange(text));
        re = new MatchOp(ranges);
        break;
    }
    default:
        in.fatal("unexpected token");
    }
    // Step past the last token of the primary (identifier, ')', string or class).
    in.bump();
    return re;
}
/*
 * Parse a primary followed by any run of the postfix operators
 * t0orMore, t1orMore and tOptional.
 *
 * A run of operators collapses into one: it stays t1orMore or tOptional
 * only if every operator in the run is that same token; any mixture, or
 * any t0orMore, makes the whole run t0orMore.
 *
 *   t0orMore  -> alternative of CloseOp(re) and NullOp
 *   t1orMore  -> CloseOp(re)
 *   tOptional -> alternative of re and NullOp
 */
RegExp *factor(Scanner &in){
    RegExp *re = primary(in);

    int tok = in.peek();
    if(tok != t0orMore && tok != t1orMore && tok != tOptional)
        return re;

    // Fold every further operator of the run into `combined`.
    int combined = tok;
    in.bump();
    while((tok = in.peek()) == t0orMore || tok == t1orMore || tok == tOptional){
        if(tok != combined)
            combined = t0orMore;
        in.bump();
    }

    if(combined == t0orMore)
        re = mkAlt(new CloseOp(re), new NullOp());
    else if(combined == t1orMore)
        re = new CloseOp(re);
    else
        re = mkAlt(re, new NullOp());
    return re;
}
/*
 * Parse a sequence of one or more factors and concatenate them left to
 * right.  The sequence ends at the first token that is one of tChoice,
 * tDiff, tContext, tSemi, tCode, tRParen or tEOF; that token is not consumed.
 */
RegExp *term(Scanner &in){
    RegExp *seq = factor(in);
    for(;;){
        int tok = in.peek();
        if(tok == tChoice || tok == tDiff || tok == tContext || tok == tSemi
           || tok == tCode || tok == tRParen || tok == tEOF)
            break;
        seq = new CatOp(seq, factor(in));
    }
    return seq;
}
/*
 * Parse `term { tDiff term }`, combining the operands left to right with
 * mkDiff().  A null result from mkDiff() is reported through in.fatal().
 */
RegExp *diff(Scanner &in){
    RegExp *lhs = term(in);
    for(;;){
        if(in.peek() != tDiff)
            return lhs;
        in.bump();
        RegExp *rhs = term(in);
        lhs = mkDiff(lhs, rhs);
        if(!lhs)
            in.fatal("can only difference char sets");
    }
}
/*
 * Parse `diff { tChoice diff }`, combining the alternatives left to right
 * with mkAlt().
 */
RegExp *expr(Scanner &in){
    RegExp *alts = diff(in);
    for(;;){
        if(in.peek() != tChoice)
            break;
        in.bump();
        RegExp *rhs = diff(in);
        alts = mkAlt(alts, rhs);
    }
    return alts;
}
char *RuleOp::type = "RuleOp";

/*
 * Store the rule's expression `e`, its context expression `c`, the source
 * line `l`, the code text `s` and the accept number `a`.  The instruction
 * pointer starts out null and is filled in by compile().
 */
RuleOp::RuleOp(RegExp *e, RegExp *c, uint l, const SubString &s, uint a)
    : ins(NULL), exp(e), ctx(c), line(l), code(s), accept(a)
{
}

/*
 * Size both sub-expressions, then reserve room for them plus one extra
 * instruction (the TERM written by compile()).
 */
void RuleOp::calcSize(Char *rep){
    exp->calcSize(rep);
    ctx->calcSize(rep);
    size = 1 + exp->size + ctx->size;
}

/*
 * Emit the expression, then the context, into consecutive instructions
 * starting at `i`, followed by a TERM instruction linked back to this rule.
 * The start address is remembered in `ins`.
 */
void RuleOp::compile(Char *rep, Ins *i){
    ins = i;

    Ins *cursor = i;
    exp->compile(rep, cursor);
    cursor += exp->size;
    ctx->compile(rep, cursor);
    cursor += ctx->size;

    cursor->i.tag = TERM;
    cursor->i.link = this;
}

/* Forward the character-set split to the expression and then the context. */
void RuleOp::split(CharSet &s){
    exp->split(s);
    ctx->split(s);
}
/*
 * Parse one rule: `expr [ tContext expr ] tCode`.
 *
 * When no context is given, a NullOp stands in for it.  The current token
 * must then be tCode (otherwise in.fatal() is called); its line and text
 * are captured in the resulting RuleOp together with accept number `a`,
 * and the token is consumed.
 */
RegExp *rule(Scanner &in, uint a){
    RegExp *body = expr(in);

    RegExp *context;
    if(in.peek() != tContext){
        context = new NullOp;
    } else {
        in.bump();
        context = expr(in);
    }

    if(in.peek() != tCode)
        in.fatal("expecting code");

    RegExp *result = new RuleOp(body, context, in.tokLine, in.tokStr(), a);
    in.bump();
    return result;
}
/*
 * Parse `rule { tBang rule }` and join the rules with mkAlt().
 * Rules receive consecutive accept numbers starting at 0.
 */
RegExp *prog(Scanner &in){
    RegExp *rules = rule(in, 0);
    for(uint nextAccept = 1; in.peek() == tBang; ++nextAccept){
        in.bump();
        rules = mkAlt(rules, rule(in, nextAccept));
    }
    return rules;
}
extern void printSpan(ostream&, uint, uint);
/*
 * Walk the instruction sequence starting at `i` and simplify GOTO/FORK
 * instructions in place.
 *
 * Each visited instruction is marked; the walk stops as soon as it reaches
 * an instruction that is already marked, so every instruction is processed
 * at most once.  The marks are left set when the function returns.
 */
void optimize(Ins *i){
while(!isMarked(i)){
mark(i);
if(i->i.tag == CHAR){
// Continue the walk at the instruction this CHAR links to.
i = (Ins*) i->i.link;
} else if(i->i.tag == GOTO || i->i.tag == FORK){
Ins *target = (Ins*) i->i.link;
// Simplify the branch target first so the checks below see its final form.
optimize(target);
// If the target is a GOTO that jumps to itself, make this instruction
// link to itself instead; for any other GOTO target the link is left
// pointing at `target`.
if(target->i.tag == GOTO)
i->i.link = target->i.link == target? i : target;
if(i->i.tag == FORK){
// The second arm of a FORK is the instruction immediately after it.
Ins *follow = (Ins*) &i[1];
optimize(follow);
if(follow->i.tag == GOTO && follow->i.link == follow){
// The fall-through arm is a self-looping GOTO: keep only the
// linked arm by turning the FORK into a GOTO.
i->i.tag = GOTO;
} else if(i->i.link == i){
// The linked arm was rewritten above to point back at this
// instruction: keep only the fall-through arm.
i->i.tag = GOTO;
i->i.link = follow;
}
}
// A GOTO/FORK ends this walk; its successors were handled recursively.
return;
} else {
// Any other instruction: move on to the next one in memory.
++i;
}
}
}
/*
 * Top-level driver: read scanner specifications from `i` and write the
 * generated output to `o`.
 *
 * A "Generated by re2c" banner with the current time is written first,
 * followed by a #line directive when the global `fileName` is set.  The
 * main loop then alternates between in.copy(o) and a block of definitions
 * until tEOF is seen.  Inside a block each definition is
 *
 *     ident '=' expr ';'     - attaches the expression to the symbol
 *     ident ':' prog ';'     - compiles the rules and emits a DFA for them
 *
 * and the block must be closed by the tEnd token.  Redefining a symbol or
 * any syntax error is reported through in.fatal().
 */
void parse(istream &i, ostream &o){
    o << "/* Generated by re2c on ";
    time_t now = time(&now);
    char *nowStr = ctime(&now);
    // ctime() returns "Www Mmm dd hh:mm:ss yyyy\n"; emit the 24 characters
    // before the trailing newline.
    o.write(nowStr, 24);
    o << " */\n";

    Scanner in(*i.rdbuf());
    uint label = 0;     // threaded through successive dfa->emit() calls

    if(fileName)
        o << "#line 1 \"" << fileName << "\"\n";

    for(;;){
        // NOTE(review): presumably copies input up to the next block of
        // definitions through to `o` - confirm against Scanner::copy.
        in.copy(o);
        in.bump();
        if(in.peek() == tEOF)
            break;

        while(in.peek() == tIdent){
            Symbol *sym = Symbol::find(in.tokStr());
            if(sym->re)
                in.fatal("sym already defined");
            in.bump();

            if(in.peek() == tEqual){
                in.bump();
                sym->re = expr(in);
            } else if(in.peek() == tColon){
                in.bump();
                sym->re = prog(in);

                // Start with every character in one class (ptn[0]); the
                // remaining ptn entries are chained together through nxt
                // and handed over as the free list.
                CharSet cs;
                memset(&cs, 0, sizeof(cs));
                uint j;
                for(j = 0; j < nChars; ++j){
                    cs.rep[j] = &cs.ptn[0];
                    cs.ptn[j].nxt = &cs.ptn[j+1];
                }
                cs.freeHead = &cs.ptn[1];
                *(cs.freeTail = &cs.ptn[nChars-1].nxt) = NULL;
                cs.ptn[0].card = nChars;
                cs.ptn[0].nxt = NULL;
                sym->re->split(cs);
/*
                for(uint k = 0; k < nChars;){
                    for(j = k; ++k < nChars && cs.rep[k] == cs.rep[j];);
                    printSpan(cerr, j, k);
                    cerr << "\t" << cs.rep[j] - &cs.ptn[0] << endl;
                }
*/
                // Map each character to a representative: the first
                // character found whose class has no nxt yet claims that
                // class, and rep[j] becomes the index of the claiming
                // character.
                // NOTE(review): relies on split() leaving nxt null for the
                // classes still in use - confirm against CharSet.
                Char rep[nChars];
                for(j = 0; j < nChars; ++j){
                    if(!cs.rep[j]->nxt)
                        cs.rep[j]->nxt = &cs.ptn[j];
                    rep[j] = (Char) (cs.rep[j]->nxt - &cs.ptn[0]);
                }

                // Compile into a zeroed instruction array with one extra
                // slot for a terminating GOTO that loops on itself.
                sym->re->calcSize(rep);
                Ins *ins = new Ins[sym->re->size+1];
                memset(ins, 0, (sym->re->size+1)*sizeof(Ins));
                sym->re->compile(rep, ins);
                Ins *eoi = &ins[sym->re->size];
                eoi->i.tag = GOTO;
                eoi->i.link = eoi;

                optimize(ins);

                // Clear marks left by optimize(); a CHAR instruction jumps
                // the index to the instruction it links to.
                for(j = 0; j < sym->re->size;){
                    unmark(&ins[j]);
                    if(ins[j].i.tag == CHAR){
                        j = (Ins*) ins[j].i.link - ins;
                    } else {
                        j++;
                    }
                }

                // NOTE(review): the upper bound 256 is hard-coded here while
                // the tables above are sized by nChars - confirm they agree.
                DFA *dfa = new DFA(ins, sym->re->size, 0, 256, rep);
                label = dfa->emit(sym->name, label, o);
                delete dfa;
                // `ins` came from new[], so it must be released with delete[].
                delete[] ins;
            } else
                in.fatal("expecting '=' or ':'");

            if(in.peek() != tSemi)
                in.fatal("expecting ';'");
            in.bump();
        }

        if(in.peek() != tEnd)
            in.fatal("expecting */");
        in.bump();
    }
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?