parser.c

来自「开放源码的编译器open watcom 1.6.0版的源代码」· C语言 代码 · 共 670 行 · 第 1/2 页

C
670
字号

// Consume one character (or one backslash escape sequence) from the front
// of s, advancing s.str and decrementing s.len for every byte consumed.
// Plain characters, the named escapes (\n \t \v \b \r \f \a) and unknown
// escapes are returned mapped through xlat[]; an octal escape returns the
// accumulated numeric value directly, without going through xlat[].
// The caller must ensure s.len != 0 on entry.
uchar unescape(SubString &s){
    uchar ch = *s.str++;
    s.len--;

    // Not an escape, or a lone trailing backslash: translate it as-is.
    if(ch != '\\' || s.len == 0)
        return xlat[ch];

    // Consume the character following the backslash.
    ch = *s.str++;
    s.len--;

    switch(ch){
    case 'n': return xlat['\n'];
    case 't': return xlat['\t'];
    case 'v': return xlat['\v'];
    case 'b': return xlat['\b'];
    case 'r': return xlat['\r'];
    case 'f': return xlat['\f'];
    case 'a': return xlat['\a'];
    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7': {
        // Octal escape: keep absorbing octal digits while any remain.
        uchar value = ch - '0';
        while(s.len != 0){
            ch = *s.str;
            if(!('0' <= ch && ch <= '7'))
                break;
            value = value*8 + (ch - '0');
            s.len--;
            s.str++;
        }
        return value;
    }
    default:
        // Unknown escape: the escaped character stands for itself.
        return xlat[ch];
    }
}

// Parse one element from s: either a single (possibly escaped) character
// or a "lo-hi" pair. Returns a newly allocated Range built from
// (lower bound, upper bound + 1). If the two bounds of a pair are given in
// descending order they are swapped first.
Range *getRange(SubString &s){
    uchar lo = unescape(s);
    uchar hi = lo;

    // A '-' only introduces an upper bound when at least one more
    // character follows it.
    if(s.len >= 2 && *s.str == '-'){
        s.str++;
        s.len--;
        hi = unescape(s);
        if(hi < lo){
            uchar swapped = lo;
            lo = hi;
            hi = swapped;
        }
    }
    return new Range(lo, hi+1);
}

// Build a MatchOp over the one-element range Range(c, c+1).
RegExp *matchChar(uint c){
    Range *single = new Range(c, c+1);
    return new MatchOp(single);
}

// Parse a primary expression from the scanner:
//   - an identifier, which must name a symbol that already has an
//     expression attached;
//   - a parenthesised expression;
//   - a string token, turned into a concatenation of single-character
//     matches (or a NullOp when empty);
//   - a character-class token, turned into one MatchOp over the union of
//     its ranges (or a NullOp when empty).
// The final token of the primary is consumed before returning.
RegExp *primary(Scanner &in){
    RegExp *result;
    switch(in.peek()){
    case tIdent: {
        Symbol *symbol = Symbol::find(in.tokStr());
        if(!symbol->re)
            in.fatal("can't find symbol");
        result = symbol->re;
        break;
    }
    case tLParen: {
        in.bump();
        result = expr(in);
        if(in.peek() != tRParen)
            in.fatal("expecting ')'");
        break;
    }
    case tString: {
        // Drop the first and last character of the token text
        // (presumably the surrounding quotes).
        SubString text = in.tokStr();
        text.str += 1;
        text.len -= 2;
        if(text.len != 0){
            for(result = matchChar(unescape(text)); text.len > 0; )
                result = new CatOp(result, matchChar(unescape(text)));
        } else {
            result = new NullOp;
        }
        break;
    }
    case tCClass: {
        // Drop the first and last character of the token text
        // (presumably the surrounding brackets).
        SubString body = in.tokStr();
        body.str += 1;
        body.len -= 2;
        if(body.len != 0){
            Range *ranges = getRange(body);
            while(body.len > 0)
                ranges = doUnion(ranges, getRange(body));
            result = new MatchOp(ranges);
        } else {
            result = new NullOp;
        }
        break;
    }
    default:
        in.fatal("unexpected token");
    }
    in.bump();
    return result;
}

// Parse a primary followed by any run of postfix operators
// (t0orMore, t1orMore, tOptional). A run of identical operators keeps
// that operator; any mixture collapses to t0orMore.
RegExp *factor(Scanner &in){
    RegExp *re = primary(in);
    int tok = in.peek();

    // No postfix operator: the primary is the whole factor.
    if(tok != t0orMore && tok != t1orMore && tok != tOptional)
        return re;

    // Fold the whole run of postfix operators into a single action.
    int combined = tok;
    in.bump();
    while((tok = in.peek()) == t0orMore || tok == t1orMore || tok == tOptional){
        if(tok != combined)
            combined = t0orMore;
        in.bump();
    }

    if(combined == t0orMore){
        re = mkAlt(new CloseOp(re), new NullOp());
    } else if(combined == t1orMore){
        re = new CloseOp(re);
    } else if(combined == tOptional){
        re = mkAlt(re, new NullOp());
    }
    return re;
}

// Parse a sequence of factors and concatenate them left to right.
// The sequence ends at the first token that is one of tChoice, tDiff,
// tContext, tSemi, tCode, tRParen or tEOF; that token is not consumed.
RegExp *term(Scanner &in){
    RegExp *re = factor(in);
    for(;;){
        const int t = in.peek();
        const bool endsTerm =
            t == tChoice || t == tDiff || t == tContext || t == tSemi ||
            t == tCode || t == tRParen || t == tEOF;
        if(endsTerm)
            break;
        re = new CatOp(re, factor(in));
    }
    return re;
}

// Parse "term (tDiff term)*", folding left to right with mkDiff().
// A null result from mkDiff() is reported as a fatal error.
RegExp *diff(Scanner &in){
    RegExp *re = term(in);
    for(; in.peek() == tDiff; ){
        in.bump();
        RegExp *rhs = term(in);
        re = mkDiff(re, rhs);
        if(!re)
            in.fatal("can only difference char sets");
    }
    return re;
}

// Parse "diff (tChoice diff)*", folding left to right with mkAlt().
RegExp *expr(Scanner &in){
    RegExp *re = diff(in);
    for(; in.peek() == tChoice; ){
        in.bump();
        RegExp *rhs = diff(in);
        re = mkAlt(re, rhs);
    }
    return re;
}

// Definition of the static RuleOp::type member; it holds the class name
// as a C string.
// NOTE(review): binding a string literal to a non-const char* is rejected
// by C++11 and later; making it const requires changing the declaration in
// the class header, which is not visible here.
char *RuleOp::type = "RuleOp";

// Construct a rule from its expression e, context c, source line l,
// code text s and accept number a. The instruction pointer starts out
// NULL and is filled in by compile().
RuleOp::RuleOp(RegExp *e, RegExp *c, uint l, const SubString &s, uint a)
    : ins(NULL),
      exp(e),
      ctx(c),
      line(l),
      code(s),
      accept(a)
{
}

// Compute the sizes of both sub-expressions, then set this rule's size to
// their sum plus one (compile() writes one extra TERM instruction).
void RuleOp::calcSize(Char *rep){
    this->exp->calcSize(rep);
    this->ctx->calcSize(rep);
    this->size = this->exp->size + this->ctx->size + 1;
}

// Emit this rule's instructions starting at i: first the expression,
// then the context, then a single TERM instruction linked back to this
// rule. The start address is remembered in ins.
void RuleOp::compile(Char *rep, Ins *i){
    ins = i;

    Ins *cursor = i;
    exp->compile(rep, cursor);
    cursor += exp->size;

    ctx->compile(rep, cursor);
    cursor += ctx->size;

    cursor->i.tag = TERM;
    cursor->i.link = this;
}

// Forward the split request to the expression, then to the context.
void RuleOp::split(CharSet &s){
    this->exp->split(s);
    this->ctx->split(s);
}

// Parse one rule: an expression, an optional tContext-introduced context
// expression (a NullOp when absent), and a mandatory code token.
// Returns a new RuleOp carrying the code token's line and text and the
// accept number a; the code token is consumed.
RegExp *rule(Scanner &in, uint a){
    RegExp *body = expr(in);

    RegExp *context;
    if(in.peek() != tContext){
        context = new NullOp;
    } else {
        in.bump();
        context = expr(in);
    }

    if(in.peek() != tCode)
        in.fatal("expecting code");

    RegExp *result = new RuleOp(body, context, in.tokLine, in.tokStr(), a);
    in.bump();
    return result;
}

// Parse one or more rules separated by tBang and combine them with
// mkAlt(). Rules are numbered 0, 1, 2, ... in the order they appear.
RegExp *prog(Scanner &in){
    uint nextAccept = 0;
    RegExp *all = rule(in, nextAccept);
    ++nextAccept;
    for(; in.peek() == tBang; ++nextAccept){
        in.bump();
        all = mkAlt(all, rule(in, nextAccept));
    }
    return all;
}

// Defined in another translation unit. Within this file it is referenced
// only by the commented-out debug dump inside parse().
extern void printSpan(ostream&, uint, uint);

// Walk the instruction stream starting at i and simplify GOTO/FORK
// instructions in place. Every visited instruction is marked, and the walk
// stops as soon as it reaches one that is already marked, so each
// instruction is processed at most once even though the function recurses
// through jump targets.
void optimize(Ins *i){
    while(!isMarked(i)){
        mark(i);
        if(i->i.tag == CHAR){
            // Continue at the instruction this CHAR links to
            // (presumably the first one after its table - TODO confirm).
            i = (Ins*) i->i.link;
        } else if(i->i.tag == GOTO || i->i.tag == FORK){
            Ins *target = (Ins*) i->i.link;
            // Simplify the destination first so its final tag is known.
            optimize(target);
            // Destination is a GOTO: if it jumps to itself, make i link to
            // itself as well; otherwise keep linking to the destination.
            if(target->i.tag == GOTO)
                i->i.link = target->i.link == target? i : target;
            if(i->i.tag == FORK){
                // The second branch of a FORK is the next instruction.
                Ins *follow = (Ins*) &i[1];
                optimize(follow);
                if(follow->i.tag == GOTO && follow->i.link == follow){
                    // Fall-through branch is a self-looping GOTO: demote
                    // the FORK to a GOTO that keeps its current link.
                    i->i.tag = GOTO;
                } else if(i->i.link == i){
                    // Linked branch points back at i itself: demote the
                    // FORK to a GOTO to the fall-through instruction.
                    i->i.tag = GOTO;
                    i->i.link = follow;
                }
            }
            // Both successors were handled by the recursive calls above.
            return;
        } else {
            // Any other instruction: just move on to the next one.
            ++i;
        }
    }
}

// Read a scanner description from i and write the generated output to o.
//
// A header comment with the current time is written first, followed by a
// "#line" directive when fileName is set. The input is then processed in a
// loop: in.copy(o) passes text through to the output, after which a run of
// definitions is parsed. Each definition starts with an identifier that
// must not already have an expression attached, followed by either
//   '=' expr ';'   - attaches the expression to the symbol, or
//   ':' prog ';'   - parses a rule set, compiles it to instructions,
//                    builds a DFA from them and emits it to o.
// Each block of definitions must be closed by a tEnd token. The function
// returns when tEOF is seen right after a copy step. Syntax errors are
// reported through in.fatal().
void parse(istream &i, ostream &o){
    o << "/* Generated by re2c on ";
    time_t now = time(&now);
    char *nowStr = ctime(&now);
    // ctime() may return NULL when the time cannot be converted; only the
    // first 24 characters are written, which leaves out its trailing '\n'.
    if(nowStr)
        o.write(nowStr, 24);
    o << " */\n";

    Scanner in(*i.rdbuf());
    uint label = 0;
    if(fileName)
        o << "#line 1 \"" << fileName << "\"\n";

    for(;;){

        in.copy(o);
        in.bump();

        if(in.peek() == tEOF)
            break;

        while(in.peek() == tIdent){
            Symbol *sym = Symbol::find(in.tokStr());
            if(sym->re)
                in.fatal("sym already defined");
            in.bump();
            if(in.peek() == tEqual){
                in.bump();
                sym->re = expr(in);
            } else if(in.peek() == tColon){
                in.bump();
                sym->re = prog(in);

                // Start with every character represented by ptn[0]; the
                // remaining ptn entries are chained together through nxt
                // (apparently as a free list for split() - TODO confirm).
                CharSet cs;
                memset(&cs, 0, sizeof(cs));
                uint j;
                for(j = 0; j < nChars; ++j){
                    cs.rep[j] = &cs.ptn[0];
                    cs.ptn[j].nxt = &cs.ptn[j+1];
                }
                cs.freeHead = &cs.ptn[1];
                *(cs.freeTail = &cs.ptn[nChars-1].nxt) = NULL;
                cs.ptn[0].card = nChars;
                cs.ptn[0].nxt = NULL;
                sym->re->split(cs);
/*
                for(uint k = 0; k < nChars;){
                    for(j = k; ++k < nChars && cs.rep[k] == cs.rep[j];);
                    printSpan(cerr, j, k);
                    cerr << "\t" << cs.rep[j] - &cs.ptn[0] << endl;
                }
*/
                // For each character, record the index of the ptn entry
                // reached through its class's nxt pointer. The first
                // character seen for a class sets that pointer to its own
                // ptn slot, so every member of the class gets that index.
                Char rep[nChars];
                for(j = 0; j < nChars; ++j){
                    if(!cs.rep[j]->nxt)
                        cs.rep[j]->nxt = &cs.ptn[j];
                    rep[j] = (Char) (cs.rep[j]->nxt - &cs.ptn[0]);
                }

                // Compile into a zeroed instruction array with one extra
                // slot at the end, which becomes a self-looping GOTO.
                sym->re->calcSize(rep);
                Ins *ins = new Ins[sym->re->size+1];
                memset(ins, 0, (sym->re->size+1)*sizeof(Ins));
                sym->re->compile(rep, ins);
                Ins *eoi = &ins[sym->re->size];
                eoi->i.tag = GOTO;
                eoi->i.link = eoi;

                // optimize() leaves marks on the instructions it visited;
                // clear them again, skipping over CHAR tables via link.
                optimize(ins);
                for(j = 0; j < sym->re->size;){
                    unmark(&ins[j]);
                    if(ins[j].i.tag == CHAR){
                        j = (Ins*) ins[j].i.link - ins;
                    } else {
                        j++;
                    }
                }

                DFA *dfa = new DFA(ins, sym->re->size, 0, 256, rep);
                label = dfa->emit(sym->name, label, o);
                delete dfa;
                // ins was allocated with new[], so it must be released
                // with delete[] (scalar delete here is undefined behaviour).
                delete [] ins;
            } else
                in.fatal("expecting '=' or ':'");
            if(in.peek() != tSemi)
                in.fatal("expecting ';'");
            in.bump();
        }

        if(in.peek() != tEnd)
            in.fatal("expecting */");
        in.bump();

    }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?