actions.cpp

来自「开放源码的编译器open watcom 1.6.0版的源代码」· C++ 代码 · 共 538 行

CPP
538
字号
/****************************************************************************
*
*                            Open Watcom Project
*
*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
*
*  ========================================================================
*
*    This file contains Original Code and/or Modifications of Original
*    Code as defined in and that are subject to the Sybase Open Watcom
*    Public License version 1.0 (the 'License'). You may not use this file
*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
*    provided with the Original Code and Modifications, and is also
*    available at www.sybase.com/developer/opensource.
*
*    The Original Code and all software distributed under the License are
*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
*    NON-INFRINGEMENT. Please see the License for the specific language
*    governing rights and limitations under the License.
*
*  ========================================================================
*
* Description:  WHEN YOU FIGURE OUT WHAT THIS FILE DOES, PLEASE
*               DESCRIBE IT HERE!
*
****************************************************************************/


#include <time.h>
#include <string.h>
#include <iostream.h>
#include <iomanip.h>

#include "globals.h"
#include "parser.h"
#include "dfa.h"

Symbol *Symbol::first = NULL;

Symbol::Symbol(const SubStr &str) : next(first), name(str), re(NULL) {
    first = this;
}

Symbol *Symbol::find(const SubStr &str){
    for(Symbol *sym = first; sym; sym = sym->next)
        if(sym->name == str) return sym;
    return new Symbol(str);
}

void showIns(ostream &o, const Ins &i, const Ins &base){
    o.width(3);
    o << &i - &base << ": ";
    switch(i.i.tag){
    case CHAR: {
        o << "match ";
        for(const Ins *j = &(&i)[1]; j < (Ins*) i.i.link; ++j)
            prtCh(o, (uchar)j->c.value);
        break;
    } case GOTO:
        o << "goto " << ((Ins*) i.i.link - &base);
        break;
    case FORK:
        o << "fork " << ((Ins*) i.i.link - &base);
        break;
    case CTXT:
        o << "term " << ((RuleOp*) i.i.link)->accept;
        break;
    case TERM:
        o << "term " << ((RuleOp*) i.i.link)->accept;
        break;
    }
    o << "\n";
}

uint RegExp::fixedLength(){
    return ~0;
}

char *NullOp::type = "NullOp";

void NullOp::calcSize(Char*){
    size = 0;
}

uint NullOp::fixedLength(){
    return 0;
}

void NullOp::compile(Char*, Ins*){
    ;
}

void NullOp::split(CharSet&){
    ;
}

ostream& operator<<(ostream &o, const Range &r){
    if((r.ub - r.lb) == 1){
        prtCh(o, (uchar)r.lb);
    } else {
        prtCh(o, (uchar)r.lb); o << "-"; prtCh(o, (uchar)(r.ub-1));
    }
    return o << r.next;
}

Range *doUnion(Range *r1, Range *r2){
    Range *r;
    Range **rP = &r;
    for(;;){
        Range *s;
        if(r1->lb <= r2->lb){
            s = new Range(*r1);
        } else {
            s = new Range(*r2);
        }
        *rP = s;
        rP = &s->next;
        for(;;){
            if(r1->lb <= r2->lb){
                if(r1->lb > s->ub)
                    break;
                if(r1->ub > s->ub)
                    s->ub = r1->ub;
                if((r1 = r1->next) == NULL){
                    uint ub = 0;
                    for(; r2 && r2->lb <= s->ub; r2 = r2->next)
                        ub = r2->ub;
                    if(ub > s->ub)
                        s->ub = ub;
                    *rP = r2;
                    return r;
                }
            } else {
                if(r2->lb > s->ub)
                    break;
                if(r2->ub > s->ub)
                    s->ub = r2->ub;
                if((r2 = r2->next) == NULL){
                    uint ub = 0;
                    for(; r1 && r1->lb <= s->ub; r1 = r1->next)
                        ub = r1->ub;
                    if(ub > s->ub)
                        s->ub = ub;
                    *rP = r1;
                    return r;
                }
            }
        }
    }
}

Range *doDiff(Range *r1, Range *r2){
    Range *r, *s;
    Range **rP = &r;
    for(; r1; r1 = r1->next){
        uint lb = r1->lb;
        for(; r2 && r2->ub <= r1->lb; r2 = r2->next);
        for(; r2 && r2->lb <  r1->ub; r2 = r2->next){
            if(lb < r2->lb){
                *rP = s = new Range(lb, r2->lb);
                rP = &s->next;
            }
            if((lb = r2->ub) >= r1->ub)
                goto noMore;
        }
        *rP = s = new Range(lb, r1->ub);
        rP = &s->next;
    noMore:;
    }
    *rP = NULL;
    return r;
}

MatchOp *merge(MatchOp *m1, MatchOp *m2){
    if(!m1)
        return m2;
    if(!m2)
        return m1;
    return new MatchOp(doUnion(m1->match, m2->match));
}

char *MatchOp::type = "MatchOp";

void MatchOp::display(ostream &o) const{
    o << match;
}

void MatchOp::calcSize(Char *rep){
    size = 1;
    for(Range *r = match; r; r = r->next)
        for(uint c = r->lb; c < r->ub; ++c)
            if(rep[c] == c)
                ++size;
}

uint MatchOp::fixedLength(){
    return 1;
}

void MatchOp::compile(Char *rep, Ins *i){
    i->i.tag = CHAR;
    i->i.link = &i[size];
    Ins *j = &i[1];
    uint bump = size;
    for(Range *r = match; r; r = r->next){
        for(uint c = r->lb; c < r->ub; ++c){
            if(rep[c] == c){
                j->c.value = (uchar)c;
                j->c.bump = (uchar)--bump;
                j++;
            }
        }
    }
}

void MatchOp::split(CharSet &s){
    for(Range *r = match; r; r = r->next){
        for(uint c = r->lb; c < r->ub; ++c){
            CharPtn *x = s.rep[c], *a = x->nxt;
            if(!a){
                if(x->card == 1)
                    continue;
                x->nxt = a = s.freeHead;
                if((s.freeHead = s.freeHead->nxt) == NULL)
                    s.freeTail = &s.freeHead;
                a->nxt = NULL;
                x->fix = s.fix;
                s.fix = x;
            }
            if(--(x->card) == 0){
                *s.freeTail = x;
                *(s.freeTail = &x->nxt) = NULL;
            }
            s.rep[c] = a;
            ++(a->card);
        }
    }
    for(; s.fix; s.fix = s.fix->fix)
        if(s.fix->card)
            s.fix->nxt = NULL;
}

RegExp *mkDiff(RegExp *e1, RegExp *e2){
    MatchOp *m1, *m2;
    if((m1 = (MatchOp*) e1->isA(MatchOp::type)) == NULL)
        return NULL;
    if((m2 = (MatchOp*) e2->isA(MatchOp::type)) == NULL)
        return NULL;
    Range *r = doDiff(m1->match, m2->match);
    return r? (RegExp*) new MatchOp(r) : (RegExp*) new NullOp;
}

RegExp *doAlt(RegExp *e1, RegExp *e2){
    if(!e1)
        return e2;
    if(!e2)
        return e1;
    return new AltOp(e1, e2);
}

RegExp *mkAlt(RegExp *e1, RegExp *e2){
    AltOp *a;
    MatchOp *m1, *m2;
    if((a = (AltOp*) e1->isA(AltOp::type)) != NULL){
        if((m1 = (MatchOp*) a->exp1->isA(MatchOp::type)) != NULL)
            e1 = a->exp2;
    } else if((m1 = (MatchOp*) e1->isA(MatchOp::type)) != NULL){
            e1 = NULL;
    }
    if((a = (AltOp*) e2->isA(AltOp::type)) != NULL){
        if((m2 = (MatchOp*) a->exp1->isA(MatchOp::type)) != NULL)
            e2 = a->exp2;
    } else if((m2 = (MatchOp*) e2->isA(MatchOp::type)) != NULL){
            e2 = NULL;
    }
    return doAlt(merge(m1, m2), doAlt(e1, e2));
}

char *AltOp::type = "AltOp";

void AltOp::calcSize(Char *rep){
    exp1->calcSize(rep);
    exp2->calcSize(rep);
    size = exp1->size + exp2->size + 2;
}

uint AltOp::fixedLength(){
    uint l1 = exp1->fixedLength();
    uint l2 = exp1->fixedLength();
    if(l1 != l2 || l1 == ~0)
        return ~0;
    return l1;
}

void AltOp::compile(Char *rep, Ins *i){
    i->i.tag = FORK;
    Ins *j = &i[exp1->size + 1];
    i->i.link = &j[1];
    exp1->compile(rep, &i[1]);
    j->i.tag = GOTO;
    j->i.link = &j[exp2->size + 1];
    exp2->compile(rep, &j[1]);
}

void AltOp::split(CharSet &s){
    exp1->split(s);
    exp2->split(s);
}

char *CatOp::type = "CatOp";

void CatOp::calcSize(Char *rep){
    exp1->calcSize(rep);
    exp2->calcSize(rep);
    size = exp1->size + exp2->size;
}

uint CatOp::fixedLength(){
    uint l1, l2;
    if((l1 = exp1->fixedLength()) != ~0 &&
       (l2 = exp2->fixedLength()) != ~0)
        return l1+l2;
    return ~0;
}

void CatOp::compile(Char *rep, Ins *i){
    exp1->compile(rep, &i[0]);
    exp2->compile(rep, &i[exp1->size]);
}

void CatOp::split(CharSet &s){
    exp1->split(s);
    exp2->split(s);
}

char *CloseOp::type = "CloseOp";

void CloseOp::calcSize(Char *rep){
    exp->calcSize(rep);
    size = exp->size + 1;
}

void CloseOp::compile(Char *rep, Ins *i){
    exp->compile(rep, &i[0]);
    i += exp->size;
    i->i.tag = FORK;
    i->i.link = &i[-exp->size];
}

void CloseOp::split(CharSet &s){
    exp->split(s);
}

RegExp *expr(Scanner &);

uchar unescape(SubStr &s){
    s.len--;
    uchar c;
    if((c = *s.str++) != '\\' || s.len == 0)
        return xlat[c];
    s.len--;
    switch(c = *s.str++){
    case 'n':
        return xlat['\n'];
    case 't':
        return xlat['\t'];
    case 'v':
        return xlat['\v'];
    case 'b':
        return xlat['\b'];
    case 'r':
        return xlat['\r'];
    case 'f':
        return xlat['\f'];
    case 'a':
        return xlat['\a'];
    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7': {
        uchar v = c - '0';
        for(; s.len != 0 && '0' <= (c = *s.str) && c <= '7'; s.len--, s.str++)
            v = v*8 + (c - '0');
        return v;
    } default:
        return xlat[c];
    }
}

Range *getRange(SubStr &s){
    uchar lb = unescape(s), ub;
    if(s.len < 2 || *s.str != '-'){
        ub = lb;
    } else {
        s.len--; s.str++;
        ub = unescape(s);
        if(ub < lb){
            uchar tmp;
            tmp = lb; lb = ub; ub = tmp;
        }
    }
    return new Range(lb, ub+1);
}

RegExp *matchChar(uint c){
    return new MatchOp(new Range(c, c+1));
}

RegExp *strToRE(SubStr s){
    s.len -= 2; s.str += 1;
    if(s.len == 0)
        return new NullOp;
    RegExp *re = matchChar(unescape(s));
    while(s.len > 0)
        re = new CatOp(re, matchChar(unescape(s)));
    return re;
}

RegExp *ranToRE(SubStr s){
    s.len -= 2; s.str += 1;
    if(s.len == 0)
        return new NullOp;
    Range *r = getRange(s);
    while(s.len > 0)
        r = doUnion(r, getRange(s));
    return new MatchOp(r);
}

char *RuleOp::type = "RuleOp";

RuleOp::RuleOp(RegExp *e, RegExp *c, Token *t, uint a)
        : ins(NULL), exp(e), ctx(c), code(t), accept(a) {
    ;
}

void RuleOp::calcSize(Char *rep){
    exp->calcSize(rep);
    ctx->calcSize(rep);
    size = exp->size + ctx->size + 1;
}

void RuleOp::compile(Char *rep, Ins *i){
    ins = i;
    exp->compile(rep, &i[0]);
    i += exp->size;
    ctx->compile(rep, &i[0]);
    i += ctx->size;
    i->i.tag = TERM;
    i->i.link = this;
}

void RuleOp::split(CharSet &s){
    exp->split(s);
    ctx->split(s);
}

extern void printSpan(ostream&, uint, uint);

void optimize(Ins *i){
    while(!isMarked(i)){
        mark(i);
        if(i->i.tag == CHAR){
            i = (Ins*) i->i.link;
        } else if(i->i.tag == GOTO || i->i.tag == FORK){
            Ins *target = (Ins*) i->i.link;
            optimize(target);
            if(target->i.tag == GOTO)
                i->i.link = target->i.link == target? i : target;
            if(i->i.tag == FORK){
                Ins *follow = (Ins*) &i[1];
                optimize(follow);
                if(follow->i.tag == GOTO && follow->i.link == follow){
                    i->i.tag = GOTO;
                } else if(i->i.link == i){
                    i->i.tag = GOTO;
                    i->i.link = follow;
                }
            }
            return;
        } else {
            ++i;
        }
    }
}

void genCode(ostream& o, RegExp *re){
    uint j;
    CharSet cs;
    memset(&cs, 0, sizeof(cs));
    for(j = 0; j < nChars; ++j){
        cs.rep[j] = &cs.ptn[0];
        cs.ptn[j].nxt = &cs.ptn[j+1];
    }
    cs.freeHead = &cs.ptn[1];
    *(cs.freeTail = &cs.ptn[nChars-1].nxt) = NULL;
    cs.ptn[0].card = nChars;
    cs.ptn[0].nxt = NULL;
    re->split(cs);
/*
    for(uint k = 0; k < nChars;){
        for(j = k; ++k < nChars && cs.rep[k] == cs.rep[j];);
        printSpan(cerr, j, k);
        cerr << "\t" << cs.rep[j] - &cs.ptn[0] << endl;
    }
*/
    Char rep[nChars];
    for(j = 0; j < nChars; ++j){
        if(!cs.rep[j]->nxt)
            cs.rep[j]->nxt = &cs.ptn[j];
        rep[j] = (Char) (cs.rep[j]->nxt - &cs.ptn[0]);
    }

    re->calcSize(rep);
    Ins *ins = new Ins[re->size+1];
    memset(ins, 0, (re->size+1)*sizeof(Ins));
    re->compile(rep, ins);
    Ins *eoi = &ins[re->size];
    eoi->i.tag = GOTO;
    eoi->i.link = eoi;

    optimize(ins);
    for(j = 0; j < re->size;){
        unmark(&ins[j]);
        if(ins[j].i.tag == CHAR){
            j = (Ins*) ins[j].i.link - ins;
        } else {
            j++;
        }
    }

    DFA *dfa = new DFA(ins, re->size, 0, 256, rep);
    dfa->emit(o);
    delete dfa;
    delete [] ins;
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?