📄 regexp.c
字号:
#include "sam.h"Rangeset sel;String lastregexp;/* * Machine Information */typedef struct Inst Inst;struct Inst{ long type; /* < 0x10000 ==> literal, otherwise action */ union { int rsid; int rsubid; int class; struct Inst *rother; struct Inst *rright; } r; union{ struct Inst *lleft; struct Inst *lnext; } l;};#define sid r.rsid#define subid r.rsubid#define rclass r.class#define other r.rother#define right r.rright#define left l.lleft#define next l.lnext#define NPROG 1024Inst program[NPROG];Inst *progp;Inst *startinst; /* First inst. of program; might not be program[0] */Inst *bstartinst; /* same for backwards machine */typedef struct Ilist Ilist;struct Ilist{ Inst *inst; /* Instruction of the thread */ Rangeset se; Posn startp; /* first char of match */};#define NLIST 128Ilist *tl, *nl; /* This list, next list */Ilist list[2][NLIST];static Rangeset sempty;/* * Actions and Tokens * * 0x100xx are operators, value == precedence * 0x200xx are tokens, i.e. operands for operators */#define OPERATOR 0x10000 /* Bitmask of all operators */#define START 0x10000 /* Start, used for marker on stack */#define RBRA 0x10001 /* Right bracket, ) */#define LBRA 0x10002 /* Left bracket, ( */#define OR 0x10003 /* Alternation, | */#define CAT 0x10004 /* Concatentation, implicit operator */#define STAR 0x10005 /* Closure, * */#define PLUS 0x10006 /* a+ == aa* */#define QUEST 0x10007 /* a? == a|nothing, i.e. 0 or 1 a's */#define ANY 0x20000 /* Any character but newline, . */#define NOP 0x20001 /* No operation, internal use only */#define BOL 0x20002 /* Beginning of line, ^ */#define EOL 0x20003 /* End of line, $ */#define CCLASS 0x20004 /* Character class, [] */#define NCCLASS 0x20005 /* Negated character class, [^] */#define END 0x20077 /* Terminate: match found */#define ISATOR 0x10000#define ISAND 0x20000/* * Parser Information */typedef struct Node Node;struct Node{ Inst *first; Inst *last;};#define NSTACK 20Node andstack[NSTACK];Node *andp;int atorstack[NSTACK];int *atorp;int lastwasand; /* Last token was operand */int cursubid;int subidstack[NSTACK];int *subidp;int backwards;int nbra;Rune *exprp; /* pointer to next character in source expression */#define DCLASS 10 /* allocation increment */int nclass; /* number active */int Nclass; /* high water mark */Rune **class;int negateclass;void addinst(Ilist *l, Inst *inst, Rangeset *sep);void newmatch(Rangeset*);void bnewmatch(Rangeset*);void pushand(Inst*, Inst*);void pushator(int);Node *popand(int);int popator(void);void startlex(Rune*);int lex(void);void operator(int);void operand(int);void evaluntil(int);void optimize(Inst*);void bldcclass(void);voidregerror(Err e){ Strzero(&lastregexp); error(e);}voidregerror_c(Err e, int c){ Strzero(&lastregexp); error_c(e, c);}Inst *newinst(int t){ if(progp >= &program[NPROG]) regerror(Etoolong); progp->type = t; progp->left = 0; progp->right = 0; return progp++;}Inst *realcompile(Rune *s){ int token; startlex(s); atorp = atorstack; andp = andstack; subidp = subidstack; cursubid = 0; lastwasand = FALSE; /* Start with a low priority operator to prime parser */ pushator(START-1); while((token=lex()) != END){ if((token&ISATOR) == OPERATOR) operator(token); else operand(token); } /* Close with a low priority operator */ evaluntil(START); /* Force END */ operand(END); evaluntil(START); if(nbra) regerror(Eleftpar); --andp; /* points to first and only operand */ return andp->first;}voidcompile(String *s){ int i; Inst *oprogp; if(Strcmp(s, &lastregexp)==0) return; for(i=0; i<nclass; i++) free(class[i]); nclass = 0; progp = program; backwards = FALSE; startinst = realcompile(s->s); optimize(program); oprogp = progp; backwards = TRUE; bstartinst = realcompile(s->s); optimize(oprogp); Strduplstr(&lastregexp, s);}voidoperand(int t){ Inst *i; if(lastwasand) operator(CAT); /* catenate is implicit */ i = newinst(t); if(t == CCLASS){ if(negateclass) i->type = NCCLASS; /* UGH */ i->rclass = nclass-1; /* UGH */ } pushand(i, i); lastwasand = TRUE;}voidoperator(int t){ if(t==RBRA && --nbra<0) regerror(Erightpar); if(t==LBRA){/* * if(++cursubid >= NSUBEXP) * regerror(Esubexp); */ cursubid++; /* silently ignored */ nbra++; if(lastwasand) operator(CAT); }else evaluntil(t); if(t!=RBRA) pushator(t); lastwasand = FALSE; if(t==STAR || t==QUEST || t==PLUS || t==RBRA) lastwasand = TRUE; /* these look like operands */}voidcant(char *s){ char buf[100]; sprint(buf, "regexp: can't happen: %s", s); panic(buf);}voidpushand(Inst *f, Inst *l){ if(andp >= &andstack[NSTACK]) cant("operand stack overflow"); andp->first = f; andp->last = l; andp++;}voidpushator(int t){ if(atorp >= &atorstack[NSTACK]) cant("operator stack overflow"); *atorp++=t; if(cursubid >= NSUBEXP) *subidp++= -1; else *subidp++=cursubid;}Node *popand(int op){ if(andp <= &andstack[0]) if(op) regerror_c(Emissop, op); else regerror(Ebadregexp); return --andp;}intpopator(void){ if(atorp <= &atorstack[0]) cant("operator stack underflow"); --subidp; return *--atorp;}voidevaluntil(int pri){ Node *op1, *op2, *t; Inst *inst1, *inst2; while(pri==RBRA || atorp[-1]>=pri){ switch(popator()){ case LBRA: op1 = popand('('); inst2 = newinst(RBRA); inst2->subid = *subidp; op1->last->next = inst2; inst1 = newinst(LBRA); inst1->subid = *subidp; inst1->next = op1->first; pushand(inst1, inst2); return; /* must have been RBRA */ default: panic("unknown regexp operator"); break; case OR: op2 = popand('|'); op1 = popand('|'); inst2 = newinst(NOP); op2->last->next = inst2; op1->last->next = inst2; inst1 = newinst(OR); inst1->right = op1->first; inst1->left = op2->first; pushand(inst1, inst2); break; case CAT: op2 = popand(0); op1 = popand(0); if(backwards && op2->first->type!=END) t = op1, op1 = op2, op2 = t; op1->last->next = op2->first; pushand(op1->first, op2->last); break; case STAR: op2 = popand('*'); inst1 = newinst(OR); op2->last->next = inst1; inst1->right = op2->first; pushand(inst1, inst1); break; case PLUS: op2 = popand('+'); inst1 = newinst(OR); op2->last->next = inst1; inst1->right = op2->first; pushand(op2->first, inst1); break; case QUEST: op2 = popand('?'); inst1 = newinst(OR); inst2 = newinst(NOP); inst1->left = inst2; inst1->right = op2->first; op2->last->next = inst2; pushand(inst1, inst2); break; } }}voidoptimize(Inst *start){ Inst *inst, *target; for(inst=start; inst->type!=END; inst++){ target = inst->next; while(target->type == NOP) target = target->next; inst->next = target; }}#ifdef DEBUGvoiddumpstack(void){ Node *stk; int *ip; dprint("operators\n"); for(ip = atorstack; ip<atorp; ip++) dprint("0%o\n", *ip); dprint("operands\n"); for(stk = andstack; stk<andp; stk++) dprint("0%o\t0%o\n", stk->first->type, stk->last->type);}voiddump(void){ Inst *l; l = program; do{ dprint("%d:\t0%o\t%d\t%d\n", l-program, l->type, l->left-program, l->right-program); }while(l++->type);}#endifvoidstartlex(Rune *s){ exprp = s; nbra = 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -