📄 regx.c
字号:
#include <u.h>#include <libc.h>#include <draw.h>#include <thread.h>#include <cursor.h>#include <mouse.h>#include <keyboard.h>#include <frame.h>#include <fcall.h>#include <plumb.h>#include "dat.h"#include "fns.h"Rangeset sel;Rune *lastregexp;/* * Machine Information */typedef struct Inst Inst;struct Inst{ uint type; /* < 0x10000 ==> literal, otherwise action */ union { int sid; int subid; int class; Inst *other; Inst *right; }; union{ Inst *left; Inst *next; };};#define NPROG 1024Inst program[NPROG];Inst *progp;Inst *startinst; /* First inst. of program; might not be program[0] */Inst *bstartinst; /* same for backwards machine */Channel *rechan; /* chan(Inst*) */typedef struct Ilist Ilist;struct Ilist{ Inst *inst; /* Instruction of the thread */ Rangeset se; uint startp; /* first char of match */};#define NLIST 128Ilist *tl, *nl; /* This list, next list */Ilist list[2][NLIST];static Rangeset sempty;/* * Actions and Tokens * * 0x100xx are operators, value == precedence * 0x200xx are tokens, i.e. operands for operators */#define OPERATOR 0x10000 /* Bitmask of all operators */#define START 0x10000 /* Start, used for marker on stack */#define RBRA 0x10001 /* Right bracket, ) */#define LBRA 0x10002 /* Left bracket, ( */#define OR 0x10003 /* Alternation, | */#define CAT 0x10004 /* Concatentation, implicit operator */#define STAR 0x10005 /* Closure, * */#define PLUS 0x10006 /* a+ == aa* */#define QUEST 0x10007 /* a? == a|nothing, i.e. 0 or 1 a's */#define ANY 0x20000 /* Any character but newline, . */#define NOP 0x20001 /* No operation, internal use only */#define BOL 0x20002 /* Beginning of line, ^ */#define EOL 0x20003 /* End of line, $ */#define CCLASS 0x20004 /* Character class, [] */#define NCCLASS 0x20005 /* Negated character class, [^] */#define END 0x20077 /* Terminate: match found */#define ISATOR 0x10000#define ISAND 0x20000/* * Parser Information */typedef struct Node Node;struct Node{ Inst *first; Inst *last;};#define NSTACK 20Node andstack[NSTACK];Node *andp;int atorstack[NSTACK];int *atorp;int lastwasand; /* Last token was operand */int cursubid;int subidstack[NSTACK];int *subidp;int backwards;int nbra;Rune *exprp; /* pointer to next character in source expression */#define DCLASS 10 /* allocation increment */int nclass; /* number active */int Nclass; /* high water mark */Rune **class;int negateclass;void addinst(Ilist *l, Inst *inst, Rangeset *sep);void newmatch(Rangeset*);void bnewmatch(Rangeset*);void pushand(Inst*, Inst*);void pushator(int);Node *popand(int);int popator(void);void startlex(Rune*);int lex(void);void operator(int);void operand(int);void evaluntil(int);void optimize(Inst*);void bldcclass(void);voidrxinit(void){ rechan = chancreate(sizeof(Inst*), 0); lastregexp = runemalloc(1);}voidregerror(char *e){ lastregexp[0] = 0; warning(nil, "regexp: %s\n", e); sendp(rechan, nil); threadexits(nil);}Inst *newinst(int t){ if(progp >= &program[NPROG]) regerror("expression too long"); progp->type = t; progp->left = nil; progp->right = nil; return progp++;}voidrealcompile(void *arg){ int token; Rune *s; threadsetname("regcomp"); s = arg; startlex(s); atorp = atorstack; andp = andstack; subidp = subidstack; cursubid = 0; lastwasand = FALSE; /* Start with a low priority operator to prime parser */ pushator(START-1); while((token=lex()) != END){ if((token&ISATOR) == OPERATOR) operator(token); else operand(token); } /* Close with a low priority operator */ evaluntil(START); /* Force END */ operand(END); evaluntil(START); if(nbra) regerror("unmatched `('"); --andp; /* points to first and only operand */ sendp(rechan, andp->first); threadexits(nil);}/* r is null terminated */intrxcompile(Rune *r){ int i, nr; Inst *oprogp; nr = runestrlen(r)+1; if(runeeq(lastregexp, runestrlen(lastregexp)+1, r, nr)==TRUE) return TRUE; lastregexp[0] = 0; for(i=0; i<nclass; i++) free(class[i]); nclass = 0; progp = program; backwards = FALSE; bstartinst = nil; threadcreate(realcompile, r, STACK); startinst = recvp(rechan); if(startinst == nil) return FALSE; optimize(program); oprogp = progp; backwards = TRUE; threadcreate(realcompile, r, STACK); bstartinst = recvp(rechan); if(bstartinst == nil) return FALSE; optimize(oprogp); lastregexp = runerealloc(lastregexp, nr); runemove(lastregexp, r, nr); return TRUE;}voidoperand(int t){ Inst *i; if(lastwasand) operator(CAT); /* catenate is implicit */ i = newinst(t); if(t == CCLASS){ if(negateclass) i->type = NCCLASS; /* UGH */ i->class = nclass-1; /* UGH */ } pushand(i, i); lastwasand = TRUE;}voidoperator(int t){ if(t==RBRA && --nbra<0) regerror("unmatched `)'"); if(t==LBRA){ cursubid++; /* silently ignored */ nbra++; if(lastwasand) operator(CAT); }else evaluntil(t); if(t!=RBRA) pushator(t); lastwasand = FALSE; if(t==STAR || t==QUEST || t==PLUS || t==RBRA) lastwasand = TRUE; /* these look like operands */}voidpushand(Inst *f, Inst *l){ if(andp >= &andstack[NSTACK]) error("operand stack overflow"); andp->first = f; andp->last = l; andp++;}voidpushator(int t){ if(atorp >= &atorstack[NSTACK]) error("operator stack overflow"); *atorp++=t; if(cursubid >= NRange) *subidp++= -1; else *subidp++=cursubid;}Node *popand(int op){ char buf[64]; if(andp <= &andstack[0]) if(op){ sprint(buf, "missing operand for %c", op); regerror(buf); }else regerror("malformed regexp"); return --andp;}intpopator(){ if(atorp <= &atorstack[0]) error("operator stack underflow"); --subidp; return *--atorp;}voidevaluntil(int pri){ Node *op1, *op2, *t; Inst *inst1, *inst2; while(pri==RBRA || atorp[-1]>=pri){ switch(popator()){ case LBRA: op1 = popand('('); inst2 = newinst(RBRA); inst2->subid = *subidp; op1->last->next = inst2; inst1 = newinst(LBRA); inst1->subid = *subidp; inst1->next = op1->first; pushand(inst1, inst2); return; /* must have been RBRA */ default: error("unknown regexp operator"); break; case OR: op2 = popand('|'); op1 = popand('|'); inst2 = newinst(NOP); op2->last->next = inst2; op1->last->next = inst2; inst1 = newinst(OR); inst1->right = op1->first; inst1->left = op2->first; pushand(inst1, inst2); break; case CAT: op2 = popand(0); op1 = popand(0); if(backwards && op2->first->type!=END){ t = op1; op1 = op2; op2 = t; } op1->last->next = op2->first; pushand(op1->first, op2->last); break; case STAR: op2 = popand('*'); inst1 = newinst(OR); op2->last->next = inst1; inst1->right = op2->first; pushand(inst1, inst1); break; case PLUS: op2 = popand('+'); inst1 = newinst(OR); op2->last->next = inst1; inst1->right = op2->first; pushand(op2->first, inst1); break; case QUEST: op2 = popand('?'); inst1 = newinst(OR); inst2 = newinst(NOP); inst1->left = inst2; inst1->right = op2->first; op2->last->next = inst2; pushand(inst1, inst2); break; } }}voidoptimize(Inst *start){ Inst *inst, *target; for(inst=start; inst->type!=END; inst++){ target = inst->next; while(target->type == NOP) target = target->next; inst->next = target; }}voidstartlex(Rune *s){ exprp = s; nbra = 0;}intlex(void){ int c; c = *exprp++; switch(c){ case '\\': if(*exprp) if((c= *exprp++)=='n') c='\n'; break; case 0: c = END; --exprp; /* In case we come here again */ break; case '*': c = STAR; break; case '?': c = QUEST; break; case '+': c = PLUS; break; case '|':
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -