📄 engine.c
字号:
return NULL; break; case OBOL: if ((sp == m->beginp && !(m->eflags & REG_NOTBOL)) || (sp < m->endp && *(sp - 1) == '\n' && (m->g->cflags & REG_NEWLINE))) { /* yes */ } else return NULL; break; case OEOL: if ((sp == m->endp && !(m->eflags & REG_NOTEOL)) || (sp < m->endp && *sp == '\n' && (m->g->cflags & REG_NEWLINE))) { /* yes */ } else return NULL; break; case OBOW: if (((sp == m->beginp && !(m->eflags & REG_NOTBOL)) || (sp < m->endp && *(sp - 1) == '\n' && (m->g->cflags & REG_NEWLINE)) || (sp > m->beginp && !ISWORD(*(sp - 1)))) && (sp < m->endp && ISWORD(*sp))) { /* yes */ } else return NULL; break; case OEOW: if (((sp == m->endp && !(m->eflags & REG_NOTEOL)) || (sp < m->endp && *sp == '\n' && (m->g->cflags & REG_NEWLINE)) || (sp < m->endp && !ISWORD(*sp))) && (sp > m->beginp && ISWORD(*(sp - 1)))) { /* yes */ } else return NULL; break; case O_QUEST: break; case OOR1: /* matches null but needs to skip */ ss++; s = m->g->strip[ss]; do { assert(OP(s) == OOR2); ss += OPND(s); } while (OP(s = m->g->strip[ss]) != O_CH); /* note that the ss++ gets us past the O_CH */ break; default: /* have to make a choice */ hard = 1; break; } if (!hard) { /* that was it! */ if (sp != stop) return NULL; return sp; } ss--; /* adjust for the for's final increment */ /* the hard stuff */ AT("hard", sp, stop, ss, stopst); s = m->g->strip[ss]; switch (OP(s)) { case OBACK_: /* the vilest depths */ i = OPND(s); assert(0 < i && i <= m->g->nsub); if (m->pmatch[i].rm_eo == -1) return NULL; assert(m->pmatch[i].rm_so != -1); len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; assert(stop - m->beginp >= len); if (sp > stop - len) return NULL; /* not enough left to match */ ssp = m->offp + m->pmatch[i].rm_so; if (memcmp(sp, ssp, len) != 0) return NULL; while (m->g->strip[ss] != SOP(O_BACK, i)) ss++; return backref(m, sp + len, stop, ss + 1, stopst, lev); break; case OQUEST_: /* to null or not */ dp = backref(m, sp, stop, ss + 1, stopst, lev); if (dp != NULL) return dp; /* not */ return backref(m, sp, stop, ss + OPND(s) + 1, stopst, lev); break; case OPLUS_: assert(m->lastpos != NULL); assert(lev + 1 <= m->g->nplus); m->lastpos[lev + 1] = sp; return backref(m, sp, stop, ss + 1, stopst, lev + 1); break; case O_PLUS: if (sp == m->lastpos[lev]) /* last pass matched null */ return backref(m, sp, stop, ss + 1, stopst, lev - 1); /* try another pass */ m->lastpos[lev] = sp; dp = backref(m, sp, stop, ss - OPND(s) + 1, stopst, lev); if (dp == NULL) return backref(m, sp, stop, ss + 1, stopst, lev - 1); else return dp; break; case OCH_: /* find the right one, if any */ ssub = ss + 1; esub = ss + OPND(s) - 1; assert(OP(m->g->strip[esub]) == OOR1); for (;;) { /* find first matching branch */ dp = backref(m, sp, stop, ssub, esub, lev); if (dp != NULL) return dp; /* that one missed, try next one */ if (OP(m->g->strip[esub]) == O_CH) return NULL;/* there is none */ esub++; assert(OP(m->g->strip[esub]) == OOR2); ssub = esub + 1; esub += OPND(m->g->strip[esub]); if (OP(m->g->strip[esub]) == OOR2) esub--; else assert(OP(m->g->strip[esub]) == O_CH); } break; case OLPAREN: /* must undo assignment if rest fails */ i = OPND(s); assert(0 < i && i <= m->g->nsub); offsave = m->pmatch[i].rm_so; m->pmatch[i].rm_so = sp - m->offp; dp = backref(m, sp, stop, ss + 1, stopst, lev); if (dp != NULL) return dp; m->pmatch[i].rm_so = offsave; return NULL; break; case ORPAREN: /* must undo assignment if rest fails */ i = OPND(s); assert(0 < i && i <= m->g->nsub); offsave = m->pmatch[i].rm_eo; m->pmatch[i].rm_eo = sp - m->offp; dp = backref(m, sp, stop, ss + 1, stopst, lev); if (dp != NULL) return dp; m->pmatch[i].rm_eo = offsave; return NULL; break; default: /* uh oh */ assert(nope); break; } /* "can't happen" */ assert(nope); /* NOTREACHED */ return 0;}/* - fast - step through the string at top speed == static char *fast(struct match *m, char *start, \ == char *stop, sopno startst, sopno stopst); */static pg_wchar * /* where tentative match ended, or NULL */fast(m, start, stop, startst, stopst)struct match *m;pg_wchar *start;pg_wchar *stop;sopno startst;sopno stopst;{ states st = m->st; states fresh = m->fresh; states tmp = m->tmp; pg_wchar *p = start; int c = (start == m->beginp) ? OUT : *(start - 1); int lastc; /* previous c */ int flagch; int i; pg_wchar *coldp; /* last p after which no match was * underway */ CLEAR(st); SET1(st, startst); st = step(m->g, startst, stopst, st, NOTHING, st); ASSIGN(fresh, st); SP("start", st, *p); coldp = NULL; for (;;) { /* next character */ lastc = c; c = (p == m->endp) ? OUT : *p; if (EQ(st, fresh)) coldp = p; /* is there an EOL and/or BOL between lastc and c? */ flagch = '\0'; i = 0; if ((lastc == '\n' && m->g->cflags & REG_NEWLINE) || (lastc == OUT && !(m->eflags & REG_NOTBOL))) { flagch = BOL; i = m->g->nbol; } if ((c == '\n' && m->g->cflags & REG_NEWLINE) || (c == OUT && !(m->eflags & REG_NOTEOL))) { flagch = (flagch == BOL) ? BOLEOL : EOL; i += m->g->neol; } if (i != 0) { for (; i > 0; i--) st = step(m->g, startst, stopst, st, flagch, st); SP("boleol", st, c); } /* how about a word boundary? */ if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && (c != OUT && ISWORD(c))) flagch = BOW; if ((lastc != OUT && ISWORD(lastc)) && (flagch == EOL || (c != OUT && !ISWORD(c)))) flagch = EOW; if (flagch == BOW || flagch == EOW) { st = step(m->g, startst, stopst, st, flagch, st); SP("boweow", st, c); } /* are we done? */ if (ISSET(st, stopst) || p == stop) break; /* NOTE BREAK OUT */ /* no, we must deal with this character */ ASSIGN(tmp, st); ASSIGN(st, fresh); assert(c != OUT); st = step(m->g, startst, stopst, tmp, c, st); SP("aft", st, c); assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); p++; } assert(coldp != NULL); m->coldp = coldp; if (ISSET(st, stopst)) return p + 1; else return NULL;}/* - slow - step through the string more deliberately == static char *slow(struct match *m, char *start, \ == char *stop, sopno startst, sopno stopst); */static pg_wchar * /* where it ended */slow(m, start, stop, startst, stopst)struct match *m;pg_wchar *start;pg_wchar *stop;sopno startst;sopno stopst;{ states st = m->st; states empty = m->empty; states tmp = m->tmp; pg_wchar *p = start; int c = (start == m->beginp) ? OUT : *(start - 1); int lastc; /* previous c */ int flagch; int i; pg_wchar *matchp; /* last p at which a match ended */ AT("slow", start, stop, startst, stopst); CLEAR(st); SET1(st, startst); SP("sstart", st, *p); st = step(m->g, startst, stopst, st, NOTHING, st); matchp = NULL; for (;;) { /* next character */ lastc = c; c = (p == m->endp) ? OUT : *p; /* is there an EOL and/or BOL between lastc and c? */ flagch = '\0'; i = 0; if ((lastc == '\n' && m->g->cflags & REG_NEWLINE) || (lastc == OUT && !(m->eflags & REG_NOTBOL))) { flagch = BOL; i = m->g->nbol; } if ((c == '\n' && m->g->cflags & REG_NEWLINE) || (c == OUT && !(m->eflags & REG_NOTEOL))) { flagch = (flagch == BOL) ? BOLEOL : EOL; i += m->g->neol; } if (i != 0) { for (; i > 0; i--) st = step(m->g, startst, stopst, st, flagch, st); SP("sboleol", st, c); } /* how about a word boundary? */ if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && (c != OUT && ISWORD(c))) flagch = BOW; if ((lastc != OUT && ISWORD(lastc)) && (flagch == EOL || (c != OUT && !ISWORD(c)))) flagch = EOW; if (flagch == BOW || flagch == EOW) { st = step(m->g, startst, stopst, st, flagch, st); SP("sboweow", st, c); } /* are we done? */ if (ISSET(st, stopst)) matchp = p; if (EQ(st, empty) || p == stop) break; /* NOTE BREAK OUT */ /* no, we must deal with this character */ ASSIGN(tmp, st); ASSIGN(st, empty); assert(c != OUT); st = step(m->g, startst, stopst, tmp, c, st); SP("saft", st, c); assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); p++; } return matchp;}/* - step - map set of states reachable before char to set reachable after == static states step(struct re_guts *g, sopno start, sopno stop, \ == states bef, int ch, states aft); == #define BOL (OUT+1) == #define EOL (BOL+1) == #define BOLEOL (BOL+2) == #define NOTHING (BOL+3) == #define BOW (BOL+4) == #define EOW (BOL+5) == #define CODEMAX (BOL+5) // highest code used == #define NONCHAR(c) ((c) > CHAR_MAX) == #define NNONCHAR (CODEMAX-CHAR_MAX) */static statesstep(g, start, stop, bef, ch, aft)struct re_guts *g;sopno start; /* start state within strip */sopno stop; /* state after stop state within strip */states bef; /* states reachable before */int ch; /* character or NONCHAR code */states aft; /* states already known reachable after */{ cset *cs; sop s; sopno pc; onestate here; /* note, macros know this name */ sopno look; int i; for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { s = g->strip[pc]; switch (OP(s)) { case OEND: assert(pc == stop - 1); break; case OCHAR: /* only characters can match */ assert(!NONCHAR(ch) || ch != (pg_wchar) OPND(s)); if (ch == (pg_wchar) OPND(s)) FWD(aft, bef, 1); break; case OBOL: if (ch == BOL || ch == BOLEOL) FWD(aft, bef, 1); break; case OEOL: if (ch == EOL || ch == BOLEOL) FWD(aft, bef, 1); break; case OBOW: if (ch == BOW) FWD(aft, bef, 1); break; case OEOW: if (ch == EOW) FWD(aft, bef, 1); break; case OANY: if (!NONCHAR(ch)) FWD(aft, bef, 1); break; case OANYOF: cs = &g->sets[OPND(s)]; if (!NONCHAR(ch) && CHIN(cs, ch)) FWD(aft, bef, 1); break; case OBACK_: /* ignored here */ case O_BACK: FWD(aft, aft, 1); break; case OPLUS_: /* forward, this is just an empty */ FWD(aft, aft, 1); break; case O_PLUS: /* both forward and back */ FWD(aft, aft, 1); i = ISSETBACK(aft, OPND(s)); BACK(aft, aft, OPND(s)); if (!i && ISSETBACK(aft, OPND(s))) { /* oho, must reconsider loop body */ pc -= OPND(s) + 1; INIT(here, pc); } break; case OQUEST_: /* two branches, both forward */ FWD(aft, aft, 1); FWD(aft, aft, OPND(s)); break; case O_QUEST: /* just an empty */ FWD(aft, aft, 1); break; case OLPAREN: /* not significant here */ case ORPAREN: FWD(aft, aft, 1); break; case OCH_: /* mark the first two branches */ FWD(aft, aft, 1); assert(OP(g->strip[pc + OPND(s)]) == OOR2); FWD(aft, aft, OPND(s)); break; case OOR1: /* done a branch, find the O_CH */ if (ISSTATEIN(aft, here)) { for (look = 1; OP(s = g->strip[pc + look]) != O_CH; look += OPND(s)) assert(OP(s) == OOR2); FWD(aft, aft, look); } break; case OOR2: /* propagate OCH_'s marking */ FWD(aft, aft, 1); if (OP(g->strip[pc + OPND(s)]) != O_CH) { assert(OP(g->strip[pc + OPND(s)]) == OOR2); FWD(aft, aft, OPND(s)); } break; case O_CH: /* just empty */ FWD(aft, aft, 1); break; default: /* ooooops... */ assert(nope); break; } } return aft;}#ifdef REDEBUG/* - print - print a set of states == #ifdef REDEBUG == static void print(struct match *m, char *caption, states st, \ == int ch, FILE *d); == #endif */static voidprint(m, caption, st, ch, d)struct match *m;pg_wchar *caption;states st;int ch;FILE *d;{ struct re_guts *g = m->g; int i; int first = 1; if (!(m->eflags & REG_TRACE)) return; fprintf(d, "%s", caption); if (ch != '\0') fprintf(d, " %s", pchar(ch)); for (i = 0; i < g->nstates; i++) if (ISSET(st, i)) { fprintf(d, "%s%d", (first) ? "\t" : ", ", i); first = 0; } fprintf(d, "\n");}/* - at - print current situation == #ifdef REDEBUG == static void at(struct match *m, pg_wchar *title, pg_wchar *start, pg_wchar *stop, \ == sopno startst, sopno stopst); == #endif */static voidat(m, title, start, stop, startst, stopst)struct match *m;pg_wchar *title;pg_wchar *start;pg_wchar *stop;sopno startst;sopno stopst;{ if (!(m->eflags & REG_TRACE)) return; printf("%s %s-", title, pchar(*start)); printf("%s ", pchar(*stop)); printf("%ld-%ld\n", (long) startst, (long) stopst);}#ifndef PCHARDONE#define PCHARDONE /* never again *//* - pchar - make a character printable == #ifdef REDEBUG == static char *pchar(int ch); == #endif * * Is this identical to regchar() over in debug.c? Well, yes. But a * duplicate here avoids having a debugging-capable regexec.o tied to * a matching debug.o, and this is convenient. It all disappears in * the non-debug compilation anyway, so it doesn't matter much. */static intpg_isprint(int c){#ifdef MULTIBYTE return (c >= 0 && c <= UCHAR_MAX && isprint(c));#else return (isprint(c));#endif}static pg_wchar * /* -> representation */pchar(ch)int ch;{ static pg_wchar pbuf[10]; if (pg_isprint(ch) || ch == ' ') sprintf(pbuf, "%c", ch); else sprintf(pbuf, "\\%o", ch); return pbuf;}#endif#endif#undef matcher#undef fast#undef slow#undef dissect#undef backref#undef step#undef print#undef at#undef match
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -