📄 regcore.c
字号:
/* regcore.c - regular expression handling *//*modification history--------------------01c,02mar98,pcn Removed warnings.01b,30sep96,elp put in share, adapted to be compiled on target side (SPR# 6775).01a,10jan95,jcf created.*//*DESCRIPTIONThis library is *not* the original BSD distribution. Though the changeswere completely cosmetic (file naming, and such), the user of this libraryis forewarned.AUTHOR: Henry Spencer*//*- * Copyright (c) 1992, 1993, 1994 Henry Spencer. * Copyright (c) 1992, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Henry Spencer. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)engine.c 8.5 (Berkeley) 3/20/94 *//* * The matching engine and friends. This file is #included by regexec.c * after suitable #defines of a variety of macros used herein, so that * different state representations can be used without duplicating masses * of code. */#ifdef SNAMES#define matcher smatcher#define fast sfast#define slow sslow#define dissect sdissect#define backref sbackref#define step sstep#define print sprint#define at sat#define match smat#endif#ifdef LNAMES#define matcher lmatcher#define fast lfast#define slow lslow#define dissect ldissect#define backref lbackref#define step lstep#define print lprint#define at lat#define match lmat#endif/* another structure passed up and down to avoid zillions of parameters */struct match { struct re_guts *g; int eflags; regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ char *offp; /* offsets work from here */ char *beginp; /* start of string -- virtual NUL precedes */ char *endp; /* end of string -- virtual NUL here */ char *coldp; /* can be no match starting before here */ char **lastpos; /* [nplus+1] */ STATEVARS; states st; /* current states */ states fresh; /* states for a fresh start */ states tmp; /* temporary */ states empty; /* empty set of states */};/* ========= begin header generated by ./mkh ========= */#ifdef __cplusplusextern "C" {#endif/* === engine.c === */static int matcher __P((struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags));static char *dissect __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));static char *backref __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev));static char *fast __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));static char *slow __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));static states step __P((struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft));#define BOL (OUT+1)#define EOL (BOL+1)#define BOLEOL (BOL+2)#define NOTHING (BOL+3)#define BOW (BOL+4)#define EOW (BOL+5)#define CODEMAX (BOL+5) /* highest code used */#define NONCHAR(c) ((c) > CHAR_MAX)#define NNONCHAR (CODEMAX-CHAR_MAX)#ifdef REDEBUGstatic void print __P((struct match *m, char *caption, states st, int ch, FILE *d));#endif#ifdef REDEBUGstatic void at __P((struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst));#endif#ifdef REDEBUGstatic char *pchar __P((int ch));#endif#ifdef __cplusplus}#endif/* ========= end header generated by ./mkh ========= */#ifdef REDEBUG#define SP(t, s, c) print(m, t, s, c, stdout)#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)#define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); }#else#define SP(t, s, c) /* nothing */#define AT(t, p1, p2, s1, s2) /* nothing */#define NOTE(s) /* nothing */#endif/* - matcher - the actual matching engine == static int matcher(register struct re_guts *g, char *string, \ == size_t nmatch, regmatch_t pmatch[], int eflags); */static int /* 0 success, REG_NOMATCH failure */matcher(g, string, nmatch, pmatch, eflags)register struct re_guts *g;char *string;size_t nmatch;regmatch_t pmatch[];int eflags;{ register char *endp; register int i; struct match mv; register struct match *m = &mv; register char *dp; register const sopno gf = g->firststate+1; /* +1 for OEND */ register const sopno gl = g->laststate; char *start; char *stop; /* simplify the situation where possible */ if (g->cflags®_NOSUB) nmatch = 0; if (eflags®_STARTEND) { start = string + pmatch[0].rm_so; stop = string + pmatch[0].rm_eo; } else { start = string; stop = start + strlen(start); } if (stop < start) return(REG_INVARG); /* prescreening; this does wonders for this rather slow code */ if (g->must != NULL) { for (dp = start; dp < stop; dp++) if (*dp == g->must[0] && stop - dp >= g->mlen && memcmp(dp, g->must, (size_t)g->mlen) == 0) break; if (dp == stop) /* we didn't find g->must */ return(REG_NOMATCH); } /* match struct setup */ m->g = g; m->eflags = eflags; m->pmatch = NULL; m->lastpos = NULL; m->offp = string; m->beginp = start; m->endp = stop; STATESETUP(m, 4); SETUP(m->st); SETUP(m->fresh); SETUP(m->tmp); SETUP(m->empty); CLEAR(m->empty); /* this loop does only one repetition except for backrefs */ for (;;) { endp = fast(m, start, stop, gf, gl); if (endp == NULL) { /* a miss */ STATETEARDOWN(m); return(REG_NOMATCH); } if (nmatch == 0 && !g->backrefs) break; /* no further info needed */ /* where? */ assert(m->coldp != NULL); for (;;) { NOTE("finding start"); endp = slow(m, m->coldp, stop, gf, gl); if (endp != NULL) break; assert(m->coldp < m->endp); m->coldp++; } if (nmatch == 1 && !g->backrefs) break; /* no further info needed */ /* oh my, he wants the subexpressions... */ if (m->pmatch == NULL) m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * sizeof(regmatch_t)); if (m->pmatch == NULL) { STATETEARDOWN(m); return(REG_ESPACE); } for (i = 1; i <= (int)(m->g->nsub); i++) m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; if (!g->backrefs && !(m->eflags®_BACKR)) { NOTE("dissecting"); dp = dissect(m, m->coldp, endp, gf, gl); } else { if (g->nplus > 0 && m->lastpos == NULL) m->lastpos = (char **)malloc((g->nplus+1) * sizeof(char *)); if (g->nplus > 0 && m->lastpos == NULL) { free(m->pmatch); STATETEARDOWN(m); return(REG_ESPACE); } NOTE("backref dissect"); dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); } if (dp != NULL) break; /* uh-oh... we couldn't find a subexpression-level match */ assert(g->backrefs); /* must be back references doing it */ assert(g->nplus == 0 || m->lastpos != NULL); for (;;) { if (dp != NULL || endp <= m->coldp) break; /* defeat */ NOTE("backoff"); endp = slow(m, m->coldp, endp-1, gf, gl); if (endp == NULL) break; /* defeat */ /* try it on a shorter possibility */#ifndef NDEBUG for (i = 1; i <= m->g->nsub; i++) { assert(m->pmatch[i].rm_so == -1); assert(m->pmatch[i].rm_eo == -1); }#endif NOTE("backoff dissect"); dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); } assert(dp == NULL || dp == endp); if (dp != NULL) /* found a shorter one */ break; /* despite initial appearances, there is no match here */ NOTE("false alarm"); start = m->coldp + 1; /* recycle starting later */ assert(start <= stop); } /* fill in the details if requested */ if (nmatch > 0) { pmatch[0].rm_so = m->coldp - m->offp; pmatch[0].rm_eo = endp - m->offp; } if (nmatch > 1) { assert(m->pmatch != NULL); for (i = 1; i < (int)nmatch; i++) if (i <= (int)(m->g->nsub)) pmatch[i] = m->pmatch[i]; else { pmatch[i].rm_so = -1; pmatch[i].rm_eo = -1; } } if (m->pmatch != NULL) free((char *)m->pmatch); if (m->lastpos != NULL) free((char *)m->lastpos); STATETEARDOWN(m); return(0);}/* - dissect - figure out what matched what, no back references == static char *dissect(register struct match *m, char *start, \ == char *stop, sopno startst, sopno stopst); */static char * /* == stop (success) always */dissect(m, start, stop, startst, stopst)register struct match *m;char *start;char *stop;sopno startst;sopno stopst;{ register int i; register sopno ss; /* start sop of current subRE */ register sopno es; /* end sop of current subRE */ register char *sp; /* start of string matched by it */ register char *stp; /* string matched by it cannot pass here */ register char *rest; /* start of rest of string */ register char *tail; /* string unmatched by rest of RE */ register sopno ssub; /* start sop of subsubRE */ register sopno esub; /* end sop of subsubRE */ register char *ssp; /* start of string matched by subsubRE */ register char *sep; /* end of string matched by subsubRE */ register char *oldssp; /* previous ssp */ register char *dp; AT("diss", start, stop, startst, stopst); sp = start; for (ss = startst; ss < stopst; ss = es) { /* identify end of subRE */ es = ss; switch (OP(m->g->strip[es])) { case OPLUS_: case OQUEST_: es += OPND(m->g->strip[es]); break; case OCH_: while ((int)OP(m->g->strip[es]) != O_CH) es += OPND(m->g->strip[es]); break; } es++; /* figure out what it matched */ switch (OP(m->g->strip[ss])) { case OEND: assert(nope); break; case OCHAR: sp++; break; case OBOL: case OEOL: case OBOW: case OEOW: break; case OANY: case OANYOF: sp++; break; case OBACK_: case O_BACK: assert(nope); break; /* cases where length of match is hard to find */ case OQUEST_: stp = stop; for (;;) { /* how long could this one be? */ rest = slow(m, sp, stp, ss, es); assert(rest != NULL); /* it did match */ /* could the rest match the rest? */ tail = slow(m, rest, stop, es, stopst); if (tail == stop) break; /* yes! */ /* no -- try a shorter match for this one */ stp = rest - 1; assert(stp >= sp); /* it did work */ } ssub = ss + 1; esub = es - 1; /* did innards match? */ if (slow(m, sp, rest, ssub, esub) != NULL) { dp = dissect(m, sp, rest, ssub, esub); assert(dp == rest); } else /* no */ assert(sp == rest); sp = rest; break; case OPLUS_: stp = stop; for (;;) { /* how long could this one be? */ rest = slow(m, sp, stp, ss, es); assert(rest != NULL); /* it did match */ /* could the rest match the rest? */ tail = slow(m, rest, stop, es, stopst); if (tail == stop) break; /* yes! */ /* no -- try a shorter match for this one */ stp = rest - 1; assert(stp >= sp); /* it did work */ } ssub = ss + 1; esub = es - 1; ssp = sp; oldssp = ssp; for (;;) { /* find last match of innards */ sep = slow(m, ssp, rest, ssub, esub); if (sep == NULL || sep == ssp) break; /* failed or matched null */ oldssp = ssp; /* on to next try */ ssp = sep; } if (sep == NULL) { /* last successful match */ sep = ssp; ssp = oldssp; } assert(sep == rest); /* must exhaust substring */ assert(slow(m, ssp, sep, ssub, esub) == rest); dp = dissect(m, ssp, sep, ssub, esub); assert(dp == sep); sp = rest; break; case OCH_: stp = stop; for (;;) { /* how long could this one be? */ rest = slow(m, sp, stp, ss, es); assert(rest != NULL); /* it did match */ /* could the rest match the rest? */ tail = slow(m, rest, stop, es, stopst); if (tail == stop) break; /* yes! */ /* no -- try a shorter match for this one */ stp = rest - 1; assert(stp >= sp); /* it did work */ } ssub = ss + 1; esub = ss + OPND(m->g->strip[ss]) - 1; assert(OP(m->g->strip[esub]) == OOR1); for (;;) { /* find first matching branch */ if (slow(m, sp, rest, ssub, esub) == rest) break; /* it matched all of it */ /* that one missed, try next one */ assert(OP(m->g->strip[esub]) == OOR1); esub++; assert(OP(m->g->strip[esub]) == OOR2); ssub = esub + 1; esub += OPND(m->g->strip[esub]); if ((int)OP(m->g->strip[esub]) == OOR2) esub--; else assert(OP(m->g->strip[esub]) == O_CH); } dp = dissect(m, sp, rest, ssub, esub); assert(dp == rest); sp = rest; break; case O_PLUS: case O_QUEST: case OOR1: case OOR2: case O_CH: assert(nope); break; case OLPAREN: i = OPND(m->g->strip[ss]); assert(0 < i && i <= m->g->nsub); m->pmatch[i].rm_so = sp - m->offp; break; case ORPAREN: i = OPND(m->g->strip[ss]); assert(0 < i && i <= m->g->nsub); m->pmatch[i].rm_eo = sp - m->offp; break; default: /* uh oh */ assert(nope); break; } } assert(sp == stop); return(sp);}/* - backref - figure out what matched what, figuring in back references == static char *backref(register struct match *m, char *start, \ == char *stop, sopno startst, sopno stopst, sopno lev); */static char * /* == stop (success) or NULL (failure) */backref(m, start, stop, startst, stopst, lev)register struct match *m;char *start;char *stop;sopno startst;sopno stopst;sopno lev; /* PLUS nesting level */{ register int i; register sopno ss; /* start sop of current subRE */ register char *sp; /* start of string matched by it */ register sopno ssub; /* start sop of subsubRE */ register sopno esub; /* end sop of subsubRE */ register char *ssp; /* start of string matched by subsubRE */ register char *dp; register size_t len; register int hard; register sop s; register regoff_t offsave; register cset *cs; AT("back", start, stop, startst, stopst); sp = start; /* get as far as we can with easy stuff */ hard = 0; for (ss = startst; !hard && ss < stopst; ss++) switch (OP(s = m->g->strip[ss])) { case OCHAR: if (sp == stop || *sp++ != (char)OPND(s)) return(NULL); break; case OANY: if (sp == stop) return(NULL); sp++; break; case OANYOF: cs = &m->g->sets[OPND(s)]; if (sp == stop || !CHIN(cs, *sp++)) return(NULL); break; case OBOL: if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) ||
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -