📄 regexec.c
字号:
/* NOTE: this is derived from Henry Spencer's regexp code, and should not * be confused with the original package (see point 3 below). Thanks, Henry! *//* Additional note: this code is very heavily munged from Henry's version * in places. In some spots I've traded clarity for efficiency, so don't * blame Henry for some of the lack of readability. *//* $RCSfile: regexec.c,v $$Revision: 4.0.1.4 $$Date: 92/06/08 15:25:50 $ * * $Log: regexec.c,v $ * Revision 4.0.1.4 92/06/08 15:25:50 lwall * patch20: pattern modifiers i and g didn't interact right * patch20: in some cases $` and $' didn't get set by match * patch20: /x{0}/ was wrongly interpreted as /x{0,}/ * * Revision 4.0.1.3 91/11/05 18:23:55 lwall * patch11: prepared for ctype implementations that don't define isascii() * patch11: initial .* in pattern had dependency on value of $* * * Revision 4.0.1.2 91/06/07 11:50:33 lwall * patch4: new copyright notice * patch4: // wouldn't use previous pattern if it started with a null character * * Revision 4.0.1.1 91/04/12 09:07:39 lwall * patch1: regexec only allocated space for 9 subexpressions * * Revision 4.0 91/03/20 01:39:16 lwall * 4.0 baseline. * *//*SUPPRESS 112*//* * regcomp and regexec -- regsub and regerror are not used in perl * * Copyright (c) 1986 by University of Toronto. * Written by Henry Spencer. Not derived from licensed software. * * Permission is granted to anyone to use this software for any * purpose on any computer system, and to redistribute it freely, * subject to the following restrictions: * * 1. The author is not responsible for the consequences of use of * this software, no matter how awful, even if they arise * from defects in it. * * 2. The origin of this software must not be misrepresented, either * by explicit claim or by omission. * * 3. Altered versions must be plainly marked as such, and must not * be misrepresented as being the original software. * **** Alterations to Henry's code are... 
**** **** Copyright (c) 1991, Larry Wall **** **** You may distribute under the terms of either the GNU General Public **** License or the Artistic License, as specified in the README file. * * Beware that some of this code is subtly aware of the way operator * precedence is structured in regular expressions. Serious changes in * regular-expression syntax might require a total rethink. */#include "EXTERN.h"#include "perl.h"#include "regcomp.h"#ifndef STATIC#define STATIC static#endif#ifdef DEBUGGINGint regnarrate = 0;#endif/* * regexec and friends *//* * Global work variables for regexec(). */static char *regprecomp;static char *reginput; /* String-input pointer. */static char regprev; /* char before regbol, \n if none */static char *regbol; /* Beginning of input, for ^ check. */static char *regeol; /* End of input, for $ check. */static char **regstartp; /* Pointer to startp array. */static char **regendp; /* Ditto for endp. */static char *reglastparen; /* Similarly for lastparen. */static char *regtill;static int regmyp_size = 0;static char **regmystartp = Null(char**);static char **regmyendp = Null(char**);/* * Forwards. */STATIC int regtry();STATIC int regmatch();STATIC int regrepeat();extern int multiline;/* - regexec - match a regexp against a string */intregexec(prog, stringarg, strend, strbeg, minend, screamer, safebase)register regexp *prog;char *stringarg;register char *strend; /* pointer to null at end of string */char *strbeg; /* real beginning of string */int minend; /* end of match must be at least minend after stringarg */STR *screamer;int safebase; /* no need to remember string in subbase */{ register char *s; register int i; register char *c; register char *string = stringarg; register int tmp; int minlen = 0; /* must match at least this many chars */ int dontbother = 0; /* how many characters not to try at end */ /* Be paranoid... 
*/ if (prog == NULL || string == NULL) { fatal("NULL regexp parameter"); return(0); } if (string == strbeg) /* is ^ valid at stringarg? */ regprev = '\n'; else { regprev = stringarg[-1]; if (!multiline && regprev == '\n') regprev = '\0'; /* force ^ to NOT match */ } regprecomp = prog->precomp; /* Check validity of program. */ if (UCHARAT(prog->program) != MAGIC) { FAIL("corrupted regexp program"); } if (prog->do_folding) { i = strend - string; New(1101,c,i+1,char); Copy(string, c, i+1, char); string = c; strend = string + i; for (s = string; s < strend; s++) if (isUPPER(*s)) *s = tolower(*s); } /* If there is a "must appear" string, look for it. */ s = string; if (prog->regmust != Nullstr && (!(prog->reganch & ROPT_ANCH) || (multiline && prog->regback >= 0)) ) { if (stringarg == strbeg && screamer) { if (screamfirst[prog->regmust->str_rare] >= 0) s = screaminstr(screamer,prog->regmust); else s = Nullch; }#ifndef lint else s = fbminstr((unsigned char*)s, (unsigned char*)strend, prog->regmust);#endif if (!s) { ++prog->regmust->str_u.str_useful; /* hooray */ goto phooey; /* not present */ } else if (prog->regback >= 0) { s -= prog->regback; if (s < string) s = string; minlen = prog->regback + prog->regmust->str_cur; } else if (--prog->regmust->str_u.str_useful < 0) { /* boo */ str_free(prog->regmust); prog->regmust = Nullstr; /* disable regmust */ s = string; } else { s = string; minlen = prog->regmust->str_cur; } } /* Mark beginning of line for ^ . 
*/ regbol = string; /* Mark end of line for $ (and such) */ regeol = strend; /* see how far we have to get to not match where we matched before */ regtill = string+minend; /* Allocate our backreference arrays */ if ( regmyp_size < prog->nparens + 1 ) { /* Allocate or enlarge the arrays */ regmyp_size = prog->nparens + 1; if ( regmyp_size < 10 ) regmyp_size = 10; /* minimum */ if ( regmystartp ) { /* reallocate larger */ Renew(regmystartp,regmyp_size,char*); Renew(regmyendp, regmyp_size,char*); } else { /* Initial allocation */ New(1102,regmystartp,regmyp_size,char*); New(1102,regmyendp, regmyp_size,char*); } } /* Simplest case: anchored match need be tried only once. */ /* [unless multiline is set] */ if (prog->reganch & ROPT_ANCH) { if (regtry(prog, string)) goto got_it; else if (multiline || (prog->reganch & ROPT_IMPLICIT)) { if (minlen) dontbother = minlen - 1; strend -= dontbother; /* for multiline we only have to try after newlines */ if (s > string) s--; while (s < strend) { if (*s++ == '\n') { if (s < strend && regtry(prog, s)) goto got_it; } } } goto phooey; } /* Messy cases: unanchored match. */ if (prog->regstart) { if (prog->reganch & ROPT_SKIP) { /* we have /x+whatever/ */ /* it must be a one character string */ i = prog->regstart->str_ptr[0]; while (s < strend) { if (*s == i) { if (regtry(prog, s)) goto got_it; s++; while (s < strend && *s == i) s++; } s++; } } else if (prog->regstart->str_pok == 3) { /* We know what string it must start with. 
*/#ifndef lint while ((s = fbminstr((unsigned char*)s, (unsigned char*)strend, prog->regstart)) != NULL)#else while (s = Nullch)#endif { if (regtry(prog, s)) goto got_it; s++; } } else { c = prog->regstart->str_ptr; while ((s = ninstr(s, strend, c, c + prog->regstart->str_cur )) != NULL) { if (regtry(prog, s)) goto got_it; s++; } } goto phooey; } /*SUPPRESS 560*/ if (c = prog->regstclass) { int doevery = (prog->reganch & ROPT_SKIP) == 0; if (minlen) dontbother = minlen - 1; strend -= dontbother; /* don't bother with what can't match */ tmp = 1; /* We know what class it must start with. */ switch (OP(c)) { case ANYOF: c = OPERAND(c); while (s < strend) { i = UCHARAT(s); if (!(c[i >> 3] & (1 << (i&7)))) { if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; } else tmp = 1; s++; } break; case BOUND: if (minlen) dontbother++,strend--; if (s != string) { i = s[-1]; tmp = isALNUM(i); } else tmp = isALNUM(regprev); /* assume not alphanumeric */ while (s < strend) { i = *s; if (tmp != isALNUM(i)) { tmp = !tmp; if (regtry(prog, s)) goto got_it; } s++; } if ((minlen || tmp) && regtry(prog,s)) goto got_it; break; case NBOUND: if (minlen) dontbother++,strend--; if (s != string) { i = s[-1]; tmp = isALNUM(i); } else tmp = isALNUM(regprev); /* assume not alphanumeric */ while (s < strend) { i = *s; if (tmp != isALNUM(i)) tmp = !tmp; else if (regtry(prog, s)) goto got_it; s++; } if ((minlen || !tmp) && regtry(prog,s)) goto got_it; break; case ALNUM: while (s < strend) { i = *s; if (isALNUM(i)) { if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; } else tmp = 1; s++; } break; case NALNUM: while (s < strend) { i = *s; if (!isALNUM(i)) { if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; } else tmp = 1; s++; } break; case SPACE: while (s < strend) { if (isSPACE(*s)) { if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; } else tmp = 1; s++; } break; case NSPACE: while (s < strend) { if (!isSPACE(*s)) { if (tmp && regtry(prog, s)) goto got_it; else 
tmp = doevery; } else tmp = 1; s++; } break; case DIGIT: while (s < strend) { if (isDIGIT(*s)) { if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; } else tmp = 1; s++; } break; case NDIGIT: while (s < strend) { if (!isDIGIT(*s)) { if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; } else tmp = 1; s++; } break; } } else { if (minlen) dontbother = minlen - 1; strend -= dontbother; /* We don't know much -- general case. */ do { if (regtry(prog, s)) goto got_it; } while (s++ < strend); } /* Failure. */ goto phooey; got_it: prog->subbeg = strbeg; prog->subend = strend; if ((!safebase && (prog->nparens || sawampersand)) || prog->do_folding){ strend += dontbother; /* uncheat */ if (safebase) /* no need for $digit later */ s = strbeg; else if (strbeg != prog->subbase) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -