⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regexec.c

📁 早期freebsd实现
💻 C
📖 第 1 页 / 共 2 页
字号:
/* NOTE: this is derived from Henry Spencer's regexp code, and should not be * confused with the original package (see point 3 below).  Thanks, Henry! *//* Additional note: this code is very heavily munged from Henry's version * in places.  In some spots I've traded clarity for efficiency, so don't * blame Henry for some of the lack of readability. *//* $RCSfile: regexec.c,v $$Revision: 4.0.1.4 $$Date: 92/06/08 15:25:50 $ * * $Log:	regexec.c,v $ * Revision 4.0.1.4  92/06/08  15:25:50  lwall * patch20: pattern modifiers i and g didn't interact right * patch20: in some cases $` and $' didn't get set by match * patch20: /x{0}/ was wrongly interpreted as /x{0,}/ *  * Revision 4.0.1.3  91/11/05  18:23:55  lwall * patch11: prepared for ctype implementations that don't define isascii() * patch11: initial .* in pattern had dependency on value of $* *  * Revision 4.0.1.2  91/06/07  11:50:33  lwall * patch4: new copyright notice * patch4: // wouldn't use previous pattern if it started with a null character *  * Revision 4.0.1.1  91/04/12  09:07:39  lwall * patch1: regexec only allocated space for 9 subexpressions *  * Revision 4.0  91/03/20  01:39:16  lwall * 4.0 baseline. *  *//*SUPPRESS 112*//* * regcomp and regexec -- regsub and regerror are not used in perl * *	Copyright (c) 1986 by University of Toronto. *	Written by Henry Spencer.  Not derived from licensed software. * *	Permission is granted to anyone to use this software for any *	purpose on any computer system, and to redistribute it freely, *	subject to the following restrictions: * *	1. The author is not responsible for the consequences of use of *		this software, no matter how awful, even if they arise *		from defects in it. * *	2. The origin of this software must not be misrepresented, either *		by explicit claim or by omission. * *	3. Altered versions must be plainly marked as such, and must not *		be misrepresented as being the original software. * ****    Alterations to Henry's code are... 
**** ****    Copyright (c) 1991, Larry Wall **** ****    You may distribute under the terms of either the GNU General Public ****    License or the Artistic License, as specified in the README file. * * Beware that some of this code is subtly aware of the way operator * precedence is structured in regular expressions.  Serious changes in * regular-expression syntax might require a total rethink. */#include "EXTERN.h"#include "perl.h"#include "regcomp.h"#ifndef STATIC#define	STATIC	static#endif#ifdef DEBUGGINGint regnarrate = 0;#endif/* * regexec and friends *//* * Global work variables for regexec(). */static char *regprecomp;static char *reginput;		/* String-input pointer. */static char regprev;		/* char before regbol, \n if none */static char *regbol;		/* Beginning of input, for ^ check. */static char *regeol;		/* End of input, for $ check. */static char **regstartp;	/* Pointer to startp array. */static char **regendp;		/* Ditto for endp. */static char *reglastparen;	/* Similarly for lastparen. */static char *regtill;static int regmyp_size = 0;static char **regmystartp = Null(char**);static char **regmyendp   = Null(char**);/* * Forwards. */STATIC int regtry();STATIC int regmatch();STATIC int regrepeat();extern int multiline;/* - regexec - match a regexp against a string */intregexec(prog, stringarg, strend, strbeg, minend, screamer, safebase)register regexp *prog;char *stringarg;register char *strend;	/* pointer to null at end of string */char *strbeg;	/* real beginning of string */int minend;	/* end of match must be at least minend after stringarg */STR *screamer;int safebase;	/* no need to remember string in subbase */{	register char *s;	register int i;	register char *c;	register char *string = stringarg;	register int tmp;	int minlen = 0;		/* must match at least this many chars */	int dontbother = 0;	/* how many characters not to try at end */	/* Be paranoid... 
*/	if (prog == NULL || string == NULL) {		fatal("NULL regexp parameter");		return(0);	}	if (string == strbeg)	/* is ^ valid at stringarg? */	    regprev = '\n';	else {	    regprev = stringarg[-1];	    if (!multiline && regprev == '\n')		regprev = '\0';		/* force ^ to NOT match */	}	regprecomp = prog->precomp;	/* Check validity of program. */	if (UCHARAT(prog->program) != MAGIC) {		FAIL("corrupted regexp program");	}	if (prog->do_folding) {		i = strend - string;		New(1101,c,i+1,char);		Copy(string, c, i+1, char);		string = c;		strend = string + i;		for (s = string; s < strend; s++)			if (isUPPER(*s))				*s = tolower(*s);	}	/* If there is a "must appear" string, look for it. */	s = string;	if (prog->regmust != Nullstr &&	    (!(prog->reganch & ROPT_ANCH)	     || (multiline && prog->regback >= 0)) ) {		if (stringarg == strbeg && screamer) {			if (screamfirst[prog->regmust->str_rare] >= 0)				s = screaminstr(screamer,prog->regmust);			else				s = Nullch;		}#ifndef lint		else			s = fbminstr((unsigned char*)s, (unsigned char*)strend,			    prog->regmust);#endif		if (!s) {			++prog->regmust->str_u.str_useful;	/* hooray */			goto phooey;	/* not present */		}		else if (prog->regback >= 0) {			s -= prog->regback;			if (s < string)			    s = string;			minlen = prog->regback + prog->regmust->str_cur;		}		else if (--prog->regmust->str_u.str_useful < 0) { /* boo */			str_free(prog->regmust);			prog->regmust = Nullstr;	/* disable regmust */			s = string;		}		else {			s = string;			minlen = prog->regmust->str_cur;		}	}	/* Mark beginning of line for ^ . 
*/	regbol = string;	/* Mark end of line for $ (and such) */	regeol = strend;	/* see how far we have to get to not match where we matched before */	regtill = string+minend;	/* Allocate our backreference arrays */	if ( regmyp_size < prog->nparens + 1 ) {	    /* Allocate or enlarge the arrays */	    regmyp_size = prog->nparens + 1;	    if ( regmyp_size < 10 ) regmyp_size = 10;	/* minimum */	    if ( regmystartp ) {		/* reallocate larger */		Renew(regmystartp,regmyp_size,char*);		Renew(regmyendp,  regmyp_size,char*);	    }	    else {		/* Initial allocation */		New(1102,regmystartp,regmyp_size,char*);		New(1102,regmyendp,  regmyp_size,char*);	    }		}	/* Simplest case:  anchored match need be tried only once. */	/*  [unless multiline is set] */	if (prog->reganch & ROPT_ANCH) {		if (regtry(prog, string))			goto got_it;		else if (multiline || (prog->reganch & ROPT_IMPLICIT)) {			if (minlen)			    dontbother = minlen - 1;			strend -= dontbother;			/* for multiline we only have to try after newlines */			if (s > string)			    s--;			while (s < strend) {			    if (*s++ == '\n') {				if (s < strend && regtry(prog, s))				    goto got_it;			    }			}		}		goto phooey;	}	/* Messy cases:  unanchored match. */	if (prog->regstart) {		if (prog->reganch & ROPT_SKIP) {  /* we have /x+whatever/ */		    /* it must be a one character string */		    i = prog->regstart->str_ptr[0];		    while (s < strend) {			    if (*s == i) {				    if (regtry(prog, s))					    goto got_it;				    s++;				    while (s < strend && *s == i)					s++;			    }			    s++;		    }		}		else if (prog->regstart->str_pok == 3) {		    /* We know what string it must start with. 
*/#ifndef lint		    while ((s = fbminstr((unsigned char*)s,		      (unsigned char*)strend, prog->regstart)) != NULL)#else		    while (s = Nullch)#endif		    {			    if (regtry(prog, s))				    goto got_it;			    s++;		    }		}		else {		    c = prog->regstart->str_ptr;		    while ((s = ninstr(s, strend,		      c, c + prog->regstart->str_cur )) != NULL) {			    if (regtry(prog, s))				    goto got_it;			    s++;		    }		}		goto phooey;	}	/*SUPPRESS 560*/	if (c = prog->regstclass) {		int doevery = (prog->reganch & ROPT_SKIP) == 0;		if (minlen)		    dontbother = minlen - 1;		strend -= dontbother;	/* don't bother with what can't match */		tmp = 1;		/* We know what class it must start with. */		switch (OP(c)) {		case ANYOF:		    c = OPERAND(c);		    while (s < strend) {			    i = UCHARAT(s);			    if (!(c[i >> 3] & (1 << (i&7)))) {				    if (tmp && regtry(prog, s))					    goto got_it;				    else					    tmp = doevery;			    }			    else				    tmp = 1;			    s++;		    }		    break;		case BOUND:		    if (minlen)			dontbother++,strend--;		    if (s != string) {			i = s[-1];			tmp = isALNUM(i);		    }		    else			tmp = isALNUM(regprev);	/* assume not alphanumeric */		    while (s < strend) {			    i = *s;			    if (tmp != isALNUM(i)) {				    tmp = !tmp;				    if (regtry(prog, s))					    goto got_it;			    }			    s++;		    }		    if ((minlen || tmp) && regtry(prog,s))			    goto got_it;		    break;		case NBOUND:		    if (minlen)			dontbother++,strend--;		    if (s != string) {			i = s[-1];			tmp = isALNUM(i);		    }		    else			tmp = isALNUM(regprev);	/* assume not alphanumeric */		    while (s < strend) {			    i = *s;			    if (tmp != isALNUM(i))				    tmp = !tmp;			    else if (regtry(prog, s))				    goto got_it;			    s++;		    }		    if ((minlen || !tmp) && regtry(prog,s))			    goto got_it;		    break;		case ALNUM:		    while (s < strend) {			    i = *s;			    if (isALNUM(i)) {				    if (tmp && regtry(prog, s))					    goto got_it;				    else					    tmp = 
doevery;			    }			    else				    tmp = 1;			    s++;		    }		    break;		case NALNUM:		    while (s < strend) {			    i = *s;			    if (!isALNUM(i)) {				    if (tmp && regtry(prog, s))					    goto got_it;				    else					    tmp = doevery;			    }			    else				    tmp = 1;			    s++;		    }		    break;		case SPACE:		    while (s < strend) {			    if (isSPACE(*s)) {				    if (tmp && regtry(prog, s))					    goto got_it;				    else					    tmp = doevery;			    }			    else				    tmp = 1;			    s++;		    }		    break;		case NSPACE:		    while (s < strend) {			    if (!isSPACE(*s)) {				    if (tmp && regtry(prog, s))					    goto got_it;				    else					    tmp = doevery;			    }			    else				    tmp = 1;			    s++;		    }		    break;		case DIGIT:		    while (s < strend) {			    if (isDIGIT(*s)) {				    if (tmp && regtry(prog, s))					    goto got_it;				    else					    tmp = doevery;			    }			    else				    tmp = 1;			    s++;		    }		    break;		case NDIGIT:		    while (s < strend) {			    if (!isDIGIT(*s)) {				    if (tmp && regtry(prog, s))					    goto got_it;				    else					    tmp = doevery;			    }			    else				    tmp = 1;			    s++;		    }		    break;		}	}	else {		if (minlen)		    dontbother = minlen - 1;		strend -= dontbother;		/* We don't know much -- general case. */		do {			if (regtry(prog, s))				goto got_it;		} while (s++ < strend);	}	/* Failure. */	goto phooey;    got_it:	prog->subbeg = strbeg;	prog->subend = strend;	if ((!safebase && (prog->nparens || sawampersand)) || prog->do_folding){		strend += dontbother;	/* uncheat */		if (safebase)			/* no need for $digit later */		    s = strbeg;		else if (strbeg != prog->subbase) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -