regexp.c

来自「操作系统源代码」· C语言代码 · 共 935 行 · 第 1/2 页
935 行
				}				if (digit != '}')				{					FAIL("Bad characters after \\{");				}				else if (to < from || to == 0 || from >= 255)				{					FAIL("Invalid range for \\{ \\}");				}				re->minlen += from;			}			else#endif			if (peek != M_SPLAT)			{				re->minlen++;			}			/* it is okay -- make it prefix instead of postfix */			ADD_META(build, peek);#ifndef CRUNCH			if (peek == M_RANGE)			{				*build++ = from;				*build++ = (to < 255 ? to : 255);			}#endif						/* take care of "needfirst" - is this the first char? */			if (needfirst && peek == M_PLUS && !IS_META(token))			{				re->first = token;			}			needfirst = 0;			/* we used "peek" -- need to refill it */			peek = gettoken(&exp, re);			if (IS_CLOSURE(peek))			{				FAIL("* or \\+ or \\? doubled up");			}		}		else if (!IS_META(token))		{			/* normal char is NOT argument of closure */			if (needfirst)			{				re->first = token;				needfirst = 0;			}			re->minlen++;		}		else if (token == M_ANY || IS_CLASS(token))		{			/* . or [] is NOT argument of closure */			needfirst = 0;			re->minlen++;		}		/* the "token" character is not closure -- process it normally */		if (token == M_BEGLINE)		{			/* set the BOL flag instead of storing M_BEGLINE */			re->bol = 1;		}		else if (IS_META(token))		{			ADD_META(build, token);		}		else		{			*build++ = token;		}	}	/* end it with a \) which MUST MATCH the opening \( */	ADD_META(build, M_END(0));	if (end_sp > 0)	{		FAIL("Not enough \\)s");	}	return re;}/*---------------------------------------------------------------------------*//* This function checks for a match between a character and a token which is * known to represent a single character.  It returns 0 if they match, or * 1 if they don't. 
*/int match1(re, ch, token)	regexp		*re;	REG char	ch;	REG int		token;{	if (!ch)	{		/* the end of a line can't match any RE of width 1 */		return 1;	}	if (token == M_ANY)	{		return 0;	}	else if (IS_CLASS(token))	{		if (re->program[1 + 32 * (token - M_CLASS(0)) + (ch >> 3)] & (1 << (ch & 7)))			return 0;	}	else if (ch == token || *o_ignorecase && tolower(ch) == tolower(token))	{		return 0;	}	return 1;}/* This function checks characters up to and including the next closure, at * which point it does a recursive call to check the rest of it.  This function * returns 0 if everything matches, or 1 if something doesn't match. */int match(re, str, prog, here)	regexp		*re;	/* the regular expression */	char		*str;	/* the string */	REG char	*prog;	/* a portion of re->program, an compiled RE */	REG char	*here;	/* a portion of str, the string to compare it to */{	REG int		token;	/* the roken pointed to by prog */	REG int		nmatched;/* counter, used during closure matching */ 	REG int		closure;/* the token denoting the type of closure */	int		from;	/* minimum number of matches in closure */	int		to;	/* maximum number of matches in closure */	for (token = GET_META(prog); !IS_CLOSURE(token); prog++, token = GET_META(prog))	{		switch (token)		{		/*case M_BEGLINE: can't happen; re->bol is used instead */		  case M_ENDLINE:			if (*here)				return 1;			break;		  case M_BEGWORD:			if (here != str &&			   (here[-1] == '_' || isalnum(here[-1])))				return 1;			break;		  case M_ENDWORD:			if (here[0] == '_' || isalnum(here[0]))				return 1;			break;		  case M_START(0):		  case M_START(1):		  case M_START(2):		  case M_START(3):		  case M_START(4):		  case M_START(5):		  case M_START(6):		  case M_START(7):		  case M_START(8):		  case M_START(9):			re->startp[token - M_START(0)] = (char *)here;			break;		  case M_END(0):		  case M_END(1):		  case M_END(2):		  case M_END(3):		  case M_END(4):		  case M_END(5):		  case M_END(6):		  case M_END(7):		  case M_END(8):		  case M_END(9):			
re->endp[token - M_END(0)] = (char *)here;			if (token == M_END(0))			{				return 0;			}			break;		  default: /* literal, M_CLASS(n), or M_ANY */			if (match1(re, *here, token) != 0)				return 1;			here++;		}	}	/* C L O S U R E */	/* step 1: see what we have to match against, and move "prog" to point	 * to the remainder of the compiled RE.	 */	closure = token;	prog++;	switch (closure)	{	  case M_SPLAT:		from = 0;		to = strlen(str);	/* infinity */		break;	  case M_PLUS:		from = 1;		to = strlen(str);	/* infinity */		break;	  case M_QMARK:		from = 0;		to = 1;		break;#ifndef CRUNCH	  case M_RANGE:		from = UCHAR(*prog++);		to = UCHAR(*prog++);		if (to == 255)		{			to = strlen(str); /* infinity */		}		break;#endif	}	token = GET_META(prog);	prog++;	/* step 2: see how many times we can match that token against the string */	for (nmatched = 0;	     nmatched < to && *here && match1(re, *here, token) == 0;	     nmatched++, here++)	{	}	/* step 3: try to match the remainder, and back off if it doesn't */	while (nmatched >= from && match(re, str, prog, here) != 0)	{		nmatched--;		here--;	}	/* so how did it work out? */	if (nmatched >= from)		return 0;	return 1;}/* This function searches through a string for text that matches an RE. */int regexec(re, str, bol)	regexp	*re;	/* the compiled regexp to search for */	char	*str;	/* the string to search through */	int	bol;	/* boolean: does str start at the beginning of a line? */{	char	*prog;	/* the entry point of re->program */	int	len;	/* length of the string */	REG char	*here;	/* if must start at the beginning of a line, and this isn't, then fail */	if (re->bol && !bol)	{		return 0;	}	len = strlen(str);	prog = re->program + 1 + 32 * re->program[0];	/* search for the RE in the string */	if (re->bol)	{		/* must occur at BOL */		if ((re->first			&& match1(re, *(char *)str, re->first))/* wrong first letter? */		 || len < re->minlen			/* not long enough? */		 || match(re, (char *)str, prog, str))	/* doesn't match? 
*/			return 0;			/* THEN FAIL! */	}#ifndef CRUNCH	else if (!*o_ignorecase)	{		/* can occur anywhere in the line, noignorecase */		for (here = (char *)str;		     (re->first && re->first != *here)			|| match(re, (char *)str, prog, here);		     here++, len--)		{			if (len < re->minlen)				return 0;		}	}#endif	else	{		/* can occur anywhere in the line, ignorecase */		for (here = (char *)str;		     (re->first && match1(re, *here, (int)re->first))			|| match(re, (char *)str, prog, here);		     here++, len--)		{			if (len < re->minlen)				return 0;		}	}	/* if we didn't fail, then we must have succeeded */	return 1;}/*============================================================================*/#else /* NO_MAGIC */regexp *regcomp(exp)	char	*exp;{	char	*src;	char	*dest;	regexp	*re;	int	i;	/* allocate a big enough regexp structure */#ifdef lint	re = (regexp *)0;#else	re = (regexp *)malloc((unsigned)(strlen(exp) + 1 + sizeof(struct regexp)));#endif	if (!re)	{		regerror("Could not malloc a regexp structure");		return (regexp *)0;	}	/* initialize all fields of the structure */	for (i = 0; i < NSUBEXP; i++)	{		re->startp[i] = re->endp[i] = (char *)0;	}	re->minlen = 0;	re->first = 0;	re->bol = 0;	/* copy the string into it, translating ^ and $ as needed */	for (src = exp, dest = re->program + 1; *src; src++)	{		switch (*src)		{		  case '^':			if (src == exp)			{				re->bol += 1;			}			else			{				*dest++ = '^';				re->minlen++;			}			break;		  case '$':			if (!src[1])			{				re->bol += 2;			}			else			{				*dest++ = '$';				re->minlen++;			}			break;		  case '\\':			if (src[1])			{				*dest++ = *++src;				re->minlen++;			}			else			{				regerror("extra \\ at end of regular expression");			}			break;		  default:			*dest++ = *src;			re->minlen++;		}	}	*dest = '\0';	return re;}/* This "helper" function checks for a match at a given location.  
It returns * 1 if it matches, 0 if it doesn't match here but might match later on in the * string, or -1 if it could not possibly match */static int reghelp(prog, string, bolflag)	struct regexp	*prog;	char		*string;	int		bolflag;{	char		*scan;	char		*str;	/* if ^, then require bolflag */	if ((prog->bol & 1) && !bolflag)	{		return -1;	}	/* if it matches, then it will start here */	prog->startp[0] = string;	/* compare, possibly ignoring case */	if (*o_ignorecase)	{		for (scan = &prog->program[1]; *scan; scan++, string++)			if (tolower(*scan) != tolower(*string))				return *string ? 0 : -1;	}	else	{		for (scan = &prog->program[1]; *scan; scan++, string++)			if (*scan != *string)				return *string ? 0 : -1;	}	/* if $, then require string to end here, too */	if ((prog->bol & 2) && *string)	{		return 0;	}	/* if we get to here, it matches */	prog->endp[0] = string;	return 1;}int regexec(prog, string, bolflag)	struct regexp	*prog;	char		*string;	int		bolflag;{	int		rc;	/* keep trying to match it */	for (rc = reghelp(prog, string, bolflag); rc == 0; rc = reghelp(prog, string, 0))	{		string++;	}	/* did we match? */	return rc == 1;}#endif
regexp.c - 源码说明

本页面展示了「操作系统源代码」中的 regexp.c 源码文件，采用 C 语言编写，共 935 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与操作系统相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?