⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 re.c

📁 这是一个同样来自贝尔实验室的和UNIX有着渊源的操作系统, 其简洁的设计和实现易于我们学习和理解
💻 C
字号:
/****************************************************************Copyright (C) Lucent Technologies 1997All Rights ReservedPermission to use, copy, modify, and distribute this software andits documentation for any purpose and without fee is herebygranted, provided that the above copyright notice appear in allcopies and that both that the copyright notice and thispermission notice and warranty disclaimer appear in supportingdocumentation, and that the name Lucent Technologies or any ofits entities not be used in advertising or publicity pertainingto distribution of the software without specific, written priorpermission.LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANYSPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGESWHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHERIN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OFTHIS SOFTWARE.****************************************************************/#define DEBUG#include <stdio.h>#include <ctype.h>#include <setjmp.h>#include <math.h>#include <string.h>#include <stdlib.h>#include <time.h>#include "awk.h"#include "y.tab.h"#include "regexp.h"	/* This file provides the interface between the main body of	 * awk and the pattern matching package.  It preprocesses	 * patterns prior to compilation to provide awk-like semantics	 * to character sequences not supported by the pattern package.	 * The following conversions are performed:	 *	 *	"()"		->	"[]"	 *	"[-"		->	"[\-"	 *	"[^-"		->	"[^\-"	 *	"-]"		->	"\-]"	 *	"[]"		->	"[]*"	 *	"\xdddd"	->	"\z" where 'z' is the UTF sequence	 *					for the hex value	 *	"\ddd"		->	"\o" where 'o' is a char octal value	 *	"\b"		->	"\B"	where 'B' is backspace	 *	"\t"		->	"\T"	where 'T' is tab	 *	"\f"		->	"\F"	where 'F' is form feed	 *	"\n"		->	"\N"	where 'N' is newline	 *	"\r"		->	"\r"	where 'C' is cr	 */#define	MAXRE	512static char	re[MAXRE];	/* copy buffer */char	*patbeg;int	patlen;			/* number of chars in pattern */#define	NPATS	20		/* number of slots in pattern cache */static struct pat_list		/* dynamic pattern cache */{	char	*re;	int	use;	Reprog	*program;} pattern[NPATS];static int npats;		/* cache fill level */	/* Compile a pattern */void*compre(char *pat){	int i, j, inclass;	char c, *p, *s;	Reprog *program;	if (!compile_time) {	/* search cache for dynamic pattern */		for (i = 0; i < npats; i++)			if (!strcmp(pat, pattern[i].re)) {				pattern[i].use++;				return((void *) pattern[i].program);			}	}		/* Preprocess Pattern for compilation */	p = re;	s = pat;	inclass = 0;	while (c = *s++) {		if (c == '\\') {			quoted(&s, &p, re+MAXRE);			continue;		}		else if (!inclass && c == '(' && *s == ')') {			if (p < re+MAXRE-2) {	/* '()' -> '[]*' */				*p++ = '[';				*p++ = ']';				c = '*';				s++;			}			else overflow();		}		else if (c == '['){			/* '[-' -> '[\-' */			inclass = 1;			if (*s == '-') {				if (p < re+MAXRE-2) {					*p++ = '[';					*p++ = '\\';					c = *s++;				}				else overflow();			}				/* '[^-' -> '[^\-'*/			else if (*s == '^' && s[1] == '-'){				if (p < re+MAXRE-3) {					*p++ = '[';					*p++ = *s++;					*p++ = '\\';					c = *s++;				}				else overflow();			}			else if (*s == '['){		/* skip '[[' */				if (p < re+MAXRE-1)					*p++ = c;				else overflow();				c = *s++;			}			else if (*s == '^' && s[1] == '[') {	/* skip '[^['*/				if (p < re+MAXRE-2) {					*p++ = c;					*p++ = *s++;					c = *s++;				}				else overflow();			}			else if (*s == ']') {		/* '[]' -> '[]*' */				if (p < re+MAXRE-2) {					*p++ = c;					*p++ = *s++;					c = '*';					inclass = 0;				}				else overflow();			}		}		else if (c == '-' && *s == ']') {	/* '-]' -> '\-]' */			if (p < re+MAXRE-1)				*p++ = '\\';			else overflow();		}		else if (c == ']')			inclass = 0;		if (p < re+MAXRE-1)			*p++ = c;		else overflow();	}	*p = 0;	program = regcomp(re);		/* compile pattern */	if (!compile_time) {		if (npats < NPATS)	/* Room in cache */			i = npats++;		else {			/* Throw out least used */			int use = pattern[0].use;			i = 0;			for (j = 1; j < NPATS; j++) {				if (pattern[j].use < use) {					use = pattern[j].use;					i = j;				}			}			xfree(pattern[i].program);			xfree(pattern[i].re);		}		pattern[i].re = tostring(pat);		pattern[i].program = program;		pattern[i].use = 1;	}	return((void *) program);}	/* T/F match indication - matched string not exported */intmatch(void *p, char *s, char *){	return regexec((Reprog *) p, (char *) s, 0, 0);}	/* match and delimit the matched string */intpmatch(void *p, char *s, char *start){	Resub m;	m.s.sp = start;	m.e.ep = 0;	if (regexec((Reprog *) p, (char *) s, &m, 1)) {		patbeg = m.s.sp;		patlen = m.e.ep-m.s.sp;		return 1;	}	patlen = -1;	patbeg = start;	return 0;}	/* perform a non-empty match */intnematch(void *p, char *s, char *start){	if (pmatch(p, s, start) == 1 && patlen > 0)		return 1;	patlen = -1;	patbeg = start; 	return 0;}/* in the parsing of regular expressions, metacharacters like . have *//* to be seen literally;  \056 is not a metacharacter. */hexstr(char **pp)	/* find and eval hex string at pp, return new p */{	char c;	int n = 0;	int i;	for (i = 0, c = (*pp)[i]; i < 4 && isxdigit(c); i++, c = (*pp)[i]) {		if (isdigit(c))			n = 16 * n + c - '0';		else if ('a' <= c && c <= 'f')			n = 16 * n + c - 'a' + 10;		else if ('A' <= c && c <= 'F')			n = 16 * n + c - 'A' + 10;	}	*pp += i;	return n;}	/* look for awk-specific escape sequences */#define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */voidquoted(char **s, char **to, char *end)	/* handle escaped sequence */{	char *p = *s;	char *t = *to;	wchar_t c;	switch(c = *p++) {	case 't':		c = '\t';		break;	case 'n':		c = '\n';		break;	case 'f':		c = '\f';		break;	case 'r':		c = '\r';		break;	case 'b':		c = '\b';		break;	default:		if (t < end-1)		/* all else must be escaped */			*t++ = '\\';		if (c == 'x') {		/* hexadecimal goo follows */			c = hexstr(&p);			if (t < end-MB_CUR_MAX)				t += wctomb(t, c);			else overflow();			*to = t;			*s = p;			return;		} else if (isoctdigit(c)) {	/* \d \dd \ddd */			c -= '0';			if (isoctdigit(*p)) {				c = 8 * c + *p++ - '0';				if (isoctdigit(*p))					c = 8 * c + *p++ - '0';			}		}		break;	}	if (t < end-1)		*t++ = c;	*s = p;	*to = t;}	/* count rune positions */intcountposn(char *s, int n){	int i, j;	char *end;	for (i = 0, end = s+n; *s && s < end; i++){		j = mblen(s, n);		if(j <= 0)			j = 1;		s += j;	}	return(i);}	/* pattern package error handler */voidregerror(char *s){	FATAL("%s", s);}voidoverflow(void){	FATAL("%s", "regular expression too big");}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -