⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 search.c

📁 早期freebsd实现
💻 C
字号:
/* $Id: search.c,v 3.0 1992/02/01 03:09:32 davison Trn $ *//* string search routines */ /*		Copyright (c) 1981,1980 James Gosling		*/ /* Modified Aug. 12, 1981 by Tom London to include regular expressions   as in ed.  RE stuff hacked over by jag to correct a few major problems,   mainly dealing with searching within the buffer rather than copying   each line to a separate array.  Newlines can now appear in RE's *//* Ripped to shreds and glued back together to make a search package, * July 6, 1984, by Larry Wall. (If it doesn't work, it's probably my fault.) * Changes include: *	Buffer, window, and mlisp stuff gone. *	Translation tables reduced to 1 table. *	Expression buffer is now dynamically allocated. *	Character classes now implemented with a bitmap. */#include "EXTERN.h"#include "common.h"#include "util.h"#include "INTERN.h"#include "search.h"#ifndef BITSPERBYTE#define BITSPERBYTE 8#endif#define BMAPSIZ (127 / BITSPERBYTE + 1)/* meta characters in the "compiled" form of a regular expression */#define	CBRA	2		/* \( -- begin bracket */#define	CCHR	4		/* a vanilla character */#define	CDOT	6		/* . -- match anything except a newline */#define	CCL	8		/* [...] -- character class */#define	NCCL	10		/* [^...] -- negated character class */#define	CDOL	12		/* $ -- matches the end of a line */#define	CEND	14		/* The end of the pattern */#define	CKET	16		/* \) -- close bracket */#define	CBACK	18		/* \N -- backreference to the Nth bracketed				   string */#define CIRC	20		/* ^ matches the beginning of a line */#define WORD	32		/* matches word character \w */#define NWORD	34		/* matches non-word characer \W */#define WBOUND	36		/* matches word boundary \b */#define NWBOUND	38		/* matches non-(word boundary) \B */ #define	STAR	01		/* * -- Kleene star, repeats the previous				   REas many times as possible; the value				   ORs with the other operator types */ #define ASCSIZ 0200typedef char	TRANSTABLE[ASCSIZ];static	TRANSTABLE trans = {0000,0001,0002,0003,0004,0005,0006,0007,0010,0011,0012,0013,0014,0015,0016,0017,0020,0021,0022,0023,0024,0025,0026,0027,0030,0031,0032,0033,0034,0035,0036,0037,0040,0041,0042,0043,0044,0045,0046,0047,0050,0051,0052,0053,0054,0055,0056,0057,0060,0061,0062,0063,0064,0065,0066,0067,0070,0071,0072,0073,0074,0075,0076,0077,0100,0101,0102,0103,0104,0105,0106,0107,0110,0111,0112,0113,0114,0115,0116,0117,0120,0121,0122,0123,0124,0125,0126,0127,0130,0131,0132,0133,0134,0135,0136,0137,0140,0141,0142,0143,0144,0145,0146,0147,0150,0151,0152,0153,0154,0155,0156,0157,0160,0161,0162,0163,0164,0165,0166,0167,0170,0171,0172,0173,0174,0175,0176,0177,};static bool folding = FALSE;static int err;static char *FirstCharacter;voidsearch_init(){#ifdef UNDEF    register int    i;        for (i = 0; i < ASCSIZ; i++)	trans[i] = i;#else    ;#endif}voidinit_compex(compex)register COMPEX *compex;{    /* the following must start off zeroed */    compex->eblen = 0;    compex->brastr = Nullch;}voidfree_compex(compex)register COMPEX *compex;{    if (compex->eblen) {	free(compex->expbuf);	compex->eblen = 0;    }    if (compex->brastr) {	free(compex->brastr);	compex->brastr = Nullch;    }}static char *gbr_str = Nullch;static int gbr_siz = 0;char *getbracket(compex,n)register COMPEX *compex;int n;{    int length = compex->braelist[n] - compex->braslist[n];    if (!compex->nbra || n > compex->nbra || !compex->braelist[n] || length<0)	return nullstr;    growstr(&gbr_str, &gbr_siz, length+1);    safecpy(gbr_str, compex->braslist[n], length+1);    return gbr_str;}voidcase_fold(which)int which;{    register int i;    if (which != folding) {	if (which) {	    for (i = 'A'; i <= 'Z'; i++)		trans[i] = tolower(i);	}	else {	    for (i = 'A'; i <= 'Z'; i++)		trans[i] = i;	}	folding = which;    }}/* Compile the given regular expression into a [secret] internal format */char *compile(compex, strp, RE, fold)register COMPEX *compex;register char   *strp;int RE;int fold;{    register int c;    register char  *ep;    char   *lastep;    char    bracket[NBRA],	   *bracketp;    char **alt = compex->alternatives;    char *retmes = "Badly formed search string";     case_fold(compex->do_folding = fold);    if (!compex->eblen) {	compex->expbuf = safemalloc(84);	compex->eblen = 80;    }    ep = compex->expbuf;		/* point at expression buffer */    *alt++ = ep;			/* first alternative starts here */    bracketp = bracket;			/* first bracket goes here */    if (*strp == 0) {			/* nothing to compile? */	if (*ep == 0)			/* nothing there yet? */	    return "Null search string";	return Nullch;			/* just keep old expression */    }    compex->nbra = 0;			/* no brackets yet */    lastep = 0;    for (;;) {	if (ep - compex->expbuf >= compex->eblen)	    ep = grow_eb(compex, ep, alt);	c = *strp++;			/* fetch next char of pattern */	if (c == 0) {			/* end of pattern? */	    if (bracketp != bracket) {	/* balanced brackets? */#ifdef VERBOSE		retmes = "Unbalanced parens";#endif		goto cerror;	    }	    *ep++ = CEND;		/* terminate expression */	    *alt++ = 0;			/* terminal alternative list */	    return Nullch;		/* return success */	}	if (c != '*')	    lastep = ep;	if (!RE) {			/* just a normal search string? */	    *ep++ = CCHR;		/* everything is a normal char */	    *ep++ = c;	}	else				/* it is a regular expression */	    switch (c) { 		case '\\':		/* meta something */		    switch (c = *strp++) {		    case '(':			if (compex->nbra >= NBRA) {#ifdef VERBOSE			    retmes = "Too many parens";#endif			    goto cerror;			}			*bracketp++ = ++compex->nbra;			*ep++ = CBRA;			*ep++ = compex->nbra;			break;		    case '|':			if (bracketp>bracket) {#ifdef VERBOSE			    retmes = "No \\| in parens";	/* Alas! */#endif			    goto cerror;			}			*ep++ = CEND;			*alt++ = ep;			break;		    case ')':			if (bracketp <= bracket) {#ifdef VERBOSE			    retmes = "Unmatched right paren";#endif			    goto cerror;			}			*ep++ = CKET;			*ep++ = *--bracketp;			break;		    case 'w':			*ep++ = WORD;			break;		    case 'W':			*ep++ = NWORD;			break;		    case 'b':			*ep++ = WBOUND;			break;		    case 'B':			*ep++ = NWBOUND;			break;		    case '0': case '1': case '2': case '3': case '4':		    case '5': case '6': case '7': case '8': case '9':			*ep++ = CBACK;			*ep++ = c - '0';			break;		    default:			*ep++ = CCHR;			if (c == '\0')			    goto cerror;			*ep++ = c;			break;		    }		    break;		case '.':		    *ep++ = CDOT;		    continue; 		case '*':		    if (lastep == 0 || *lastep == CBRA || *lastep == CKET			|| *lastep == CIRC			|| (*lastep&STAR)|| *lastep>NWORD)			goto defchar;		    *lastep |= STAR;		    continue; 		case '^':		    if (ep != compex->expbuf && ep[-1] != CEND)			goto defchar;		    *ep++ = CIRC;		    continue; 		case '$':		    if (*strp != 0 && (*strp != '\\' || strp[1] != '|'))			goto defchar;		    *ep++ = CDOL;		    continue; 		case '[': {		/* character class */		    register int i;		    		    if (ep - compex->expbuf >= compex->eblen - BMAPSIZ)			ep = grow_eb(compex, ep, alt); /* reserve bitmap */		    for (i = BMAPSIZ; i; --i)			ep[i] = 0;		    		    if ((c = *strp++) == '^') {			c = *strp++;			*ep++ = NCCL;	/* negated */		    }		    else			*ep++ = CCL;	/* normal */		    		    i = 0;		/* remember oldchar */		    do {			if (c == '\0') {#ifdef VERBOSE			    retmes = "Missing ]";#endif			    goto cerror;			}			if (*strp == '-' && *(++strp) != ']' && *strp)			    i = *strp++;			else			    i = c;			while (c <= i) {			    ep[c / BITSPERBYTE] |= 1 << (c % BITSPERBYTE);			    if (fold && isalpha(c))				ep[(c ^ 32) / BITSPERBYTE] |=				    1 << ((c ^ 32) % BITSPERBYTE);					/* set the other bit too */			    c++;			}		    } while ((c = *strp++) != ']');		    ep += BMAPSIZ;		    continue;		} 	    defchar:		default:		    *ep++ = CCHR;		    *ep++ = c;	    }    }cerror:    compex->expbuf[0] = 0;    compex->nbra = 0;    return retmes;}char *grow_eb(compex, epp, alt)register COMPEX *compex;char *epp;char **alt;{    register char *oldbuf = compex->expbuf;    register char **altlist = compex->alternatives;    compex->eblen += 80;    compex->expbuf = saferealloc(compex->expbuf, (MEM_SIZE)compex->eblen + 4);    if (compex->expbuf != oldbuf) {	/* realloc can change expbuf! */	epp += compex->expbuf - oldbuf;	while (altlist != alt)	    *altlist++ += compex->expbuf - oldbuf;     }    return epp;}char *execute(compex, addr)register COMPEX *compex;char *addr;{    register char *p1 = addr;    register char *trt = trans;    register int c;     if (addr == Nullch || compex->expbuf == Nullch)	return Nullch;    if (compex->nbra) {			/* any brackets? */	for (c = 0; c <= compex->nbra; c++)	    compex->braslist[c] = compex->braelist[c] = Nullch;	if (compex->brastr)	    free(compex->brastr);	compex->brastr = savestr(p1);	/* in case p1 is not static */	p1 = compex->brastr;		/* ! */    }    case_fold(compex->do_folding);	/* make sure table is correct */    FirstCharacter = p1;		/* for ^ tests */    if (compex->expbuf[0] == CCHR && !compex->alternatives[1]) {	c = trt[compex->expbuf[1]];	/* fast check for first character */	do {	    if (trt[*p1] == c && advance(compex, p1, compex->expbuf))		return p1;	    p1++;	} while (*p1 && !err);	if (err) err = 0;	return Nullch;    }    else {			/* regular algorithm */	do {	    register char **alt = compex->alternatives;	    while (*alt) {		if (advance(compex, p1, *alt++))		    return p1;	    }	    p1++;	} while (*p1 && !err);	if (err) err = 0;	return Nullch;    }   /*NOTREACHED*/} /* advance the match of the regular expression starting at ep along the   string lp, simulates an NDFSA */booladvance(compex, lp, ep)register COMPEX *compex;register char *ep;register char *lp;{    register char *curlp;    register char *trt = trans;    register int i;     while ((*ep & STAR) || *lp || *ep == CIRC || *ep == CKET)	switch (*ep++) { 	    case CCHR:		if (trt[*ep++] != trt[*lp]) return FALSE;		lp++;		continue; 	    case CDOT:		if (*lp == '\n') return FALSE;		lp++;		continue; 	    case CDOL:		if (!*lp || *lp == '\n')		    continue;		return FALSE; 	    case CIRC:		if (lp == FirstCharacter || lp[-1]=='\n')		    continue;		return FALSE; 	    case WORD:		if (isalnum(*lp)) {		    lp++;		    continue;		}		return FALSE; 	    case NWORD:		if (!isalnum(*lp)) {		    lp++;		    continue;		}		return FALSE; 	    case WBOUND:		if ((lp == FirstCharacter || !isalnum(lp[-1])) !=			(!*lp || !isalnum(*lp)) )		    continue;		return FALSE; 	    case NWBOUND:		if ((lp == FirstCharacter || !isalnum(lp[-1])) ==			(!*lp || !isalnum(*lp)))		    continue;		return FALSE; 	    case CEND:		return TRUE; 	    case CCL:		if (cclass(ep, *lp, 1)) {		    ep += BMAPSIZ;		    lp++;		    continue;		}		return FALSE; 	    case NCCL:		if (cclass(ep, *lp, 0)) {		    ep += BMAPSIZ;		    lp++;		    continue;		}		return FALSE; 	    case CBRA:		compex->braslist[*ep++] = lp;		continue; 	    case CKET:		i = *ep++;		compex->braelist[i] = lp;		compex->braelist[0] = lp;		compex->braslist[0] = compex->braslist[i];		continue; 	    case CBACK:		if (compex->braelist[i = *ep++] == 0) {		    fputs("bad braces\n",stdout) FLUSH;		    err = TRUE;		    return FALSE;		}		if (backref(compex, i, lp)) {		    lp += compex->braelist[i] - compex->braslist[i];		    continue;		}		return FALSE; 	    case CBACK | STAR:		if (compex->braelist[i = *ep++] == 0) {		    fputs("bad braces\n",stdout) FLUSH;		    err = TRUE;		    return FALSE;		}		curlp = lp;		while (backref(compex, i, lp)) {		    lp += compex->braelist[i] - compex->braslist[i];		}		while (lp >= curlp) {		    if (advance(compex, lp, ep))			return TRUE;		    lp -= compex->braelist[i] - compex->braslist[i];		}		continue; 	    case CDOT | STAR:		curlp = lp;		while (*lp++ && lp[-1] != '\n');		goto star; 	    case WORD | STAR:		curlp = lp;		while (*lp++ && isalnum(lp[-1]));		goto star; 	    case NWORD | STAR:		curlp = lp;		while (*lp++ && !isalnum(lp[-1]));		goto star; 	    case CCHR | STAR:		curlp = lp;		while (*lp++ && trt[lp[-1]] == trt[*ep]);		ep++;		goto star; 	    case CCL | STAR:	    case NCCL | STAR:		curlp = lp;		while (*lp++ && cclass(ep, lp[-1], ep[-1] == (CCL | STAR)));		ep += BMAPSIZ;		goto star; 	star:		do {		    lp--;		    if (advance(compex, lp, ep))			return TRUE;		} while (lp > curlp);		return FALSE; 	    default:		fputs("Badly compiled pattern\n",stdout) FLUSH;		err = TRUE;		return -1;	}    if (*ep == CEND || *ep == CDOL || *ep == WBOUND) {	return TRUE;    }    return FALSE;} boolbackref(compex, i, lp)register COMPEX *compex;register int i;register char *lp;{    register char *bp;     bp = compex->braslist[i];    while (*lp && *bp == *lp) {	bp++;	lp++;	if (bp >= compex->braelist[i])	    return TRUE;    }    return FALSE;}boolcclass(set, c, af)register char  *set;register int c;int af;{    c &= 0177;#if BITSPERBYTE == 8    if (set[c >> 3] & 1 << (c & 7))#else    if (set[c / BITSPERBYTE] & 1 << (c % BITSPERBYTE))#endif	return af;    return !af;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -