egrep.c

来自「早期freebsd实现」· C语言 代码 · 共 925 行 · 第 1/2 页

C
925
字号
			if (k == NULL)				return;		}		if (nflag) {			if (prevmatch)				nline = prevnline + nlcount(prevloc, k);			else				nline = nline + nlcount(str, k);			prevmatch = 0;		}		strncpy(str, linetemp, nleftover);	}	if (cflag) {		/* Bug from old grep: -c overrides -h.  We fix the bug. */		if (!hflag)			printf("%s:", file);		printf("%ld\n", nmatch);	}}char *linesave(str, count)		/* accumulate partial line at end of buffer */	char str[];	register int count;{	register int j;	count += nleftover;	if (count != BUFSIZE && fd != 0)		str[count++] = NL;	/* insurance for broken last line */	str[count] = EOS;	for (j = count - 1; str[j] != NL && j >= 0;)		j--;	/*	 * break up these lines: long line (> BUFSIZE), last line of file, or	 * short return from read(), as from tee(1) input 	 */	if (j < 0 && (count == (BUFSIZE - nleftover))) {		str[count++] = NL;		str[count] = EOS;		linetemp[0] = EOS;		nleftover = 0;		return (str + count);	} else {		nleftover = count - j - 1;		strncpy(linetemp, str + j + 1, nleftover);		return (str + j);	}}/* * Process partial match. First check for mis-aligned Kanji, then match line * against full compiled r.e. if statistics do not warrant handing off to * standard egrep.  */char *submatch(file, pat, str, strend, k, altindex)	char file[], pat[], str[];	register char *strend, *k;	int altindex;{	register char *s;	char *t, c;	t = k;	s = ((altflag) ? k - altlen[altindex] + 1 : k - altmin + 1);#ifndef NOKANJI	c = ((altflag) ? altpat[altindex][0] : pat[0]);	if (c & NONASCII)		if ((s = kanji(str, s, k)) == NULL)			return (++k);	/* reject false kanji */#endif	do;	while (*s != NL && --s >= str);	k = s + 1;		/* now at line start */	if (boyonly)		return (gotamatch(file, k));	incount = counted - (strend - k);	if (boyfound++ == FIRSTFEW)		execstrategy(file);	s = t;	do		rxcount++;	while (*s++ != NL);	*--s = EOS;	/*	 * "quick henry -- the flit" (after theodor geisel) 	 */	if (regexec(rspencer, ((iflag) ? fold(k) : k)) == 1) {		*s = NL;		if (gotamatch(file, k) == NULL)			return (NULL);	}	*s = NL;	return (s + 1);}#ifndef NOKANJI/* * EUC code disambiguation -- scan backwards to first 7-bit code, while * counting intervening 8-bit codes.  If odd, reject unaligned Kanji pattern.  * SS2/3 checks are for intermixed Japanase Katakana or Kanji2.  */char *kanji(str, s, k)	register char *str, *s, *k;{	register int j = 0;	for (s--; s >= str; s--) {		if (*s == SS2 || *s == SS3 || (*s & NONASCII) == 0)			break;		j++;	}#ifndef CHINESE	if (*s == SS2)		j -= 1;#endif  CHINESE	return ((j & 01) ? NULL : k);}#endif/* * Compute "Boyer-Moore" delta table -- put skip distance in delta0[c]  */gosper(pattern)	char *pattern;		/* ... HAKMEM lives ... */{	register int i, j;	unsigned char c;	/* Make one-string case look like simple alternatives case */	if (!altflag) {		nalt = 1;		altmin = altlen[0] = strlen(pattern);		altpat[0] = pattern;	}	/* For chars that aren't in any string, skip by string length. */	for (j = 0; j < 256; j++) {		delta0[j] = altmin;		cmap[j] = j;	/* Sneak in initialization of cmap */	}	/* For chars in a string, skip distance from char to end of string. */	/* (If char appears more than once, skip minimum distance.) */	for (i = 0; i < nalt; i++)		for (j = 0; j < altlen[i] - 1; j++) {			c = altpat[i][j];			delta0[c] = MIN(delta0[c], altlen[i] - j - 1);			if (iflag && islower((int) c))				delta0[toupper((int) c)] = delta0[c];		}	/* For last char of each string, fall out of search loop. */	for (i = 0; i < nalt; i++) {		c = altpat[i][altlen[i] - 1];		delta0[c] = LARGE;		if (iflag && islower((int) c))			delta0[toupper((int) c)] = LARGE;	}	if (iflag)		for (j = 'A'; j <= 'Z'; j++)			cmap[j] = tolower((int) j);}/* * Print, count, or stop on full match. Result is either the location for * continued search, or NULL to stop.  */char *gotamatch(file, s)	register char *file, *s;{	char *savematch();	int squirrel = 0;	/* nonzero to squirrel away FIRSTFEW matches */	nmatch++;	nsuccess = 1;	if (!boyonly && boyfound <= FIRSTFEW && file != NULL)		squirrel = 1;	if (sflag)		return (NULL);	/* -s usurps all flags (unlike some versions) */	if (cflag) {		/* -c overrides -l, we guess */		do;		while (*s++ != NL);	} else if (lflag) {		puts(file);		return (NULL);	} else {		if (!hflag)			if (!squirrel)				printf("%s:", file);			else				(void)sprintf(preamble, "%s:", file);		if (nflag) {			if (prevmatch)				prevnline = prevnline + nlcount(prevloc, s);			else				prevnline = nline + nlcount(str, s);			prevmatch = 1;			if (!squirrel)				printf("%ld:", prevnline);			else				(void)sprintf(preamble + strlen(preamble),					"%ld:", prevnline);		}		if (!squirrel) {			do				putchar(*s);			while (*s++ != NL);		} else			s = savematch(s);		if (nflag)			prevloc = s - 1;	}	return ((firstflag && !cflag) ? NULL : s);}char *fold(line)	char *line;{	static char fline[BUFSIZE];	register char *s, *t = fline;	for (s = line; *s != EOS; s++)		*t++ = (isupper((int) *s) ? (char) tolower((int) *s) : *s);	*t = EOS;	return (fline);}strindex(s, t)			/* the easy way, as in K&P, p. 192 */	char *s, *t;{	int i, n;	n = strlen(t);	for (i = 0; s[i] != '\0'; i++)		if (strncmp(s + i, t, n) == 0)			return (i);	return (-1);}char *grepxlat(pattern)		/* grep pattern meta conversion */	char *pattern;{	register char *p, *s;	static char newpat[BUFSIZE];	for (s = newpat, p = pattern; *p != EOS;) {		if (*p == '\\') {	/* skip escapes ... */			*s++ = *p++;			if (*p)				*s++ = *p++;		} else if (*p == '[') {	/* ... and char classes */			while (*p != EOS && *p != ']')				*s++ = *p++;		} else if (strchr("+?|()", *p) != NULL) {			*s++ = '\\';	/* insert protection */			*s++ = *p++;		} else			*s++ = *p++;	}	*s = EOS;	grepflag = ((patind) ? 0 : 1);	return (newpat);}/* * Test for simple alternation.  Result is NULL if it's not so simple, or is * a pointer to the first string if it is. Warning:  sscanf size is a * fixpoint, beyond which the speedup linearity starts to break down.  In the * wake of the elegant aho/corrasick "trie"-based fgrep, generalizing * altpat[] to arbitrary size is not useful.  */char *alternate(regexpr)	char *regexpr;{	register int i, j;	register char *start, *stop;	unsigned char c;	if (fgrepflag && strchr(regexpr, '|'))			return (NULL);	/*	 * break pattern up into altpat array; delimit on newline, bar,	 * or EOS.  We know we won't overflow, we've already checked the	 * number of patterns we're going to find against NALT.	 * Also, set length of pattern and find minimum pattern length.	 */	nalt = 0;	altmin = NMUSH;	for (start = stop = regexpr;; ++stop)		if (!*stop || *stop == '|' || *stop == NL) {			altlen[nalt] = j = stop - start;			if (j < altmin)				altmin = j;			if (!(altpat[nalt] = malloc((u_int)(j + 1))))				oops("out of memory");			bcopy(start, altpat[nalt], j);			altpat[nalt][j] = EOS;			++nalt;			if (!*stop)				break;			if (nalt == NALT)				return(NULL);			if (*stop == NL)				*stop = '|';			start = stop + 1;		}	if (!fgrepflag) {		if (strchr(regexpr, '|') == NULL || regexpr[0] == '|')			return (NULL);		if (strpbrk(regexpr, "^$.[]()?+*\\") != NULL		    || strindex(regexpr, "||") >= 0)			return (NULL);	}	if (nalt > 1) {		/* build superimposed "pre-match" sets per				 * char */		altflag++;		for (j = 0; j < nalt; j++)			for (i = 0; i < altmin; i++) {				c = altpat[j][altlen[j] - altmin + i];				altset[i + 1][c] = 1;	/* offset for sentinel */			}	}	return (altpat[0]);}/* * Grapple with the dfa (std egrep) vs. ndfa (regexp) tradeoff. Criteria to * determine whether to use dfa-based egrep:  We do FIRSTFEW matches with * regexec().  If Boyer-Moore up to now matched more than PUNTPERCENT * of the input, the r.e. is likely to be underspecified, so do old *grep, * which is faster on complex patterns than regexp().  At FIRSTFEW, * dump the saved matches collected by savematch(). They are saved * so that a "PUNT" can "rewind" to ignore them.  Stdin is problematic, * since it's hard to rewind.  */execstrategy(file)	char *file;{	int pctmatch;	pctmatch = (100 * rxcount) / incount;	if (pctmatch > PUNTPERCENT && file != NULL)		kernighan(args);	if (file != NULL)		flushmatches();}nlcount(bstart, bstop)		/* flail interval to totalize newlines. */	char *bstart, *bstop;{	register char *s = bstart;	register char *t = bstop;	register int count = 0;	do {			/* loop unroll for older architectures */		if (*t == NL)	/* ... ask ames!jaw for sample code */			count++;	} while (t-- > s);	return (count);}char *isolate(regexpr)		/* isolate longest metacharacter-free string */	char *regexpr;{	char *dummyexpr;	/*	 * We add (.)* because Henry's regcomp only figures regmust if it	 * sees a leading * pattern.  Foo! 	 */	dummyexpr = malloc((unsigned) strlen(regexpr) + 5);	(void)sprintf(dummyexpr, "(.)*%s", regexpr);	if ((rspencer = regcomp(dummyexpr)) == NULL)		kernighan(args);	return (rspencer->regmust);}char *matches[FIRSTFEW];static int mcount = 0;char *savematch(s)			/* horde matches during statistics gathering */	register char *s;{	char *p;	char *start = s;	int msize = 0;	int psize = strlen(preamble);	while (*s++ != NL)		msize++;	*--s = EOS;	p = malloc((unsigned) msize + 1 + psize);	strcpy(p, preamble);	strcpy(p + psize, start);	matches[mcount++] = p;	preamble[0] = 0;	*s = NL;	return (s);}flushmatches(){	int n;	flushflag = 1;	for (n = 0; n < mcount; n++)		printf("%s\n", matches[n]);	mcount = 0;}oops(message)	char *message;{	fprintf(stderr, "%s: %s\n", progname, message);	exit(2);}kernighan(args)			/* "let others do the hard part ..." */	char *args[];{	/*	 * We may have already run grep on some of the files; remove them	 * from the arg list we pass on.  Note that we can't delete them	 * totally because the number of file names affects the output	 * (automatic -h). 	 */	/* better would be fork/exec per punted file -- jaw */	while (firstfile && optind > firstfile)		args[firstfile++] = _PATH_DEVNULL;	if (patind)		args[patind] = pattern;	(void) fflush(stdout);	if (grepflag)		execvp(_PATH_GREPSTD, args), oops("can't exec old 'grep'");	else if (fgrepflag)		execvp(_PATH_FGREPSTD, args), oops("can't exec old 'fgrep'");	else		execvp(_PATH_EGREPSTD, args), oops("can't exec old 'egrep'");}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?