egrep.c
来自「早期freebsd实现」· C语言 代码 · 共 925 行 · 第 1/2 页
C
925 行
if (k == NULL) return; } if (nflag) { if (prevmatch) nline = prevnline + nlcount(prevloc, k); else nline = nline + nlcount(str, k); prevmatch = 0; } strncpy(str, linetemp, nleftover); } if (cflag) { /* Bug from old grep: -c overrides -h. We fix the bug. */ if (!hflag) printf("%s:", file); printf("%ld\n", nmatch); }}char *linesave(str, count) /* accumulate partial line at end of buffer */ char str[]; register int count;{ register int j; count += nleftover; if (count != BUFSIZE && fd != 0) str[count++] = NL; /* insurance for broken last line */ str[count] = EOS; for (j = count - 1; str[j] != NL && j >= 0;) j--; /* * break up these lines: long line (> BUFSIZE), last line of file, or * short return from read(), as from tee(1) input */ if (j < 0 && (count == (BUFSIZE - nleftover))) { str[count++] = NL; str[count] = EOS; linetemp[0] = EOS; nleftover = 0; return (str + count); } else { nleftover = count - j - 1; strncpy(linetemp, str + j + 1, nleftover); return (str + j); }}/* * Process partial match. First check for mis-aligned Kanji, then match line * against full compiled r.e. if statistics do not warrant handing off to * standard egrep. */char *submatch(file, pat, str, strend, k, altindex) char file[], pat[], str[]; register char *strend, *k; int altindex;{ register char *s; char *t, c; t = k; s = ((altflag) ? k - altlen[altindex] + 1 : k - altmin + 1);#ifndef NOKANJI c = ((altflag) ? altpat[altindex][0] : pat[0]); if (c & NONASCII) if ((s = kanji(str, s, k)) == NULL) return (++k); /* reject false kanji */#endif do; while (*s != NL && --s >= str); k = s + 1; /* now at line start */ if (boyonly) return (gotamatch(file, k)); incount = counted - (strend - k); if (boyfound++ == FIRSTFEW) execstrategy(file); s = t; do rxcount++; while (*s++ != NL); *--s = EOS; /* * "quick henry -- the flit" (after theodor geisel) */ if (regexec(rspencer, ((iflag) ? fold(k) : k)) == 1) { *s = NL; if (gotamatch(file, k) == NULL) return (NULL); } *s = NL; return (s + 1);}#ifndef NOKANJI/* * EUC code disambiguation -- scan backwards to first 7-bit code, while * counting intervening 8-bit codes. If odd, reject unaligned Kanji pattern. * SS2/3 checks are for intermixed Japanase Katakana or Kanji2. */char *kanji(str, s, k) register char *str, *s, *k;{ register int j = 0; for (s--; s >= str; s--) { if (*s == SS2 || *s == SS3 || (*s & NONASCII) == 0) break; j++; }#ifndef CHINESE if (*s == SS2) j -= 1;#endif CHINESE return ((j & 01) ? NULL : k);}#endif/* * Compute "Boyer-Moore" delta table -- put skip distance in delta0[c] */gosper(pattern) char *pattern; /* ... HAKMEM lives ... */{ register int i, j; unsigned char c; /* Make one-string case look like simple alternatives case */ if (!altflag) { nalt = 1; altmin = altlen[0] = strlen(pattern); altpat[0] = pattern; } /* For chars that aren't in any string, skip by string length. */ for (j = 0; j < 256; j++) { delta0[j] = altmin; cmap[j] = j; /* Sneak in initialization of cmap */ } /* For chars in a string, skip distance from char to end of string. */ /* (If char appears more than once, skip minimum distance.) */ for (i = 0; i < nalt; i++) for (j = 0; j < altlen[i] - 1; j++) { c = altpat[i][j]; delta0[c] = MIN(delta0[c], altlen[i] - j - 1); if (iflag && islower((int) c)) delta0[toupper((int) c)] = delta0[c]; } /* For last char of each string, fall out of search loop. */ for (i = 0; i < nalt; i++) { c = altpat[i][altlen[i] - 1]; delta0[c] = LARGE; if (iflag && islower((int) c)) delta0[toupper((int) c)] = LARGE; } if (iflag) for (j = 'A'; j <= 'Z'; j++) cmap[j] = tolower((int) j);}/* * Print, count, or stop on full match. Result is either the location for * continued search, or NULL to stop. */char *gotamatch(file, s) register char *file, *s;{ char *savematch(); int squirrel = 0; /* nonzero to squirrel away FIRSTFEW matches */ nmatch++; nsuccess = 1; if (!boyonly && boyfound <= FIRSTFEW && file != NULL) squirrel = 1; if (sflag) return (NULL); /* -s usurps all flags (unlike some versions) */ if (cflag) { /* -c overrides -l, we guess */ do; while (*s++ != NL); } else if (lflag) { puts(file); return (NULL); } else { if (!hflag) if (!squirrel) printf("%s:", file); else (void)sprintf(preamble, "%s:", file); if (nflag) { if (prevmatch) prevnline = prevnline + nlcount(prevloc, s); else prevnline = nline + nlcount(str, s); prevmatch = 1; if (!squirrel) printf("%ld:", prevnline); else (void)sprintf(preamble + strlen(preamble), "%ld:", prevnline); } if (!squirrel) { do putchar(*s); while (*s++ != NL); } else s = savematch(s); if (nflag) prevloc = s - 1; } return ((firstflag && !cflag) ? NULL : s);}char *fold(line) char *line;{ static char fline[BUFSIZE]; register char *s, *t = fline; for (s = line; *s != EOS; s++) *t++ = (isupper((int) *s) ? (char) tolower((int) *s) : *s); *t = EOS; return (fline);}strindex(s, t) /* the easy way, as in K&P, p. 192 */ char *s, *t;{ int i, n; n = strlen(t); for (i = 0; s[i] != '\0'; i++) if (strncmp(s + i, t, n) == 0) return (i); return (-1);}char *grepxlat(pattern) /* grep pattern meta conversion */ char *pattern;{ register char *p, *s; static char newpat[BUFSIZE]; for (s = newpat, p = pattern; *p != EOS;) { if (*p == '\\') { /* skip escapes ... */ *s++ = *p++; if (*p) *s++ = *p++; } else if (*p == '[') { /* ... and char classes */ while (*p != EOS && *p != ']') *s++ = *p++; } else if (strchr("+?|()", *p) != NULL) { *s++ = '\\'; /* insert protection */ *s++ = *p++; } else *s++ = *p++; } *s = EOS; grepflag = ((patind) ? 0 : 1); return (newpat);}/* * Test for simple alternation. Result is NULL if it's not so simple, or is * a pointer to the first string if it is. Warning: sscanf size is a * fixpoint, beyond which the speedup linearity starts to break down. In the * wake of the elegant aho/corrasick "trie"-based fgrep, generalizing * altpat[] to arbitrary size is not useful. */char *alternate(regexpr) char *regexpr;{ register int i, j; register char *start, *stop; unsigned char c; if (fgrepflag && strchr(regexpr, '|')) return (NULL); /* * break pattern up into altpat array; delimit on newline, bar, * or EOS. We know we won't overflow, we've already checked the * number of patterns we're going to find against NALT. * Also, set length of pattern and find minimum pattern length. */ nalt = 0; altmin = NMUSH; for (start = stop = regexpr;; ++stop) if (!*stop || *stop == '|' || *stop == NL) { altlen[nalt] = j = stop - start; if (j < altmin) altmin = j; if (!(altpat[nalt] = malloc((u_int)(j + 1)))) oops("out of memory"); bcopy(start, altpat[nalt], j); altpat[nalt][j] = EOS; ++nalt; if (!*stop) break; if (nalt == NALT) return(NULL); if (*stop == NL) *stop = '|'; start = stop + 1; } if (!fgrepflag) { if (strchr(regexpr, '|') == NULL || regexpr[0] == '|') return (NULL); if (strpbrk(regexpr, "^$.[]()?+*\\") != NULL || strindex(regexpr, "||") >= 0) return (NULL); } if (nalt > 1) { /* build superimposed "pre-match" sets per * char */ altflag++; for (j = 0; j < nalt; j++) for (i = 0; i < altmin; i++) { c = altpat[j][altlen[j] - altmin + i]; altset[i + 1][c] = 1; /* offset for sentinel */ } } return (altpat[0]);}/* * Grapple with the dfa (std egrep) vs. ndfa (regexp) tradeoff. Criteria to * determine whether to use dfa-based egrep: We do FIRSTFEW matches with * regexec(). If Boyer-Moore up to now matched more than PUNTPERCENT * of the input, the r.e. is likely to be underspecified, so do old *grep, * which is faster on complex patterns than regexp(). At FIRSTFEW, * dump the saved matches collected by savematch(). They are saved * so that a "PUNT" can "rewind" to ignore them. Stdin is problematic, * since it's hard to rewind. */execstrategy(file) char *file;{ int pctmatch; pctmatch = (100 * rxcount) / incount; if (pctmatch > PUNTPERCENT && file != NULL) kernighan(args); if (file != NULL) flushmatches();}nlcount(bstart, bstop) /* flail interval to totalize newlines. */ char *bstart, *bstop;{ register char *s = bstart; register char *t = bstop; register int count = 0; do { /* loop unroll for older architectures */ if (*t == NL) /* ... ask ames!jaw for sample code */ count++; } while (t-- > s); return (count);}char *isolate(regexpr) /* isolate longest metacharacter-free string */ char *regexpr;{ char *dummyexpr; /* * We add (.)* because Henry's regcomp only figures regmust if it * sees a leading * pattern. Foo! */ dummyexpr = malloc((unsigned) strlen(regexpr) + 5); (void)sprintf(dummyexpr, "(.)*%s", regexpr); if ((rspencer = regcomp(dummyexpr)) == NULL) kernighan(args); return (rspencer->regmust);}char *matches[FIRSTFEW];static int mcount = 0;char *savematch(s) /* horde matches during statistics gathering */ register char *s;{ char *p; char *start = s; int msize = 0; int psize = strlen(preamble); while (*s++ != NL) msize++; *--s = EOS; p = malloc((unsigned) msize + 1 + psize); strcpy(p, preamble); strcpy(p + psize, start); matches[mcount++] = p; preamble[0] = 0; *s = NL; return (s);}flushmatches(){ int n; flushflag = 1; for (n = 0; n < mcount; n++) printf("%s\n", matches[n]); mcount = 0;}oops(message) char *message;{ fprintf(stderr, "%s: %s\n", progname, message); exit(2);}kernighan(args) /* "let others do the hard part ..." */ char *args[];{ /* * We may have already run grep on some of the files; remove them * from the arg list we pass on. Note that we can't delete them * totally because the number of file names affects the output * (automatic -h). */ /* better would be fork/exec per punted file -- jaw */ while (firstfile && optind > firstfile) args[firstfile++] = _PATH_DEVNULL; if (patind) args[patind] = pattern; (void) fflush(stdout); if (grepflag) execvp(_PATH_GREPSTD, args), oops("can't exec old 'grep'"); else if (fgrepflag) execvp(_PATH_FGREPSTD, args), oops("can't exec old 'fgrep'"); else execvp(_PATH_EGREPSTD, args), oops("can't exec old 'egrep'");}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?