📄 egrep.y
字号:
/*
 * Copyright (c) 1988 by
 * Digital Equipment Corporation, Maynard, MA
 * All rights reserved.
 *
 * This software is furnished under a license and may be used and
 * copied only in accordance with the terms of such license and
 * with the inclusion of the above copyright notice. This
 * software or any other copies thereof may not be provided or
 * otherwise made available to any other person. No title to and
 * ownership of the software is hereby transferred.
 *
 * This software is derived from software received from the
 * University of California, Berkeley, and from Bell
 * Laboratories. Use, duplication, or disclosure is subject to
 * restrictions under license agreements with University of
 * California and with AT&T.
 *
 * The information in this software is subject to change without
 * notice and should not be construed as a commitment by Digital
 * Equipment Corporation.
 *
 * Digital assumes no responsibility for the use or reliability
 * of its software on equipment which is not supplied by Digital.
 */

/* @(#)egrep.y 4.1 (Ultrix) 7/17/90 */

/*
 * egrep -- print lines containing (or not containing) a regular expression
 *
 *	status returns:
 *		0 - ok, and some matches
 *		1 - ok, but no matches
 *		2 - some error
 */

/************************************************************************
 *
 *			Modification history:
 *
 *	01	Teoman Topcubasi, 30-dec-1987
 *		changed static buffer allocation to dynamic
 *		allocation. No more RE too long!
 *
 *	02	Lie-Min Hioe, 1-May-89
 *		8 bit cleaning, to provide 8 bit data transparency.
 *
 *	03	Lie-Min Hioe, 7-June-89
 *		changed constant MAXPOS from 4000 to 8000
 *		No more core dump on certain regular expressions
 *
 *	04	Lie-Min Hioe, 06-July-89
 *		Fixed the problem of not reading the option list properly.
 *
 ************************************************************************/

%token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST
%left OR
%left CHAR DOT CCL NCCL '('
%left CAT
%left STAR PLUS QUEST

%{
#include <stdio.h>
#include <stdlib.h>	/* malloc, realloc, exit */
#include <fcntl.h>	/* open, O_RDONLY */
#include <unistd.h>	/* read, close */

/* The matcher uses its own I/O block size, independent of stdio's. */
#if defined(BUFSIZ)
#undef BUFSIZ
#endif
#define BUFSIZ 4096
#define MAXPOS 8000	/* 03 - used to be 4000 */
#define NCHARS 256	/* 02 - used to be 128 */
#define NSTATES 256	/* 02 - used to be 128 */
#define FINAL -1	/* node name of the end marker placed above the tree */

/*
 * Matcher tables, filled in by cgotofn() and read by execute().
 * gotofn[s][c] is the state reached from state s on input byte c;
 * out[s] is nonzero when state s reports a match; state[s] is the
 * index in positions[] where the position set of state s starts.
 */
unsigned char gotofn[NSTATES][NCHARS];	/* 02 - used to be char */
int state[NSTATES];
unsigned char out[NSTATES];		/* 02 - used to be char */

/*
 * Parse tree, one entry per node, built by enter()/node()/unary().
 * Node 0 is never used, so 0 in left[]/right[] means "no child".
 * These tables hold maxlin entries each and are grown by overflo().
 */
int line = 1;		/* next free node index */
int maxlin = 2048;	/* current capacity of the dynamic tables */
int *name;		/* node kind: byte value, token code, or FINAL */
int *left;
int *right;		/* right child; for CCL/NCCL leaves an index in chars[] */
int *parent;
int *foll;		/* per leaf: index in positions[] of its follow set */

/* Pool of position sets; each set is stored as a count followed by members. */
int positions[MAXPOS];
/* Character classes; each is stored as a count byte followed by members. */
unsigned char *chars;	/* 02 - used to be char */
int nxtpos;		/* next free slot in positions[] */
int nxtchar = 0;	/* next free slot in chars[] */
int *tmpstat;		/* scratch marks (0/1) per node for the set being built */
int *initstat;		/* marks of the positions every state starts from */
int xstate;		/* set by notin(): the existing state that matched */
int count;		/* marks currently set in tmpstat; in yylex, class header index */
int icount;		/* value of count that goes with initstat */
unsigned char *input;	/* 02 - used to be char */
FILE *exprfile;		/* pattern file when -f is given */

long lnum;		/* current input line number */
int bflag;
int cflag;
int fflag;
int lflag;
int nflag;
int hflag = 1;
int sflag;
int vflag;
int retcode = 0;
int nfile;		/* number of file operands */
int blkno;		/* bytes read so far from the current file (for -b) */
int lastread;		/* size of the most recent read() */
long tln;		/* selected-line count for -c */
int nsucc;		/* nonzero once any line has been selected */

int f;			/* descriptor of the file being searched */
unsigned char *fname;	/* 02 - used to be char */

/*
 * Old-style declarations so that every routine is declared before use
 * (the grammar actions below call the tree builders defined later).
 */
int	yylex(), yyerror(), yyparse();
int	nextch(), enter(), cclenter(), node(), unary();
int	cstate(), member(), notin();
void	synerror(), overflo(), toolong(), chkalloc();
void	cfoll(), cgotofn(), add(), follow(), initialize(), execute();
%}

/*
 * Grammar notes:
 *   s  puts a FINAL node above the whole tree; unary() post-increments
 *      line, so the following line-- leaves line equal to the FINAL
 *      node's index.
 *   t  is the expression proper, with an optional leading and/or
 *      trailing OR (yylex returns OR for a newline as well as for '|').
 *   b  is empty and builds a DOT leaf under STAR, i.e. every pattern is
 *      implicitly preceded by ".*".
 */
%%
s:	t
		{ unary(FINAL, $1);
		  line--;
		}
	;
t:	b r
		{ $$ = node(CAT, $1, $2); }
	| OR b r OR
		{ $$ = node(CAT, $2, $3); }
	| OR b r
		{ $$ = node(CAT, $2, $3); }
	| b r OR
		{ $$ = node(CAT, $1, $2); }
	;
b:
		{ $$ = enter(DOT);
		  $$ = unary(STAR, $$);
		}
	;
r:	CHAR
		{ $$ = enter($1); }
	| DOT
		{ $$ = enter(DOT); }
	| CCL
		{ $$ = cclenter(CCL); }
	| NCCL
		{ $$ = cclenter(NCCL); }
	;

r:	r OR r
		{ $$ = node(OR, $1, $3); }
	| r r %prec CAT
		{ $$ = node(CAT, $1, $2); }
	| r STAR
		{ $$ = unary(STAR, $1); }
	| r PLUS
		{ $$ = unary(PLUS, $1); }
	| r QUEST
		{ $$ = unary(QUEST, $1); }
	| '(' r ')'
		{ $$ = $2; }
	| error
	;

%%

/*
 * Parser error hook: print the message and exit with status 2.
 * Kept int-returning, as the parser has always seen it; it never returns.
 */
int
yyerror(s)
	char *s;	/* was undeclared, hence int, yet printed with %s */
{
	fprintf(stderr, "egrep: %s\n", s);
	exit(2);
}

/*
 * Lexer: return the next token of the pattern.
 *
 * '^', '$' and any escaped or ordinary character become CHAR with the
 * byte in yylval ('^' and '$' are both mapped to newline).  A newline
 * in the pattern is returned as OR.  A bracket expression is copied
 * into chars[] as a count byte followed by its members, with count
 * left pointing at the count byte for cclenter() to pick up.
 */
int
yylex()
{
	extern int yylval;
	int cclcnt, x;
	register unsigned char c, d;	/* 02 - used to be char */

	switch (c = nextch()) {
	case '$':
	case '^':
		c = '\n';
		goto defchar;
	case '|':
		return (OR);
	case '*':
		return (STAR);
	case '+':
		return (PLUS);
	case '?':
		return (QUEST);
	case '(':
		return (c);
	case ')':
		return (c);
	case '.':
		return (DOT);
	case '\0':
		return (0);
	case '\n':
		return (OR);
	case '[':
		x = CCL;
		cclcnt = 0;
		count = nxtchar++;	/* reserve the count byte */
		if ((c = nextch()) == '^') {
			x = NCCL;
			c = nextch();
		}
		do {
			if (c == '\0')
				synerror();
			if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) {
				if ((d = nextch()) != 0) {
					/* expand the range after the previous member */
					c = chars[nxtchar-1];
					while (c < d) {
						if (nxtchar >= maxlin)
							overflo();
						chars[nxtchar++] = ++c;
						cclcnt++;
					}
					continue;
				}
			}
			if (nxtchar >= maxlin)
				overflo();
			chars[nxtchar++] = c;
			cclcnt++;
		} while ((c = nextch()) != ']');
		/*
		 * NOTE(review): chars[] is unsigned char, so a class that
		 * expands to more than 255 members has its count truncated
		 * here.  Left as in the original.
		 */
		chars[count] = cclcnt;
		return (x);
	case '\\':
		if ((c = nextch()) == '\0')
			synerror();
		/* an escaped character is an ordinary CHAR */
	defchar:
	default:
		yylval = c;
		return (CHAR);
	}
}

/*
 * Return the next pattern byte, from the -f file or from the pattern
 * argument.  Returns 0 at the end of either; the file is closed when
 * its end is reached.
 */
int
nextch()
{
	register int c;		/* 02 - used to be char */

	if (fflag) {
		if ((c = getc(exprfile)) == EOF) {
			fclose(exprfile);
			return (0);
		}
	} else
		c = *input++;
	return (c);
}

/* Report a malformed pattern and exit with status 2. */
void
synerror()
{
	fprintf(stderr, "egrep: syntax error\n");
	exit(2);
}

/* Create a leaf node named x and return its index. */
int
enter(x)
	int x;
{
	if (line >= maxlin)
		overflo();
	name[line] = x;
	left[line] = 0;
	right[line] = 0;
	return (line++);
}

/*
 * Create a character-class leaf (x is CCL or NCCL).  Its right[] entry
 * records count, which yylex left pointing at the class in chars[].
 */
int
cclenter(x)
	int x;
{
	register int linno;

	linno = enter(x);
	right[linno] = count;
	return (linno);
}

/* Create a binary node named x over children l and r; return its index. */
int
node(x, l, r)
	int x, l, r;
{
	if (line >= maxlin)
		overflo();
	name[line] = x;
	left[line] = l;
	right[line] = r;
	parent[l] = line;
	parent[r] = line;
	return (line++);
}

/* Create a unary node named x over child d; return its index. */
int
unary(x, d)
	int x, d;
{
	if (line >= maxlin)
		overflo();
	name[line] = x;
	left[line] = d;
	right[line] = 0;
	parent[d] = line;
	return (line++);
}

/* Exit with status 2 if any of the dynamic tables could not be allocated. */
void
chkalloc()
{
	if (name == NULL || left == NULL || right == NULL ||
	    parent == NULL || foll == NULL || chars == NULL ||
	    tmpstat == NULL || initstat == NULL) {
		fprintf(stderr, "egrep: out of memory\n");
		exit(2);
	}
}

/*
 * Report that the pattern needs more room than one of the fixed-size
 * tables (positions[], state[]) provides, and exit with status 2.
 * Growing the dynamic tables cannot help in that case.
 */
void
toolong()
{
	fprintf(stderr, "egrep: regular expression too long\n");
	exit(2);
}

/*
 * Double the capacity of every dynamic table.
 *
 * Teoman Topcubasi, 12/28/87
 * changed the overflow handling to reallocate memory
 * rather than just issue an error message
 *
 * The sizes are given in bytes: the int tables need
 * maxlin * sizeof(int), not maxlin.  Because a failure is fatal,
 * overwriting the old pointers with the realloc result loses nothing.
 */
void
overflo()
{
	maxlin = maxlin * 2;
	name = (int *)realloc(name, maxlin * sizeof(int));
	left = (int *)realloc(left, maxlin * sizeof(int));
	right = (int *)realloc(right, maxlin * sizeof(int));
	parent = (int *)realloc(parent, maxlin * sizeof(int));
	foll = (int *)realloc(foll, maxlin * sizeof(int));
	chars = (unsigned char *)realloc(chars, maxlin);	/* 02 cast */
	tmpstat = (int *)realloc(tmpstat, maxlin * sizeof(int));
	initstat = (int *)realloc(initstat, maxlin * sizeof(int));
	chkalloc();
}

/*
 * Walk the subtree rooted at v and, for every leaf, compute its follow
 * set with follow() and store it in positions[] via add(foll, leaf).
 */
void
cfoll(v)
	int v;
{
	register int i;

	if (left[v] == 0) {
		/* leaf */
		count = 0;
		for (i = 1; i <= line; i++)
			tmpstat[i] = 0;
		follow(v);
		add(foll, v);
	} else if (right[v] == 0)
		cfoll(left[v]);
	else {
		cfoll(left[v]);
		cfoll(right[v]);
	}
}

/*
 * Build the state tables gotofn[][], out[] and state[].
 *
 * State 0 is the set of positions marked by cstate() on the top node
 * (line-1).  For each state s that is not already accepting, and for
 * each byte c that some position of s can match, the successor set is
 * the initial set plus the follow sets of the matching positions.  The
 * successor is looked up with notin(); a new state is added when no
 * existing state has the same set.
 */
void
cgotofn()
{
	register int c, i, k;
	int n, s;
	unsigned char symbol[NCHARS];	/* 02 - used to be char */
	int j, nc, pc, pos;
	int curpos, num;
	int number, newpos;

	count = 0;
	for (n = 3; n <= line; n++)
		tmpstat[n] = 0;
	if (cstate(line-1) == 0) {
		tmpstat[line] = 1;
		count++;
		out[0] = 1;
	}
	for (n = 3; n <= line; n++)
		initstat[n] = tmpstat[n];
	count--;		/*leave out position 1 */
	icount = count;
	tmpstat[1] = 0;
	add(state, 0);
	n = 0;			/* highest state number in use */
	for (s = 0; s <= n; s++) {
		if (out[s] == 1)
			continue;
		for (i = 0; i < NCHARS; i++)
			symbol[i] = 0;
		num = positions[state[s]];
		count = icount;
		for (i = 3; i <= line; i++)
			tmpstat[i] = initstat[i];

		/* Collect in symbol[] every byte some position of s accepts. */
		pos = state[s] + 1;
		for (i = 0; i < num; i++) {
			curpos = positions[pos];
			if ((c = name[curpos]) >= 0) {
				if (c < NCHARS)
					symbol[c] = 1;
				else if (c == DOT) {
					for (k = 0; k < NCHARS; k++)
						if (k != '\n')
							symbol[k] = 1;
				} else if (c == CCL) {
					nc = chars[right[curpos]];
					pc = right[curpos] + 1;
					for (k = 0; k < nc; k++)
						symbol[chars[pc++]] = 1;
				} else if (c == NCCL) {
					nc = chars[right[curpos]];
					for (j = 0; j < NCHARS; j++) {
						pc = right[curpos] + 1;
						for (k = 0; k < nc; k++)
							if (j == chars[pc++])
								goto cont;
						if (j != '\n')
							symbol[j] = 1;
					cont:	;
					}
				} else
					printf("something's funny\n");
			}
			pos++;
		}

		for (c = 0; c < NCHARS; c++) {
			if (symbol[c] == 1) {
				/* nextstate(s,c) */
				count = icount;
				for (i = 3; i <= line; i++)
					tmpstat[i] = initstat[i];
				pos = state[s] + 1;
				for (i = 0; i < num; i++) {
					curpos = positions[pos];
					if ((k = name[curpos]) >= 0)
						if (k == c
						    || k == DOT
						    || (k == CCL && member(c, right[curpos], 1))
						    || (k == NCCL && member(c, right[curpos], 0))) {
							/* merge in the follow set of curpos */
							number = positions[foll[curpos]];
							newpos = foll[curpos] + 1;
							for (k = 0; k < number; k++) {
								if (tmpstat[positions[newpos]] != 1) {
									tmpstat[positions[newpos]] = 1;
									count++;
								}
								newpos++;
							}
						}
					pos++;
				}
				/* end nextstate */
				if (notin(n)) {
					/*
					 * state[], out[] and the unsigned char
					 * entries of gotofn[][] hold state
					 * numbers 0..NSTATES-1 only.
					 */
					if (n >= NSTATES - 1)
						toolong();
					add(state, ++n);
					if (tmpstat[line] == 1)
						out[n] = 1;
					gotofn[s][c] = n;
				} else {
					gotofn[s][c] = xstate;
				}
			}
		}
	}
}

/*
 * Mark in tmpstat[] (keeping count up to date) the leaves at which a
 * match of the subtree rooted at v can begin.
 *
 * Returns 1 for a leaf; for a unary node, 1 only when it is PLUS over
 * an operand that returned 1; for CAT, 0 only when both operands
 * returned 0 (the right operand is visited only if the left returned
 * 0); for OR, 0 when either operand returned 0.  In effect 0 appears
 * to mean "this subtree can match the empty string".
 */
int
cstate(v)
	int v;
{
	register int b;

	if (left[v] == 0) {
		if (tmpstat[v] != 1) {
			tmpstat[v] = 1;
			count++;
		}
		return (1);
	} else if (right[v] == 0) {
		if (cstate(left[v]) == 0)
			return (0);
		else if (name[v] == PLUS)
			return (1);
		else
			return (0);
	} else if (name[v] == CAT) {
		if (cstate(left[v]) == 0 && cstate(right[v]) == 0)
			return (0);
		else
			return (1);
	} else {	/* name[v] == OR */
		b = cstate(right[v]);
		if (cstate(left[v]) == 0 || b == 0)
			return (0);
		else
			return (1);
	}
}

/*
 * Look symb up in the character class stored at chars[set].
 * Returns torf when it is a member and !torf when it is not.
 */
int
member(symb, set, torf)
	int symb, set, torf;
{
	register int i, num, pos;

	num = chars[set];
	pos = set + 1;
	for (i = 0; i < num; i++)
		if (symb == chars[pos++])
			return (torf);
	return (!torf);
}

/*
 * Compare the set currently marked in tmpstat[] (of size count) with
 * states 0..n.  Returns 0 and sets xstate when an existing state has
 * the same members; returns 1 when the set is new.
 */
int
notin(n)
	int n;
{
	register int i, j, pos;

	for (i = 0; i <= n; i++) {
		if (positions[state[i]] == count) {
			pos = state[i] + 1;
			for (j = 0; j < count; j++)
				if (tmpstat[positions[pos++]] != 1)
					goto nxt;
			xstate = i;
			return (0);
		}
	nxt:	;
	}
	return (1);
}

/*
 * Append the set marked in tmpstat[] to positions[] as a count followed
 * by the marked node indices (from 3 up), and record its start in
 * array[n].
 */
void
add(array, n)
	int *array;
	int n;
{
	register int i;

	/* Up to count + 1 slots are written, starting at nxtpos. */
	if (nxtpos + count >= MAXPOS)
		toolong();
	array[n] = nxtpos;
	positions[nxtpos++] = count;
	for (i = 3; i <= line; i++) {
		if (tmpstat[i] == 1) {
			positions[nxtpos++] = i;
		}
	}
}

/*
 * Mark in tmpstat[] the positions that can follow node v, by walking
 * up through its ancestors until the FINAL node is reached.
 */
void
follow(v)
	int v;
{
	int p;

	if (v == line)
		return;
	p = parent[v];
	switch (name[p]) {
	case STAR:
	case PLUS:
		/* the operand can be repeated, so its own starts follow it */
		cstate(v);
		follow(p);
		return;

	case OR:
	case QUEST:
		follow(p);
		return;

	case CAT:
		if (v == left[p]) {
			/* continue upward only when cstate(right) returns 0 */
			if (cstate(right[p]) == 0) {
				follow(p);
				return;
			}
		} else
			follow(p);
		return;

	case FINAL:
		if (tmpstat[line] != 1) {
			tmpstat[line] = 1;
			count++;
		}
		return;
	}
}

/*
 * Allocate the dynamic tables with room for maxlin entries each.
 *
 * Teoman Topcubasi, 12/28/87
 * added this routine to handle dynamic allocation of
 * buffers for egrep.
 */
void
initialize()
{
	name = (int *)malloc(maxlin * sizeof(int));
	left = (int *)malloc(maxlin * sizeof(int));
	right = (int *)malloc(maxlin * sizeof(int));
	parent = (int *)malloc(maxlin * sizeof(int));
	foll = (int *)malloc(maxlin * sizeof(int));
	chars = (unsigned char *)malloc(maxlin);	/* 02 - unsigned cast */
	tmpstat = (int *)malloc(maxlin * sizeof(int));
	initstat = (int *)malloc(maxlin * sizeof(int));
	chkalloc();
}

/*
 * Parse the options, take the pattern (or, with -f, the pattern file)
 * from the next argument, build the matcher and run it over each file
 * operand, or over standard input when there is none.
 */
int
main(argc, argv)
	int argc;
	char **argv;
{
	char *cp;	/* 04 */

	while (--argc > 0 && (++argv)[0][0] == '-') {
		cp = argv[0] + 1;	/* 04 */
		while (*cp)		/* 04 */
			switch (*cp++) {
			case 's':
				sflag++;
				continue;
			case 'h':
				hflag = 0;
				continue;
			case 'b':
				bflag++;
				continue;
			case 'c':
				cflag++;
				continue;
			case 'e':
				/* the pattern is the next argument */
				argc--;
				argv++;
				goto out;
			case 'f':
				fflag++;
				continue;
			case 'l':
				lflag++;
				continue;
			case 'n':
				nflag++;
				continue;
			case 'v':
				vflag++;
				continue;
			default:
				fprintf(stderr, "egrep: unknown flag\n");
				continue;
			}
	}	/* 04 */
out:
	if (argc <= 0)
		exit(2);
	if (fflag) {
		fname = (unsigned char *) *argv;	/* 02 - cast */
		exprfile = fopen((char *)fname, "r");
		if (exprfile == (FILE *)NULL) {
			fprintf(stderr, "egrep: can't open %s\n", fname);
			exit(2);
		}
	} else
		input = (unsigned char *)*argv;	/* 02 - cast */
	argc--;
	argv++;

	initialize();
	yyparse();

	cfoll(line-1);
	cgotofn();
	nfile = argc;
	if (argc <= 0) {
		if (lflag)
			exit(1);
		/* explicit pointer: no prototype converts a bare 0 */
		execute((char *)NULL);
	} else
		while (--argc >= 0) {
			execute(*argv);
			argv++;
		}
	exit(retcode != 0 ? retcode : nsucc == 0);
}

/*
 * Search one file (standard input when file is null) and print the
 * selected lines according to the option flags.
 *
 * buf is used as two halves of BUFSIZ bytes; p is the scan pointer and
 * nlp the start of the current line, which may wrap from the end of buf
 * back to its beginning.  The statement order and the jumps between the
 * "matched" inner loop and the outer scanning loop are unchanged from
 * the original.
 */
void
execute(file)
	char *file;
{
	register unsigned char *p;	/* 02 - used to be char */
	register int cstat;
	register int ccount;
	unsigned char buf[2*BUFSIZ];	/* 02 - used to be char */
	unsigned char *nlp;		/* 02 - used to be char */
	int istat;

	if (file) {
		if ((f = open(file, O_RDONLY)) < 0) {
			fprintf(stderr, "egrep: can't open %s\n", file);
			retcode = 2;
			return;
		}
	} else
		f = 0;
	ccount = 0;
	lnum = 1;
	tln = 0;
	blkno = 0;
	p = buf;
	nlp = p;
	if ((ccount = read(f, p, BUFSIZ)) <= 0)
		goto done;
	blkno += ccount;
	lastread = ccount;	/* save number last read */
	/* every line starts in the state reached from state 0 on newline */
	istat = cstat = gotofn[0]['\n'];
	if (out[cstat])
		goto found;
	for (;;) {
		cstat = gotofn[cstat][*p&0377];	/* all input chars made positive */
		if (out[cstat]) {
		found:
			/* the line matches; skip ahead to its end */
			for (;;) {
				if (*p++ == '\n') {
					if (vflag == 0) {
					succeed:
						nsucc = 1;
						if (cflag)
							tln++;
						else if (sflag)
							;	/* ugh */
						else if (lflag) {
							printf("%s\n", file);
							close(f);
							return;
						} else {
							if (nfile > 1 && hflag)
								printf("%s:", file);
							if (bflag) {
								printf("%ld:", (long)((blkno - (nlp > &buf[BUFSIZ] ? &buf[BUFSIZ+lastread] - nlp : &buf[lastread] - nlp)) / 512));
							}
							if (nflag)
								printf("%ld:", lnum);
							if (p <= nlp) {
								/* the line wraps around the end of buf */
								while (nlp < &buf[2*BUFSIZ])
									putchar(*nlp++);
								nlp = buf;
							}
							while (nlp < p)
								putchar(*nlp++);
						}
					}
					lnum++;
					nlp = p;
					if ((out[(cstat = istat)]) == 0)
						goto brk2;
				}
			cfound:
				if (--ccount <= 0) {
					if (p <= &buf[BUFSIZ]) {
						if ((ccount = read(f, p, BUFSIZ)) <= 0)
							goto done;
					} else if (p == &buf[2*BUFSIZ]) {
						p = buf;
						if ((ccount = read(f, p, BUFSIZ)) <= 0)
							goto done;
					} else {
						if ((ccount = read(f, p, &buf[2*BUFSIZ] - p)) <= 0)
							goto done;
					}
					blkno += ccount;
					lastread = ccount;	/* save number last read */
				}
			}
		}
		if (*p++ == '\n') {
			/* end of a line that did not match */
			if (vflag)
				goto succeed;
			else {
				lnum++;
				nlp = p;
				if (out[(cstat = istat)])
					goto cfound;
			}
		}
	brk2:
		if (--ccount <= 0) {
			if (p <= &buf[BUFSIZ]) {
				if ((ccount = read(f, p, BUFSIZ)) <= 0)
					break;
			} else if (p == &buf[2*BUFSIZ]) {
				p = buf;
				if ((ccount = read(f, p, BUFSIZ)) <= 0)
					break;
			} else {
				if ((ccount = read(f, p, &buf[2*BUFSIZ] - p)) <= 0)
					break;
			}
			blkno += ccount;
			lastread = ccount;	/* save number last read */
		}
	}
done:
	close(f);
	if (cflag) {
		if (nfile > 1)
			printf("%s:", file);
		printf("%ld\n", tln);
	}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -