📄 sed.c
字号:
/*
 * sed -- stream editor
 *
 *
 */
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <regexp.h>

enum {
	DEPTH		= 20,		/* max nesting depth of {} */
	MAXCMDS		= 512,		/* max sed commands */
	ADDSIZE		= 10000,	/* size of add & read buffer */
	MAXADDS		= 20,		/* max pending adds and reads */
	LBSIZE		= 8192,		/* input line size */
	LABSIZE		= 50,		/* max number of labels (size of ltab[]) */
	MAXSUB		= 10,		/* max number of sub reg exp */
	MAXFILES	= 120,		/* max output files */
};

/* An address is a line #, a R.E., "$", a reference to the last
 * R.E., or nothing.
 */
typedef struct {
	enum {
		A_NONE,
		A_DOL,
		A_LINE,
		A_RE,
		A_LAST,
	}type;
	union {
		long	line;		/* Line # */
		Reprog	*rp;		/* Compiled R.E. */
	};
} Addr;

typedef struct SEDCOM {
	Addr	ad1;			/* optional start address */
	Addr	ad2;			/* optional end address */
	union {
		Reprog	*re1;		/* compiled R.E. */
		Rune	*text;		/* added text or file name */
		struct	SEDCOM	*lb1;	/* destination command of branch */
	};
	Rune	*rhs;			/* Right-hand side of substitution */
	Biobuf*	fcode;			/* File ID for read and write */
	char	command;		/* command code -see below */
	char	gfl;			/* 'Global' flag for substitutions */
	char	pfl;			/* 'print' flag for substitutions */
	char	active;			/* 1 => data between start and end */
	char	negfl;			/* negation flag */
} SedCom;

/* Command Codes for field SedCom.command */
#define ACOM	01
#define BCOM	020
#define CCOM	02
#define CDCOM	025
#define CNCOM	022
#define COCOM	017
#define CPCOM	023
#define DCOM	03
#define ECOM	015
#define EQCOM	013
#define FCOM	016
#define GCOM	027
#define CGCOM	030
#define HCOM	031
#define CHCOM	032
#define ICOM	04
#define LCOM	05
#define NCOM	012
#define PCOM	010
#define QCOM	011
#define RCOM	06
#define SCOM	07
#define TCOM	021
#define WCOM	014
#define CWCOM	024
#define YCOM	026
#define XCOM	033

typedef struct label {			/* Label symbol table */
	Rune	asc[9];			/* Label name */
	SedCom	*chain;
	SedCom	*address;		/* Command associated with label */
} Label;

typedef	struct	FILE_CACHE {		/* Data file control block */
	struct FILE_CACHE *next;	/* Forward Link */
	char	*name;			/* Name of file */
} FileCache;

SedCom pspace[MAXCMDS];			/* Command storage */
SedCom *pend = pspace+MAXCMDS;		/* End of command storage */
SedCom *rep = pspace;			/* Current fill point */

Reprog	*lastre = 0;			/* Last regular expression */
Resub	subexp[MAXSUB];			/* sub-patterns of pattern match*/

Rune	addspace[ADDSIZE];		/* Buffer for a, c, & i commands */
Rune	*addend = addspace+ADDSIZE;

SedCom	*abuf[MAXADDS];			/* Queue of pending adds & reads */
SedCom	**aptr = abuf;

struct {				/* Sed program input control block */
	enum PTYPE			/* Either on command line or in file */
		{ P_ARG,
		  P_FILE
		} type;
	union PCTL {			/* Pointer to data */
		Biobuf	*bp;
		char	*curr;
	};
} prog;

Rune	genbuf[LBSIZE];			/* Miscellaneous buffer */

FileCache	*fhead = 0;		/* Head of File Cache Chain */
FileCache	*ftail = 0;		/* Tail of File Cache Chain */

Rune	*loc1;				/* Start of pattern match */
Rune	*loc2;				/* End of pattern match */
Rune	seof;				/* Pattern delimiter char */

Rune	linebuf[LBSIZE+1];		/* Input data buffer */
Rune	*lbend = linebuf+LBSIZE;	/* End of buffer */
Rune	*spend = linebuf;		/* End of input data */
Rune	*cp;				/* Current scan point in linebuf */

Rune	holdsp[LBSIZE+1];		/* Hold buffer */
Rune	*hend = holdsp+LBSIZE;		/* End of hold buffer */
Rune	*hspend = holdsp;		/* End of hold data */

int	nflag;				/* Command line flags */
int	gflag;

int	dolflag;			/* Set when at true EOF */
int	sflag;				/* Set when substitution done */
int	jflag;				/* Set when jump required */
int	delflag;			/* Delete current line when set */

long	lnum = 0;			/* Input line count */

/*
 * fcomp() parses each `w' file name into fname[nfiles] before it knows
 * whether the name is already cached, so one row beyond MAXFILES is
 * reserved as scratch space for that lookup.
 */
char	fname[MAXFILES+1][40];		/* File name cache (+1 scratch row) */
Biobuf	*fcode[MAXFILES];		/* File ID cache */
int	nfiles = 0;			/* Cache fill point */

Biobuf	fout;				/* Output stream */
Biobuf	stdin;				/* Default input */
Biobuf*	f = 0;				/* Input data */

Label	ltab[LABSIZE];			/* Label name symbol table */
Label	*labend = ltab+LABSIZE;		/* End of label table */
Label	*lab = ltab+1;			/* Current Fill point */

int	depth = 0;			/* {} stack pointer */

Rune	bad;				/* Dummy err ptr reference */
Rune	*badp = &bad;


char	CGMES[]	 = 	"Command garbled: %S";
char	TMMES[]	 = 	"Too much text: %S";
char	LTL[]	 = 	"Label too long: %S";
char	AD0MES[] =	"No addresses allowed: %S";
char	AD1MES[] =	"Only one address allowed: %S";

void	address(Addr *);
void	arout(void);
int	cmp(char *, char *);
int	rcmp(Rune *, Rune *);
void	command(SedCom *);
Reprog	*compile(void);
Rune	*compsub(Rune *, Rune *);
void	dechain(void);
void	dosub(Rune *);
int	ecmp(Rune *, Rune *, int);
void	enroll(char *);
void	errexit(void);
int	executable(SedCom *);
void	execute(void);
void	fcomp(void);
long	getrune(void);
Rune	*gline(Rune *);
int	match(Reprog *, Rune *);
void	newfile(enum PTYPE, char *);
int 	opendata(void);
Biobuf	*open_file(char *);
Rune	*place(Rune *, Rune *, Rune *);
void	quit(char *, char *);
int	rline(Rune *, Rune *);
Label	*search(Label *);
int	substitute(SedCom *);
char	*text(char *);
Rune	*stext(Rune *, Rune *);
int	ycomp(SedCom *);
char *	trans(int c);
void	putline(Biobuf *bp, Rune *buf, int n);

/*
 * Entry point: parse the command-line flags, compile the sed script
 * (given by -e, -f, or as the first non-flag argument), queue the input
 * files (or standard input when none are named), then run the script.
 */
void
main(int argc, char **argv)
{
	int	compfl;

	lnum = 0;
	Binit(&fout, 1, OWRITE);
	fcode[nfiles++] = &fout;	/* slot 0 of the file cache is the output stream */
	compfl = 0;

	if(argc == 1)
		exits(0);
	ARGBEGIN{
		case 'n':
			nflag++;
			continue;
		case 'f':
			if(argc <= 1)
				quit("no pattern-file", 0);
			newfile(P_FILE, ARGF());
			fcomp();
			compfl = 1;
			continue;
		case 'e':
			if (argc <= 1)
				quit("missing pattern", 0);
			newfile(P_ARG, ARGF());
			fcomp();
			compfl = 1;
			continue;
		case 'g':
			gflag++;
			continue;
		default:
			fprint(2, "sed: Unknown flag: %c\n", ARGC());
			continue;
	} ARGEND

	/* no -e or -f seen: the first remaining argument is the script */
	if(compfl == 0) {
		if (--argc < 0)
			quit("missing pattern", 0);
		newfile(P_ARG, *argv++);
		fcomp();
	}

	if(depth)
		quit("Too many {'s", 0);

	ltab[0].address = rep;

	dechain();

	if(argc <= 0)
		enroll(0);		/* Add stdin to cache */
	else while(--argc >= 0) {
		enroll(*argv++);
	}
	execute();
	exits(0);
}

/*
 * Compile the sed script selected by the last newfile() call.  Script
 * lines are read with rline() into linebuf and each command is parsed
 * into the next free SedCom slot (rep).  Text for a/c/i/r commands and
 * substitution right-hand sides is appended to addspace via the static
 * fill pointer p, which persists across calls.
 */
void
fcomp(void)
{
	Rune	*tp;
	SedCom	*pt, *pt1;
	int	i;
	Label	*lpt;

	static Rune	*p = addspace;
	static SedCom	**cmpend[DEPTH];	/* stack of {} operations */

	while (rline(linebuf, lbend) >= 0) {
		cp = linebuf;
comploop:
		while(*cp == ' ' || *cp == '\t')
			cp++;
		if(*cp == '\0' || *cp == '#')
			continue;
		if(*cp == ';') {
			cp++;
			goto comploop;
		}

		address(&rep->ad1);
		if (rep->ad1.type != A_NONE) {
			if (rep->ad1.type == A_LAST) {
				if (!lastre)
					quit("First RE may not be null", 0);
				rep->ad1.type = A_RE;
				rep->ad1.rp = lastre;
			}
			if(*cp == ',' || *cp == ';') {
				cp++;
				address(&rep->ad2);
				if (rep->ad2.type == A_LAST) {
					rep->ad2.type = A_RE;
					rep->ad2.rp = lastre;
				}
			} else
				rep->ad2.type = A_NONE;
		}
		while(*cp == ' ' || *cp == '\t')
			cp++;

swit:
		switch(*cp++) {

		default:
			quit("Unrecognized command: %S", (char *)linebuf);

		case '!':
			rep->negfl = 1;
			goto swit;

		case '{':
			/* cmpend[] holds at most DEPTH open groups */
			if(depth >= DEPTH)
				quit("Too many {'s", 0);
			rep->command = BCOM;
			rep->negfl = !(rep->negfl);
			cmpend[depth++] = &rep->lb1;
			if(++rep >= pend)
				quit("Too many commands: %S", (char *) linebuf);
			if(*cp == '\0')
				continue;
			goto comploop;

		case '}':
			if(rep->ad1.type != A_NONE)
				quit(AD0MES, (char *) linebuf);
			if(--depth < 0)
				quit("Too many }'s", 0);
			/* resolve the matching '{' branch to the command after the group */
			*cmpend[depth] = rep;
			if(*cp == 0)
				continue;
			goto comploop;

		case '=':
			rep->command = EQCOM;
			if(rep->ad2.type != A_NONE)
				quit(AD1MES, (char *) linebuf);
			break;

		case ':':
			if(rep->ad1.type != A_NONE)
				quit(AD0MES, (char *) linebuf);

			while(*cp == ' ')
				cp++;
			tp = lab->asc;
			while (*cp && *cp != ';' && *cp != ' ' && *cp != '\t' && *cp != '#') {
				*tp++ = *cp++;
				if(tp >= &(lab->asc[8]))
					quit(LTL, (char *) linebuf);
			}
			*tp = '\0';

			if(lpt = search(lab)) {
				if(lpt->address)
					quit("Duplicate labels: %S", (char *) linebuf);
			} else {
				lab->chain = 0;
				lpt = lab;
				if(++lab >= labend)
					quit("Too many labels: %S", (char *) linebuf);
			}
			lpt->address = rep;
			if (*cp == '#')
				continue;
			rep--;			/* reuse this slot */
			break;

		case 'a':
			rep->command = ACOM;
			if(rep->ad2.type != A_NONE)
				quit(AD1MES, (char *) linebuf);
			if(*cp == '\\')
				cp++;
			if(*cp++ != '\n')
				quit(CGMES, (char *) linebuf);
			rep->text = p;
			p = stext(p, addend);
			break;

		case 'c':
			rep->command = CCOM;
			if(*cp == '\\')
				cp++;
			if(*cp++ != '\n')
				quit(CGMES, (char *) linebuf);
			rep->text = p;
			p = stext(p, addend);
			break;

		case 'i':
			rep->command = ICOM;
			if(rep->ad2.type != A_NONE)
				quit(AD1MES, (char *) linebuf);
			if(*cp == '\\')
				cp++;
			if(*cp++ != '\n')
				quit(CGMES, (char *) linebuf);
			rep->text = p;
			p = stext(p, addend);
			break;

		case 'g':
			rep->command = GCOM;
			break;

		case 'G':
			rep->command = CGCOM;
			break;

		case 'h':
			rep->command = HCOM;
			break;

		case 'H':
			rep->command = CHCOM;
			break;

		case 't':
			rep->command = TCOM;
			goto jtcommon;

		case 'b':
			rep->command = BCOM;
jtcommon:
			while(*cp == ' ')
				cp++;
			if(*cp == '\0') {
				/* no label: append to the chain kept in ltab[0] */
				if(pt = ltab[0].chain) {
					while(pt1 = pt->lb1)
						pt = pt1;
					pt->lb1 = rep;
				} else
					ltab[0].chain = rep;
				break;
			}
			tp = lab->asc;
			while((*tp++ = *cp++))
				if(tp >= &(lab->asc[8]))
					quit(LTL, (char *) linebuf);
			cp--;
			tp[-1] = '\0';

			if(lpt = search(lab)) {
				if(lpt->address) {
					rep->lb1 = lpt->address;
				} else {
					/* label not defined yet: append to its chain */
					pt = lpt->chain;
					while(pt1 = pt->lb1)
						pt = pt1;
					pt->lb1 = rep;
				}
			} else {
				lab->chain = rep;
				lab->address = 0;
				if(++lab >= labend)
					quit("Too many labels: %S", (char *) linebuf);
			}
			break;

		case 'n':
			rep->command = NCOM;
			break;

		case 'N':
			rep->command = CNCOM;
			break;

		case 'p':
			rep->command = PCOM;
			break;

		case 'P':
			rep->command = CPCOM;
			break;

		case 'r':
			rep->command = RCOM;
			if(rep->ad2.type != A_NONE)
				quit(AD1MES, (char *) linebuf);
			if(*cp++ != ' ')
				quit(CGMES, (char *) linebuf);
			rep->text = p;
			p = stext(p, addend);
			break;

		case 'd':
			rep->command = DCOM;
			break;

		case 'D':
			rep->command = CDCOM;
			rep->lb1 = pspace;
			break;

		case 'q':
			rep->command = QCOM;
			if(rep->ad2.type != A_NONE)
				quit(AD1MES, (char *) linebuf);
			break;

		case 'l':
			rep->command = LCOM;
			break;

		case 's':
			rep->command = SCOM;
			seof = *cp++;
			if ((rep->re1 = compile()) == 0) {
				if(!lastre)
					quit("First RE may not be null.", 0);
				rep->re1 = lastre;
			}
			rep->rhs = p;
			if((p = compsub(p, addend)) == 0)
				quit(CGMES, (char *) linebuf);
			if(*cp == 'g') {
				cp++;
				rep->gfl++;
			} else if(gflag)
				rep->gfl++;

			if(*cp == 'p') {
				cp++;
				rep->pfl = 1;
			}

			if(*cp == 'P') {
				cp++;
				rep->pfl = 2;
			}

			if(*cp == 'w') {
				cp++;
				if(*cp++ !=  ' ')
					quit(CGMES, (char *) linebuf);
				/* parse into the next free (or scratch) row, then look for a match */
				text(fname[nfiles]);
				for(i = nfiles - 1; i >= 0; i--)
					if(cmp(fname[nfiles],fname[i]) == 0) {
						rep->fcode = fcode[i];
						goto done;
					}
				if(nfiles >= MAXFILES)
					quit("Too many files in w commands 1", 0);
				rep->fcode = open_file(fname[nfiles]);
			}
			break;

		case 'w':
			rep->command = WCOM;
			if(*cp++ != ' ')
				quit(CGMES, (char *) linebuf);
			/* parse into the next free (or scratch) row, then look for a match */
			text(fname[nfiles]);
			for(i = nfiles - 1; i >= 0; i--)
				if(cmp(fname[nfiles], fname[i]) == 0) {
					rep->fcode = fcode[i];
					goto done;
				}
			if(nfiles >= MAXFILES){
				fprint(2, "sed: Too many files in w commands 2 \n");
				fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES);
				errexit();
			}
			rep->fcode = open_file(fname[nfiles]);
			break;

		case 'x':
			rep->command = XCOM;
			break;

		case 'y':
			rep->command = YCOM;
			seof = *cp++;
			if (ycomp(rep) == 0)
				quit(CGMES, (char *) linebuf);
			break;

		}
done:
		if(++rep >= pend)
			quit("Too many commands, last: %S", (char *) linebuf);
		if(*cp++ != '\0') {
			if(cp[-1] == ';')
				goto comploop;
			quit(CGMES, (char *) linebuf);
		}

	}
}

/*
 * Open `name' for writing (creating it with mode 0666 if it cannot be
 * opened), position at its end, and record the new Biobuf in the next
 * fcode[] slot, advancing nfiles.  Returns the Biobuf; calls quit() on
 * allocation or open failure.
 */
Biobuf *
open_file(char *name)
{
	Biobuf	*bp;
	int	fd;

	if ((bp = malloc(sizeof(Biobuf))) == 0)
		quit("Out of memory", 0);
	if ((fd = open(name, OWRITE)) < 0 &&
		(fd = create(name, OWRITE, 0666)) < 0)
			quit("Cannot create %s", name);
	Binit(bp, fd, OWRITE);
	Bseek(bp, 0, 2);
	fcode[nfiles++] = bp;
	return bp;
}

/*
 * Copy the right-hand side of a substitution from cp into rhs, stopping
 * at the delimiter seof.  A backslash is stored as the marker rune
 * 0xFFFF followed by the escaped rune (with "\n" becoming a newline).
 * Returns the address just past the stored NUL terminator, or 0 if the
 * text does not fit before `end' or the delimiter is missing.
 */
Rune	*
compsub(Rune *rhs, Rune *end)
{
	Rune	r;

	while ((r = *cp++) != '\0') {
		if(r == '\\') {
			if (rhs < end)
				*rhs++ = 0xFFFF;
			else
				return 0;
			r = *cp++;
			if(r == 'n')
				r = '\n';
		} else {
			if(r == seof) {
				if (rhs < end)
					*rhs++ = '\0';
				else
					return 0;
				return rhs;
			}
		}
		if (rhs < end)
			*rhs++ = r;
		else
			return 0;

	}
	return 0;
}

/*
 * Collect the regular expression at cp, up to the delimiter seof, as
 * UTF-8 in a local buffer and compile it with regcomp(); the result is
 * also remembered in lastre.  Returns 0 without compiling when the
 * expression is empty (two adjacent delimiters).  A backslash is copied
 * through together with the rune after it, except that "\n" yields a
 * backslash followed by a newline.
 */
Reprog *
compile(void)
{
	Rune c;
	char *ep;
	char expbuf[512];

	if((c = *cp++) == seof)		/* '//' */
		return 0;
	ep = expbuf;
	do {
		if (c == 0 || c == '\n')
			quit(TMMES, (char *) linebuf);
		if (c == '\\') {
			/* keep room for this rune's encoding and the final NUL */
			if (ep + runelen(c) >= expbuf+sizeof(expbuf))
				quit(TMMES, (char *) linebuf);
			ep += runetochar(ep, &c);
			if ((c = *cp++) == 'n')
				c = '\n';
		}
		/* keep room for this rune's encoding and the final NUL */
		if (ep + runelen(c) >= expbuf+sizeof(expbuf))
			quit(TMMES, (char *) linebuf);
		ep += runetochar(ep, &c);
	} while ((c = *cp++) != seof);
	*ep = 0;
	return lastre = regcomp(expbuf);
}

/*
 * Report a regular-expression error as a garbled command.  The message
 * argument is ignored.  NOTE(review): presumably this overrides the
 * regexp library's error hook -- confirm against regexp(2).
 */
void
regerror(char *s)
{
	USED(s);
	quit(CGMES, (char *) linebuf);
}

/*
 * Select where the script text comes from: for P_ARG, the string
 * `name' itself; otherwise the file `name', opened for reading.
 */
void
newfile(enum PTYPE type, char *name)
{
	if (type == P_ARG)
		prog.curr = name;
	else if ((prog.bp = Bopen(name, OREAD)) == 0)
		quit("Cannot open pattern-file: %s\n", name);
	prog.type = type;
}

/*
 * Read one script line from getrune() into buf, NUL-terminated.  A
 * backslash and the rune following it are both stored, so an escaped
 * newline does not end the line.  Runes that would not fit before
 * `end' are dropped; the terminator is never written beyond `end'.
 * Returns 1 when a newline ended the line, -1 when input ran out.
 */
int
rline(Rune *buf, Rune *end)
{
	long c;
	Rune r;

	while ((c = getrune()) >= 0) {
		r = c;
		if (r == '\\') {
			if (buf < end)
				*buf++ = r;
			if ((c = getrune()) < 0)
				break;
			r = c;
		} else if (r == '\n') {
			*buf = '\0';
			return(1);
		}
		if (buf < end)
			*buf++ = r;
	}
	*buf = '\0';
	return(-1);
}

/*
 * NOTE(review): only the opening of getrune() is visible in the reviewed
 * excerpt; the lines below are reproduced unchanged up to that point.
 */
long
getrune(void)
{
	char *p;
	long c;
	Rune r;

	if (prog.type == P_ARG) {
		if ((p = prog.curr) != 0) {
			if (*p) {
				prog.curr += chartorune(&r, p);
				c = r;
			} else {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -