📄 sed.c
字号:
/* sed - stream editor Author: Eric S. Raymond *//* This used to be three different files with the following makefile: * (Note the chmem).CFLAGS= -F -T.OBJS= sedcomp.s sedexec.ssed: $(OBJS) cc -T. -o sed $(OBJS) @chmem =13312 sed$(OBJS): sed.h * If you want longer lines: increase MAXBUF. * If you want scripts with more text: increase POOLSIZE. * If you want more commands per script: increase MAXCMDS. */#include <ctype.h>#include <sys/types.h>#include <stdlib.h>#include <string.h>#include <unistd.h>#include <stdio.h>/*+++++++++++++++*//* Sed.h -- types and constants for the stream editor *//* Data area sizes used by both modules */#define MAXBUF 4000 /* current line buffer size */#define MAXAPPENDS 20 /* maximum number of appends */#define MAXTAGS 9 /* tagged patterns are \1 to \9 *//* Constants for compiled-command representation */#define EQCMD 0x01 /* = -- print current line number */#define ACMD 0x02 /* a -- append text after current line */#define BCMD 0x03 /* b -- branch to label */#define CCMD 0x04 /* c -- change current line */#define DCMD 0x05 /* d -- delete all of pattern space */#define CDCMD 0x06 /* D -- delete first line of pattern space */#define GCMD 0x07 /* g -- copy hold space to pattern space */#define CGCMD 0x08 /* G -- append hold space to pattern space */#define HCMD 0x09 /* h -- copy pattern space to hold space */#define CHCMD 0x0A /* H -- append pattern space to hold space */#define ICMD 0x0B /* i -- insert text before current line */#define LCMD 0x0C /* l -- print pattern space in escaped form */#define NCMD 0x0D /* n -- get next line into pattern space */#define CNCMD 0x0E /* N -- append next line to pattern space */#define PCMD 0x0F /* p -- print pattern space to output */#define CPCMD 0x10 /* P -- print first line of pattern space */#define QCMD 0x11 /* q -- exit the stream editor */#define RCMD 0x12 /* r -- read in a file after current line */#define SCMD 0x13 /* s -- regular-expression substitute */#define TCMD 0x14 /* t -- branch on any substitute successful */#define CTCMD 0x15 /* T -- branch on any substitute failed */#define WCMD 0x16 /* w -- write pattern space to file */#define CWCMD 0x17 /* W -- write first line of pattern space */#define XCMD 0x18 /* x -- exhange pattern and hold spaces */#define YCMD 0x19 /* y -- transliterate text */struct cmd_t { /* compiled-command representation */ char *addr1; /* first address for command */ char *addr2; /* second address for command */ union { char *lhs; /* s command lhs */ struct cmd_t *link; /* label link */ } u; char command; /* command code */ char *rhs; /* s command replacement string */ FILE *fout; /* associated output file descriptor */ struct { char allbut; /* was negation specified? */ char global; /* was g postfix specified? */ char print; /* was p postfix specified? */ char inrange; /* in an address range? */ } flags;};typedef struct cmd_t sedcmd; /* use this name for declarations */#define BAD ((char *) -1) /* guaranteed not a string ptr *//* Address and regular expression compiled-form markers */#define STAR 1 /* marker for Kleene star */#define CCHR 2 /* non-newline character to be matched * follows */#define CDOT 4 /* dot wild-card marker */#define CCL 6 /* character class follows */#define CNL 8 /* match line start */#define CDOL 10 /* match line end */#define CBRA 12 /* tagged pattern start marker */#define CKET 14 /* tagged pattern end marker */#define CBACK 16 /* backslash-digit pair marker */#define CLNUM 18 /* numeric-address index follows */#define CEND 20 /* symbol for end-of-source */#define CEOF 22 /* end-of-field mark *//* Sed.h ends here */#ifndef CMASK#define CMASK 0xFF /* some char type should have been unsigned * char? */#endif/*+++++++++++++++*//* Sed - stream editor Author: Eric S. Raymond *//* The stream editor compiles its command input (from files or -e options) into an internal form using compile() then executes the compiled form using execute(). Main() just initializes data structures, interprets command line options, and calls compile() and execute() in appropriate sequence. The data structure produced by compile() is an array of compiled-command structures (type sedcmd). These contain several pointers into pool[], the regular-expression and text-data pool, plus a command code and g & p flags. In the special case that the command is a label the struct will hold a ptr into the labels array labels[] during most of the compile, until resolve() resolves references at the end. The operation of execute() is described in its source module.*//* #include <stdio.h> *//* #include "sed.h" *//* Imported functions *//***** public stuff ******/#define MAXCMDS 500 /* maximum number of compiled commands */#define MAXLINES 256 /* max # numeric addresses to compile *//* Main data areas */char linebuf[MAXBUF + 1]; /* current-line buffer */sedcmd cmds[MAXCMDS + 1]; /* hold compiled commands */long linenum[MAXLINES]; /* numeric-addresses table *//* Miscellaneous shared variables */int nflag; /* -n option flag */int eargc; /* scratch copy of argument count */char **eargv; /* scratch copy of argument list */char bits[] = {1, 2, 4, 8, 16, 32, 64, 128};/***** module common stuff *****/#define POOLSIZE 20000 /* size of string-pool space */#define WFILES 10 /* max # w output files that can be compiled */#define RELIMIT 256 /* max chars in compiled RE */#define MAXDEPTH 20 /* maximum {}-nesting level */#define MAXLABS 50 /* max # of labels that can be handled */#define SKIPWS(pc) while ((*pc==' ') || (*pc=='\t')) pc++#define ABORT(msg) (fprintf(stderr, msg, linebuf), quit(2))#define IFEQ(x, v) if (*x == v) x++ , /* do expression *//* Error messages */static char AGMSG[] = "sed: garbled address %s\n";static char CGMSG[] = "sed: garbled command %s\n";static char TMTXT[] = "sed: too much text: %s\n";static char AD1NG[] = "sed: no addresses allowed for %s\n";static char AD2NG[] = "sed: only one address allowed for %s\n";static char TMCDS[] = "sed: too many commands, last was %s\n";static char COCFI[] = "sed: cannot open command-file %s\n";static char UFLAG[] = "sed: unknown flag %c\n";static char CCOFI[] = "sed: cannot create %s\n";static char ULABL[] = "sed: undefined label %s\n";static char TMLBR[] = "sed: too many {'s\n";static char FRENL[] = "sed: first RE must be non-null\n";static char NSCAX[] = "sed: no such command as %s\n";static char TMRBR[] = "sed: too many }'s\n";static char DLABL[] = "sed: duplicate label %s\n";static char TMLAB[] = "sed: too many labels: %s\n";static char TMWFI[] = "sed: too many w files\n";static char REITL[] = "sed: RE too long: %s\n";static char TMLNR[] = "sed: too many line numbers\n";static char TRAIL[] = "sed: command \"%s\" has trailing garbage\n";typedef struct { /* represent a command label */ char *name; /* the label name */ sedcmd *last; /* it's on the label search list */ sedcmd *address; /* pointer to the cmd it labels */} label;/* Label handling */static label labels[MAXLABS]; /* here's the label table */static label *lab = labels + 1; /* pointer to current label */static label *lablst = labels; /* header for search list *//* String pool for regular expressions, append text, etc. etc. */static char pool[POOLSIZE]; /* the pool */static char *fp = pool; /* current pool pointer */static char *poolend = pool + POOLSIZE; /* pointer past pool end *//* Compilation state */static FILE *cmdf = NULL; /* current command source */static char *cp = linebuf; /* compile pointer */static sedcmd *cmdp = cmds; /* current compiled-cmd ptr */static char *lastre = NULL; /* old RE pointer */static int bdepth = 0; /* current {}-nesting level */static int bcount = 0; /* # tagged patterns in current RE *//* Compilation flags */static int eflag; /* -e option flag */static int gflag; /* -g option flag */_PROTOTYPE(int main, (int argc, char **argv));_PROTOTYPE(static void compile, (void));_PROTOTYPE(static int cmdcomp, (int cchar));_PROTOTYPE(static char *rhscomp, (char *rhsp, int delim));_PROTOTYPE(static char *recomp, (char *expbuf, int redelim));_PROTOTYPE(static int cmdline, (char *cbuf));_PROTOTYPE(static char *address, (char *expbuf));_PROTOTYPE(static char *gettext, (char *txp));_PROTOTYPE(static label *search, (label *ptr));_PROTOTYPE(static void resolve, (void));_PROTOTYPE(static char *ycomp, (char *ep, int delim));_PROTOTYPE(void quit, (int n));_PROTOTYPE(void execute, (void));_PROTOTYPE(static int selected, (sedcmd *ipc));_PROTOTYPE(static int match, (char *expbuf, int gf));_PROTOTYPE(static int advance, (char *lp, char *ep));_PROTOTYPE(static int substitute, (sedcmd *ipc));_PROTOTYPE(static void dosub, (char *rhsbuf));_PROTOTYPE(static char *place, (char *asp, char *al1, char *al2));_PROTOTYPE(static void listto, (char *p1, FILE *fp));_PROTOTYPE(static void truncated, (int h));_PROTOTYPE(static void command, (sedcmd *ipc));_PROTOTYPE(static void openfile, (char *file));_PROTOTYPE(static void get, (void));_PROTOTYPE(static void initget, (void));_PROTOTYPE(static char *getline, (char *buf));_PROTOTYPE(static int Memcmp, (char *a, char *b, int count));_PROTOTYPE(static void readout, (void));int main(argc, argv)/* Main sequence of the stream editor */int argc;char *argv[];{ eargc = argc; /* set local copy of argument count */ eargv = argv; /* set local copy of argument list */ cmdp->addr1 = pool; /* 1st addr expand will be at pool start */ if (eargc == 1) quit(0); /* exit immediately if no arguments */ /* Scan through the arguments, interpreting each one */ while ((--eargc > 0) && (**++eargv == '-')) switch (eargv[0][1]) { case 'e': eflag++; compile(); /* compile with e flag on */ eflag = 0; continue; /* get another argument */ case 'f': if (eargc-- <= 0) /* barf if no -f file */ quit(2); if ((cmdf = fopen(*++eargv, "r")) == NULL) { fprintf(stderr, COCFI, *eargv); quit(2); } compile(); /* file is O.K., compile it */ fclose(cmdf); continue; /* go back for another argument */ case 'g': gflag++; /* set global flag on all s cmds */ continue; case 'n': nflag++; /* no print except on p flag or w */ continue; default: fprintf(stdout, UFLAG, eargv[0][1]); continue; } if (cmdp == cmds) { /* no commands have been compiled */ eargv--; eargc++; eflag++; compile(); eflag = 0; eargv++; eargc--; } if (bdepth) /* we have unbalanced squigglies */ ABORT(TMLBR); lablst->address = cmdp; /* set up header of label linked list */ resolve(); /* resolve label table indirections */ execute(); /* execute commands */ quit(0); /* everything was O.K. if we got here */ return(0);}#define H 0x80 /* 128 bit, on if there's really code for * command */#define LOWCMD 56 /* = '8', lowest char indexed in cmdmask *//* Indirect through this to get command internal code, if it exists */static char cmdmask[] ={ 0, 0, H, 0, 0, H + EQCMD, 0, 0, 0, 0, 0, 0, H + CDCMD, 0, 0, CGCMD, CHCMD, 0, 0, 0, 0, 0, CNCMD, 0, CPCMD, 0, 0, 0, H + CTCMD, 0, 0, H + CWCMD, 0, 0, 0, 0, 0, 0, 0, 0, 0, H + ACMD, H + BCMD, H + CCMD, DCMD, 0, 0, GCMD, HCMD, H + ICMD, 0, 0, H + LCMD, 0, NCMD, 0, PCMD, H + QCMD, H + RCMD, H + SCMD, H + TCMD, 0, 0, H + WCMD, XCMD, H + YCMD, 0, H + BCMD, 0, H, 0, 0,};static void compile()/* Precompile sed commands out of a file */{ char ccode; for (;;) { /* main compilation loop */ if (*cp == '\0') { /* get a new command line */ *linebuf = '\0'; /* K.H */ if (cmdline(cp = linebuf) < 0) break; } SKIPWS(cp); if (*cp == '\0') /* empty */ continue; if (*cp == '#') { /* comment */ while (*cp) ++cp; continue; } if (*cp == ';') { /* ; separates cmds */ cp++; continue; } /* Compile first address */ if (fp > poolend) ABORT(TMTXT); else if ((fp = address(cmdp->addr1 = fp)) == BAD) ABORT(AGMSG); if (fp == cmdp->addr1) {/* if empty RE was found */ if (lastre) /* if there was previous RE */ cmdp->addr1 = lastre; /* use it */ else ABORT(FRENL); } else if (fp == NULL) {/* if fp was NULL */ fp = cmdp->addr1; /* use current pool location */ cmdp->addr1 = NULL; } else { lastre = cmdp->addr1; if (*cp == ',' || *cp == ';') { /* there's 2nd addr */ cp++; if (fp > poolend) ABORT(TMTXT); fp = address(cmdp->addr2 = fp); if (fp == BAD || fp == NULL) ABORT(AGMSG); if (fp == cmdp->addr2) cmdp->addr2 = lastre; else lastre = cmdp->addr2; } else cmdp->addr2 = NULL; /* no 2nd address */ } if (fp > poolend) ABORT(TMTXT); SKIPWS(cp); /* discard whitespace after address */ IFEQ(cp, '!') cmdp->flags.allbut = 1; SKIPWS(cp); /* get cmd char, range-check it */ if ((*cp < LOWCMD) || (*cp > '~') || ((ccode = cmdmask[*cp - LOWCMD]) == 0)) ABORT(NSCAX); cmdp->command = ccode & ~H; /* fill in command value */ if ((ccode & H) == 0) /* if no compile-time code */ cp++; /* discard command char */ else if (cmdcomp(*cp++))/* execute it; if ret = 1 */ continue; /* skip next line read */ if (++cmdp >= cmds + MAXCMDS) ABORT(TMCDS); SKIPWS(cp); /* look for trailing stuff */ if (*cp != '\0' && *cp != ';' && *cp != '#') ABORT(TRAIL); }}static int cmdcomp(cchar)/* Compile a single command */register char cchar; /* character name of command */{ static sedcmd **cmpstk[MAXDEPTH]; /* current cmd stack for {} */ static char *fname[WFILES]; /* w file name pointers */ static FILE *fout[WFILES]; /* w file file ptrs */ static int nwfiles = 1; /* count of open w files */ int i; /* indexing dummy used in w */ sedcmd *sp1, *sp2; /* temps for label searches */ label *lpt; char redelim; /* current RE delimiter */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -