sedcomp.c
来自「开放源码的编译器open watcom 1.6.0版的源代码」· C语言 代码 · 共 1,082 行 · 第 1/4 页
C
1,082 行
/* sedcomp.c -- stream editor main and compilation phase
The stream editor compiles its command input (from files or -e options)
into an internal form using compile() then executes the compiled form using
execute(). Main() just initializes data structures, interprets command line
options, and calls compile() and execute() in appropriate sequence.
The data structure produced by compile() is an array of compiled-command
structures (type sedcmd). These contain several pointers into pool[], the
regular-expression and text-data pool, plus a command code and g & p flags.
In the special case that the command is a label the struct will hold a ptr
into the labels array labels[] during most of the compile, until resolve()
resolves references at the end.
The operation of execute() is described in its source module.
==== Written for the GNU operating system by Eric S. Raymond ==== */
#include <assert.h>
#include <ctype.h> /* isdigit(), isspace() */
#include <unistd.h> /* isatty() */
#include <stdio.h> /* uses getc, fprintf, fopen, fclose */
#include <stdlib.h> /* uses exit */
#include <string.h> /* imported string functions */
#include "sed.h" /* command type struct & name defines */
/* Compile-time capacity limits for the tables defined just below. */
#define MAXCMDS 400 /* max number of compiled commands; cmds[] is declared with MAXCMDS + 1 entries */
#define MAXLINES 256 /* max number of numeric addresses; dimension of linenum[] */
/* main data areas (external linkage: not declared static in this file) */
char linebuf[MAXBUF + 3]; /* current-line buffer; MAXBUF is not defined in this file (presumably in sed.h); also passed as the argument of every ABORT() message */
sedcmd cmds[MAXCMDS + 1]; /* hold compiled commands */
long linenum[MAXLINES]; /* numeric-addresses table */
/* miscellaneous shared variables */
int nflag = 0; /* -n option flag; main() increments it once per -n seen */
int eargc; /* scratch copy of argument count */
/* Single-bit masks, bits[i] holding the value 1 << i for i = 0..7.
 * NOTE(review): 128 does not fit in a signed char, so where plain char is
 * signed the stored value of bits[7] is implementation-defined. */
char const bits[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
/***** module common stuff *****/
#define POOLSIZE 10000 /* size of string-pool space */
#define WFILES 10 /* max number of w output files */
#define RELIMIT 256 /* max chars in compiled RE */
#define MAXDEPTH 20 /* maximum {}-nesting level */
#define MAXLABS 50 /* max number of labels */

/* SKIPWS(pc): advance the character pointer pc past any white space.
 * The character is converted to unsigned char before being handed to
 * isspace(): passing a negative plain-char value (any byte >= 0x80 where
 * char is signed) to a <ctype.h> function is undefined behaviour.
 * pc is evaluated more than once, so it must be a side-effect-free lvalue. */
#define SKIPWS(pc) while( isspace( (unsigned char)*(pc) ) ) (pc)++

/* ABORT(msg): print the printf-style format msg to stderr with linebuf as
 * its single argument, then call myexit( 2 ).  The expansion is one
 * parenthesized comma expression so it behaves as a single operand wherever
 * it is placed. */
#define ABORT(msg) ( fprintf( stderr, (msg), linebuf ), myexit( 2 ) )

/* IFEQ(x, v) expr;  -- if *x equals v, step x past that character and then
 * evaluate expr; otherwise do neither.  The expansion deliberately ends in a
 * comma operator, so an expression and a ';' MUST follow the macro call. */
#define IFEQ(x, v) if( *(x) == (v) ) (x)++ , /* do expression */
/* error messages
 * Each is a printf-style format string written to stderr.  When a message is
 * issued through ABORT() the one argument supplied is linebuf; messages
 * without a conversion simply ignore it.  main() calls fprintf() directly
 * for NEEDB and COCFI (argument: a command-line string) and for UFLAG
 * (argument: the flag character, hence %c). */
static char const AGMSG[] = "sed: garbled address %s\n";
static char const CGMSG[] = "sed: garbled command %s\n";
static char const TMTXT[] = "sed: too much text: %s\n";
static char const AD1NG[] = "sed: no addresses allowed for %s\n";
static char const AD2NG[] = "sed: only one address allowed for %s\n";
static char const TMCDS[] = "sed: too many commands, last was %s\n";
static char const COCFI[] = "sed: cannot open command-file %s\n";
static char const UFLAG[] = "sed: unknown flag %c\n";
static char const CCOFI[] = "sed: cannot create %s\n";
static char const ULABL[] = "sed: undefined label \":%s\"\n";
static char const TMLBR[] = "sed: too many {'s\n";
static char const NSCAX[] = "sed: no such command as %s\n";
static char const TMRBR[] = "sed: too many }'s\n";
static char const DLABL[] = "sed: duplicate label \"%s\"\n";
static char const TMLAB[] = "sed: too many labels \"%s\"\n";
static char const TMWFI[] = "sed: too many w files\n";
static char const REITL[] = "sed: RE too long: %s\n";
static char const TMLNR[] = "sed: too many line numbers\n";
static char const TRAIL[] = "sed: command \"%s\" has trailing garbage\n";
static char const NEEDB[] = "sed: error processing: %s\n";
static char const INERR[] = "sed: internal error: %s\n";
static char const SMCNT[] = "sed: bad value for match count on s command %s\n";
static char const UNCLS[] = "sed: invalid character class name %s\n";
/* Help text, one array element per output line, terminated by a NULL
 * pointer so a consumer can iterate without knowing the element count.
 * The code that prints it is not in this part of the file (presumably
 * usage()).  Backslashes are doubled so the printed text shows a single
 * backslash. */
static char const *USAGE[] = {
"Usage: sed [-g] [-n] script file ...",
" sed [-g] [-n] -e script ... -f script_file ... file ...",
"",
" -g: global substitute -n: no default copy,",
" script: append to commands, script_file: append contents to commands",
" file: a text file - lines of upto 8K bytes, \\n not needed on last line",
" default is to read from standard input",
"",
"General command format:",
" [addr1][,addr2] [!]function [arguments]",
" [addr1][,addr2] [!]s/[RE]/replace/[p|P][g][w file]",
"",
"Command list:",
" a: append, b: branch, c: change, d/D: delete,",
" g/G: get, h/H: hold, i: insert, l: list,",
" n/N: next, p/P: print, q: quit, r: read,",
" s: substitute, t/T: test, w/W: write, x: xchange,",
" y: translate, :label, =: number, {}: grouping",
"",
"Regular expression elements:",
" \\n, and \\t: newline, and tab",
" ^: line begin, $: line end,",
" [a-z$0-9]: class, [^a-z$]: inverse-class,",
" .: one character, *: 0 or more repeats,",
" +: one or more repeats, \\{n,m\\} n to m repeats,",
" \\(..\\): subexpression, \\1..\\9: matched subexpression,",
"",
"Substitution patterns:",
" \\n,\\t: newline and tab",
" &: match pattern, \\1..\\9: matched subexpression",
NULL
};
/* One entry of the label table labels[]. */
typedef struct /* represent a command label */
{
char *name; /* the label name; main() sets it on the list-header entry so strcmp can be done */
sedcmd *last; /* original note: "it's on the label search list" -- NOTE(review): no use of this member is visible here; confirm its meaning where labels are searched and resolved */
sedcmd *link; /* pointer to the cmd it labels; for the list-header entry main() sets it to cmdp once compilation is finished */
} label;
/* label handling */
static label labels[MAXLABS]; /* here's the label table */
/* first label is end of script */
static label *curlab = labels + 1; /* pointer to current label; starts at labels[1] because labels[0] is the header */
static label *lablst = labels; /* header for search list (labels[0]); main() names it "progend\n" and links it to the final cmdp */
/* string pool for REs, etc. */
static char pool[POOLSIZE]; /* the pool */
static char *fp = pool; /* current pool pointer (a char *, not a FILE *, despite the name) */
static char *poolend = pool + POOLSIZE; /* pointer past pool end */
/* compilation state */
static FILE *cmdf = NULL; /* current command source; main() sets it to the file opened for -f */
static char *cp = NULL; /* compile pointer; main() treats cp == NULL as "no commands have been compiled" */
static sedcmd *cmdp = cmds; /* current compiled-cmd ptr; starts at cmds[0] */
static int bdepth = 0; /* current {}-nesting level; non-zero after compilation makes main() abort with TMLBR */
static int bcount = 0; /* # tagged patterns in current RE */
static char **eargv; /* scratch copy of argument list */
/* compilation flags */
static int eflag = 0; /* -e option flag; main() raises it around compile() calls whose script is a command-line argument */
static int gflag = 0; /* -g option flag; main() increments it once per -g seen */
/* Forward declarations of the file-local functions.  Most definitions lie
 * outside this part of the file, so the notes below state only what the
 * call sites in main() and the file header comment show. */
static void compile( void ); /* precompile sed commands; main() calls it once per -e / -f option */
static int cmdcomp( register char cchar );
static char *rhscomp( register char *rhsp, register char delim );
static char *recomp( char *expbuf, char redelim );
static int cmdline( register char *cbuf );
static char *getaddress( register char *expbuf );
static void gettext( int accept_whitespace );
static label *search( void );
static void resolve( void ); /* resolve label table indirections; main() calls it after all compilation */
static char *ycomp( register char *ep, char delim );
static void myexit( int status ); /* error exit path; invoked with status 2 by ABORT() and by main() */
static void usage( void ); /* called by main() when there are no arguments or no commands; call sites assume it does not return */
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
#ifndef isblank /* OW defines isblank without defining __STDC_VERSION__ W.Briscoe 20041008 */
/* Fallback isblank() for pre-C99 libraries that do not provide one:
 * yields 1 when c is a space or a horizontal tab, 0 for anything else. */
static int isblank( int c )
{
    switch( c ) {
    case ' ':
    case '\t':
        return( 1 );
    default:
        return( 0 );
    }
}
#endif
#endif
/*
 * main -- main sequence of the stream editor.
 *
 * Seeds the label-list header and the first command's address pointer, then
 * interprets the leading '-' arguments:
 *      -e          call compile() with eflag raised
 *      -f file     open file as the command source (cmdf) and compile() it
 *      -g          bump gflag
 *      -n          bump nflag
 * Flags may be "folded" into one argument (-ng, -escript, -ffile): the
 * argument is rewritten in place and looked at again on the next pass.
 * An unknown flag is reported on stderr and that argument is skipped.
 *
 * If no option compiled anything (cp is still NULL), the first remaining
 * argument is compiled as the script.  After that unbalanced '{' is
 * diagnosed, labels are resolved, and execute() is run once per remaining
 * file argument, or once with NULL when there are none.
 *
 * Returns 0 when control reaches the end; error paths leave through
 * myexit( 2 ) or usage().
 */
int main( int argc, char *argv[] )
{
    static char dummy_name[] = "progend\n";

    lablst->name = dummy_name;      /* Must set so strcmp can be done */
    cmdp->addr1 = pool;             /* 1st addr expand will be at pool start */
    if( argc <= 1 )
        usage();                    /* exit immediately if no arguments */
    eargc = argc;                   /* set local copy of argument count */
    eargv = argv;                   /* set local copy of argument list */

    /* scan through the arguments, interpreting each one */
    /* We don't use the OW GetOpt() or the POSIX getopt() as we want to do
     * -e i\ hello -e "s/$/ world" */
    while( --eargc > 0 && **++eargv == '-' ) {
        int const   flag = eargv[0][1];
        int         folded = 0;     /* set when the argument was rewound below */

        /* Support "folded" flags such as -ng rather than -n -g.
         * For a lone "-" the flag is the terminating NUL and there is no
         * character at index 2, so only look further when flag is real. */
        if( flag != '\0' && eargv[0][2] != '\0' ) {
            char    *fr = eargv[0] + 2;
            char    *to;

            switch( flag ) {
            case 'e':
            case 'f':
                to = fr - 2;        /* drop "-e"/"-f"; the rest is left as a plain argument */
                break;
            case 'g':
            case 'n':
                to = fr - 1;        /* drop the flag letter, keep the '-' */
                break;
            default:
                to = fr;            /* unknown flag: leave the text as it is */
                break;
            }
            /* Move up remaining data */
            while( ( *to++ = *fr++ ) != 0 )
                ;
            ++eargc, --eargv;       /* Consider rest of argument "again" */
            folded = 1;
        }
        switch( flag ) {
        case 'e':
            eflag++;
            compile();              /* compile with e flag on */
            eflag = 0;
            break;                  /* get another argument */
        case 'f':
            if( --eargc <= 0 )      /* barf if no -f file */
                fprintf( stderr, NEEDB, eargv[0] ), myexit( 2 );
            if( ( cmdf = fopen( *++eargv, "r" ) ) == NULL )
                fprintf( stderr, COCFI, *eargv ), myexit( 2 );
            compile();              /* file is O.K., compile it */
            fclose( cmdf );
            break;                  /* go back for another argument */
        case 'g':
            gflag++;                /* set global flag on all s cmds */
            break;
        case 'n':
            nflag++;                /* no print except on p flag or w */
            break;
        default:
            fprintf( stderr, UFLAG, flag );
            /* Skip garbage argument.  Only needed to undo the rewind done
             * for a folded argument; an unfolded one is stepped over by the
             * loop itself, and advancing here as well would silently drop
             * the argument that follows it. */
            if( folded )
                eargv++, eargc--;
            break;
        }
    }

    if( cp == NULL ) {              /* no commands have been compiled */
        if( eargc <= 0 )
            usage();                /* exit immediately if no commands */
        /* Back up one argument so the script argument is compiled the same
         * way as a -e operand, then restore the position afterwards. */
        eargv--;
        eargc++;
        eflag++;
        compile();
        eflag = 0;
        eargv++;
        eargc--;
    }

    if( bdepth )                    /* we have unbalanced squigglies */
        ABORT( TMLBR );

    lablst->link = cmdp;            /* set up header of label linked list */
    resolve();                      /* resolve label table indirections */

    (void)setvbuf( stdout, NULL, _IOLBF, 0 );   /* Improve reactivity in a pipe */
    if( eargc <= 0 ) {              /* if there are no files specified */
        (void)setvbuf( stdin, NULL, _IOLBF, 0 ); /* Improve reactivity in a pipe */
        execute( NULL );            /* execute commands on stdin only */
    } else {
        while( --eargc >= 0 )       /* else do commands on each file specified */
            execute( *eargv++ );
    }
    return( 0 );                    /* everything was O.K. if we got here */
}
/* precompile sed commands out of a file */
static void compile( void )
{
#define H 0x80 /* 128 bit, on if there's code for command */
#define LOWCMD 56 /* = '8', lowest char indexed in cmdmask */
/* indirect through this to get command internal code, if it exists */
static char const cmdmask[] = {
0, 0, H, 0, 0, H+EQCMD, 0, 0, /* 89:;<=>? */
0, 0, 0, 0, H+CDCMD, 0, 0, CGCMD, /* @ABCDEFG */
CHCMD, 0, 0, 0, 0, 0, CNCMD, 0, /* HIJKLMNO */
CPCMD, 0, 0, 0, H+CTCMD, 0, 0, H+CWCMD, /* PQRSTUVW */
0, 0, 0, 0, 0, 0, 0, 0, /* XYZ[\]^_ */
0, H+ACMD, H+BCMD, H+CCMD, DCMD, 0, 0, GCMD, /* `abcdefg */
HCMD, H+ICMD, 0, 0, H+LCMD, 0, NCMD, 0, /* hijklmno */
PCMD, H+QCMD, H+RCMD, H+SCMD, H+TCMD, 0, 0, H+WCMD, /* pqrstuvw */
XCMD, H+YCMD, 0, H+BCMD, 0, H, 0, 0, /* xyz{|}~ */
};
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?