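/*
 * sedcomp.c
 *
 * From "Source code of the open-source compiler Open Watcom 1.6.0" - C language code - 1,082 lines in total - page 1 of 4
 *
 * (Code-viewer residue: language "C", line count "1,082", "font size" control.)
 */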
/* sedcomp.c -- stream editor main and compilation phase

   The stream editor compiles its command input  (from files or -e options)
into an internal form using compile() then executes the compiled form using
execute(). Main() just initializes data structures, interprets command line
options, and calls compile() and execute() in appropriate sequence.
   The data structure produced by compile() is an array of compiled-command
structures (type sedcmd).  These contain several pointers into pool[], the
regular-expression and text-data pool, plus a command code and g & p flags.
In the special case that the command is a label the struct  will hold a ptr
into the labels array labels[] during most of the compile,  until resolve()
resolves references at the end.
   The operation of execute() is described in its source module.

==== Written for the GNU operating system by Eric S. Raymond ==== */

#include <assert.h>
#include <ctype.h>                      /* isdigit(), isspace() */
#include <unistd.h>                     /* isatty() */
#include <stdio.h>                      /* uses getc, fprintf, fopen, fclose */
#include <stdlib.h>                     /* uses exit */
#include <string.h>                     /* imported string functions */
#include "sed.h"                        /* command type struct & name defines */

/* Capacity limits for the tables defined just below. */
#define MAXCMDS         400             /* max number of compiled commands */
#define MAXLINES        256             /* max number of numeric addresses */

                                        /* main data areas */
/* None of these objects is static, so they have external linkage.
   MAXBUF and sedcmd are not defined in this file; presumably they come
   from sed.h -- confirm there. */
char            linebuf[MAXBUF + 3];    /* current-line buffer */
sedcmd          cmds[MAXCMDS + 1];      /* hold compiled commands */
long            linenum[MAXLINES];      /* numeric-addresses table */

                                        /* miscellaneous shared variables */
int             nflag = 0;              /* -n option flag */
int             eargc;                  /* scratch copy of argument count */
/* One entry per bit position: entry i is initialised with the value 1 << i. */
char const      bits[] = { 1, 2, 4, 8, 16, 32, 64, 128 };

                                        /***** module common stuff *****/

#define POOLSIZE        10000           /* size of string-pool space */
#define WFILES          10              /* max number of w output files */
#define RELIMIT         256             /* max chars in compiled RE */
#define MAXDEPTH        20              /* maximum {}-nesting level */
#define MAXLABS         50              /* max number of labels */

/* SKIPWS( pc ): advance the char pointer pc past any white space.
   The character is converted to unsigned char before it reaches isspace(),
   because passing a negative plain char to a <ctype.h> function is
   undefined behaviour. */
#define SKIPWS(pc)      while( isspace( (unsigned char)*(pc) ) ) (pc)++
/* ABORT( msg ): print the format string msg to stderr with linebuf as its
   argument, then call myexit( 2 ).  Parenthesised so the comma expression
   stays a single operand wherever the macro is expanded. */
#define ABORT(msg)      ( fprintf( stderr, msg, linebuf ), myexit( 2 ) )
/* IFEQ( x, v ) expr;  -- if *x equals v, step x past it and then evaluate
   expr (the expansion deliberately ends with a comma operator). */
#define IFEQ(x, v)      if( *(x) == (v) ) (x)++ , /* do expression */

                                        /* error messages */
/* Diagnostic texts, all printf-style format strings.
   ABORT( msg ) prints its message with linebuf as the only argument, so a
   %s in a message used that way shows the current-line buffer.  Messages
   printed directly with fprintf() receive their own argument: in main()
   UFLAG gets the offending flag character (%c), COCFI the command-file
   name and NEEDB the current argument. */
static char const       AGMSG[] = "sed: garbled address %s\n";
static char const       CGMSG[] = "sed: garbled command %s\n";
static char const       TMTXT[] = "sed: too much text: %s\n";
static char const       AD1NG[] = "sed: no addresses allowed for %s\n";
static char const       AD2NG[] = "sed: only one address allowed for %s\n";
static char const       TMCDS[] = "sed: too many commands, last was %s\n";
static char const       COCFI[] = "sed: cannot open command-file %s\n";
static char const       UFLAG[] = "sed: unknown flag %c\n";
static char const       CCOFI[] = "sed: cannot create %s\n";
static char const       ULABL[] = "sed: undefined label \":%s\"\n";
static char const       TMLBR[] = "sed: too many {'s\n";
static char const       NSCAX[] = "sed: no such command as %s\n";
static char const       TMRBR[] = "sed: too many }'s\n";
static char const       DLABL[] = "sed: duplicate label \"%s\"\n";
static char const       TMLAB[] = "sed: too many labels \"%s\"\n";
static char const       TMWFI[] = "sed: too many w files\n";
static char const       REITL[] = "sed: RE too long: %s\n";
static char const       TMLNR[] = "sed: too many line numbers\n";
static char const       TRAIL[] = "sed: command \"%s\" has trailing garbage\n";
static char const       NEEDB[] = "sed: error processing: %s\n";
static char const       INERR[] = "sed: internal error: %s\n";
static char const       SMCNT[] = "sed: bad value for match count on s command %s\n";
static char const       UNCLS[] = "sed: invalid character class name %s\n";
/* Help text, one array element per output line, terminated by a NULL
   entry.  The strings carry no trailing newline.
   NOTE(review): presumably printed line by line by usage(), whose
   definition is not visible in this part of the file -- confirm there. */
static char const       *USAGE[] = { 
    "Usage: sed [-g] [-n] script file ...",
    "       sed [-g] [-n] -e script ... -f script_file ... file ...",
    "",
    "  -g: global substitute             -n: no default copy,",
    "  script: append to commands,       script_file: append contents to commands",
    "  file: a text file - lines of upto 8K bytes, \\n not needed on last line",
    "        default is to read from standard input",
    "",
    "General command format:",
    "    [addr1][,addr2] [!]function [arguments]",
    "    [addr1][,addr2] [!]s/[RE]/replace/[p|P][g][w file]",
    "",
    "Command list:",
    "    a: append,      b: branch,    c: change,   d/D: delete,",
    "    g/G: get,       h/H: hold,    i: insert,   l: list,",
    "    n/N: next,      p/P: print,   q: quit,     r: read,",
    "    s: substitute,  t/T: test,    w/W: write,  x: xchange,",
    "    y: translate,   :label,       =: number,   {}: grouping",
    "",
    "Regular expression elements:",
    "    \\n, and \\t: newline, and tab",
    "    ^: line begin,             $: line end,",
    "    [a-z$0-9]: class,          [^a-z$]: inverse-class,",
    "    .: one character,          *: 0 or more repeats,",
    "    +: one or more repeats,    \\{n,m\\} n to m repeats,",
    "    \\(..\\): subexpression,     \\1..\\9: matched subexpression,",
    "",
    "Substitution patterns:",
    "    \\n,\\t: newline and tab",
    "    &: match pattern,          \\1..\\9: matched subexpression",
    NULL                                /* end-of-table marker */
};

/* One entry of the label table labels[].
   main() fills in the header entry (labels[0]) itself: its name is set to
   a dummy string before compilation and its link is set to the current
   command pointer once all commands have been compiled. */
typedef struct                          /* represent a command label */
{
    char        *name;                  /* the label name */
    sedcmd      *last;                  /* it's on the label search list */
    sedcmd      *link;                  /* pointer to the cmd it labels */
}               label;

                                        /* label handling */
/* Module-private compilation state.  Everything here is static, so it is
   visible only inside this source file. */
static label    labels[MAXLABS];        /* here's the label table */
                                        /* first label is end of script */
static label    *curlab = labels + 1;   /* pointer to current label */
static label    *lablst = labels;       /* header for search list */

                                        /* string pool for REs, etc. */
static char     pool[POOLSIZE];         /* the pool */
static char     *fp     = pool;         /* current pool pointer */
static char     *poolend  = pool + POOLSIZE;    /* pointer past pool end */

                                        /* compilation state */
static FILE     *cmdf   = NULL;         /* current command source */
/* main() treats cp == NULL as "no commands have been compiled yet". */
static char     *cp     = NULL;         /* compile pointer */
static sedcmd   *cmdp   = cmds;         /* current compiled-cmd ptr */
/* main() aborts with TMLBR if bdepth is still non-zero after compilation. */
static int      bdepth  = 0;            /* current {}-nesting level */
static int      bcount  = 0;            /* # tagged patterns in current RE */
static char     **eargv;                /* scratch copy of argument list */

/* compilation flags */
/* main() raises eflag only around a compile() call whose script comes from
   the command line, and resets it to 0 afterwards. */
static int      eflag = 0;              /* -e option flag */
static int      gflag = 0;              /* -g option flag */

/* Forward declarations of this module's private (static) functions.
   Most of their definitions are not visible in this part of the file, so
   only what can be seen here is noted. */
static void     compile( void );        /* precompile sed commands */
static int      cmdcomp( register char cchar );
static char     *rhscomp( register char *rhsp, register char delim );
static char     *recomp( char *expbuf, char redelim );
static int      cmdline( register char *cbuf );
static char     *getaddress( register char *expbuf );
static void     gettext( int accept_whitespace );
static label    *search( void );
static void     resolve( void );        /* called by main() after compiling */
static char     *ycomp( register char *ep, char delim );
static void     myexit( int status );   /* error paths here pass status 2 */
static void     usage( void );          /* called by main() when nothing to do */

#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
#ifndef isblank // OW defines isblank without defining __STDC_VERSION__ W.Briscoe 20041008
/* Fallback for pre-C99 libraries that lack isblank():
   yields 1 for a space or a horizontal tab, 0 for anything else. */
static int isblank( int c )
{
    switch( c ) {
    case ' ':
    case '\t':
        return( 1 );
    default:
        return( 0 );
    }
}
#endif
#endif

/* main sequence of the stream editor
 *
 * Seeds the label-list header and the first command's address slot, then
 * walks the leading '-' arguments:
 *     -e script        compile the script taken from the command line
 *     -f script_file   compile the commands read from script_file
 *     -g               set the global flag for s commands
 *     -n               no print except on p flag or w
 * Flags may be folded (-ng, -escript, -fscript_file).  If no command was
 * compiled by an option, the first remaining argument is compiled as the
 * script.  Afterwards labels are resolved and execute() is called for each
 * remaining file argument, or once with NULL (stdin) when there is none.
 *
 * Returns 0 when it runs to completion; error paths leave through usage(),
 * ABORT() or myexit( 2 ).
 */
int main( int argc, char *argv[] )
{
    static char dummy_name[] = "progend\n";

    lablst->name = dummy_name;  /* Must set so strcmp can be done */
    cmdp->addr1 = pool;         /* 1st addr expand will be at pool start */

    if( argc <= 1 )
        usage();                /* exit immediately if no arguments */
    eargc   = argc;             /* set local copy of argument count */
    eargv   = argv;             /* set local copy of argument list */
    /* scan through the arguments, interpreting each one */
    /* We don't use the OW GetOpt() or the POSIX getopt() as we want to do
    * -e i\ hello -e "s/$/ world" */
    while( --eargc > 0 && **++eargv == '-' ) {
        int const       flag = eargv[0][1];
        int             folded = 0;     /* set when the argument was rewound */

        /* Support "folded" flags such as -ng rather than -n -g.
         * A bare "-" ends right after the dash, so index 2 may only be
         * inspected when there is a flag character at index 1. */
        if( flag != '\0' && eargv[0][2] != '\0' ) {
            char    *fr = eargv[0]+2;
            char    *to;

            switch( flag ) {
            case 'e':
            case 'f':
                to = fr - 2;    /* rest of argument becomes the operand */
                break;
            case 'g':
            case 'n':
                to = fr - 1;    /* drop this flag, keep the leading '-' */
                break;
            default:
                to = fr;        /* unknown flag: leave the text alone */
                break;
            }
                                /* Move up remaining data */
            while( ( *to++ = *fr++ ) != 0 ) ;
            ++eargc, --eargv;   /* Consider rest of argument "again" */
            folded = 1;
        }

        switch( flag ) {
        case 'e':
            eflag++;
            compile();          /* compile with e flag on */
            eflag = 0;
            break;              /* get another argument */
        case 'f':
            if( --eargc <= 0 )  /* barf if no -f file */
                fprintf( stderr, NEEDB, eargv[0] ), myexit( 2 );
            if( ( cmdf = fopen( *++eargv, "r" ) ) == NULL )
                fprintf( stderr, COCFI, *eargv ), myexit( 2 );
            compile();          /* file is O.K., compile it */
            fclose( cmdf );
            break;              /* go back for another argument */
        case 'g':
            gflag++;            /* set global flag on all s cmds */
            break;
        case 'n':
            nflag++;            /* no print except on p flag or w */
            break;
        default:
            fprintf( stderr, UFLAG, flag );
            /* Skip the garbage argument itself, and nothing more.  Only a
             * folded argument was rewound above and needs stepping over
             * here; otherwise the loop header already moves on, and an
             * extra step would swallow the following argument too. */
            if( folded )
                eargv++, eargc--;
            break;
        }
    }
    if( cp == NULL ) {          /* no commands have been compiled */
        if( eargc <= 0 )
            usage();            /* exit immediately if no commands */
        /* Step back one argument so compile() picks up the script as if it
         * had followed -e, then restore the position afterwards. */
        eargv--;
        eargc++;
        eflag++;
        compile();
        eflag = 0;
        eargv++;
        eargc--;
    }

    if( bdepth )                /* we have unbalanced squigglies */
        ABORT( TMLBR );

    lablst->link = cmdp;        /* set up header of label linked list */
    resolve();                  /* resolve label table indirections */
    (void)setvbuf( stdout, NULL, _IOLBF, 0 ); /* Improve reactivity in a pipe */
    if( eargc <= 0 ) {           /* if there are no files specified */
        (void)setvbuf( stdin, NULL, _IOLBF, 0 ); /* Improve reactivity in a pipe */
        execute( NULL );        /*   execute commands on stdin only */
    } else while( --eargc >= 0 )  /* else do commands on each file specified */
        execute( *eargv++ );
    return( 0 );                /* everything was O.K. if we got here */
}

/* precompile sed commands out of a file */
static void compile( void )
{
    #define H       0x80        /* 128 bit, on if there's code for command */
    #define LOWCMD  56          /* = '8', lowest char indexed in cmdmask */

    /* indirect through this to get command internal code, if it exists */
    static char const   cmdmask[] = {
        0,     0,      H,      0,      0,       H+EQCMD, 0,           0, /* 89:;<=>? */
        0,     0,      0,      0,      H+CDCMD, 0,       0,       CGCMD, /* @ABCDEFG */
        CHCMD, 0,      0,      0,      0,       0,       CNCMD,       0, /* HIJKLMNO */
        CPCMD, 0,      0,      0,      H+CTCMD, 0,       0,     H+CWCMD, /* PQRSTUVW */
        0,     0,      0,      0,      0,       0,       0,           0, /* XYZ[\]^_ */
        0,     H+ACMD, H+BCMD, H+CCMD, DCMD,    0,       0,        GCMD, /* `abcdefg */
        HCMD,  H+ICMD, 0,      0,      H+LCMD,  0,       NCMD,        0, /* hijklmno */
        PCMD,  H+QCMD, H+RCMD, H+SCMD, H+TCMD,  0,       0,      H+WCMD, /* pqrstuvw */
        XCMD,  H+YCMD, 0,      H+BCMD, 0,       H,       0,           0, /* xyz{|}~  */
    };

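/*
 * Code-viewer keyboard shortcuts (web page residue, not part of sedcomp.c):
 *
 *   Copy code:          Ctrl + C
 *   Search code:        Ctrl + F
 *   Full-screen mode:   F11
 *   Increase font size: Ctrl + =
 *   Decrease font size: Ctrl + -
 *   Show shortcuts:     ?
 */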