sedexec.c

来自「开放源码的编译器open watcom 1.6.0版的源代码」· C语言 代码 · 共 911 行 · 第 1/3 页

C
911
字号
/*
sedexec.c -- execute compiled form of stream editor commands

   The single entry point of this module is the function execute(). It
may take a string argument (the name of a file to be used as text)  or
the argument NULL which tells it to filter standard input. It executes
the compiled commands in cmds[] on each line in turn.
   The function command() does most of the work. The functions match() and
advance() are used for matching text against precompiled regular expressions
and dosub() does right-hand-side substitution. The function getline() does
text input; readout() and memeql() are output and string-comparison utilities.

==== Written for the GNU operating system by Eric S. Raymond ====

18NOV86 Fixed bug in 'selected()' that prevented address ranges from
    working.                - Billy G. Allie.
21FEB88 Refixed bug in 'selected()'     - Charles Marslett
*/

#include <assert.h>
#include <io.h>                         /* isatty() */
#include <stdio.h>                      /* {f}puts, {f}printf, etc. */
#include <ctype.h>                      /* isprint(), isdigit(), toascii() */
#include <stdlib.h>                     /* for exit() */
#include "sed.h"                        /* command structures & constants */

/* Buffer sizes; both are aliases of MAXBUF, which is defined outside this file. */
#define MAXHOLD         MAXBUF          /* size of the hold space */
#define GENSIZ          MAXBUF          /* maximum genbuf size */

/* Boolean values used for the flags and int return codes in this module. */
#define TRUE            1
#define FALSE           0

/* Disabled code: the preprocessor removes everything up to the #endif. */
#if 0
/* LTLMSG was used when buffer overflow stopped sed */
static char const       LTLMSG[] = "sed: line too long \"%.*s\"\n";
#define ABORTEX(msg) fprintf( stderr, msg, sizeof genbuf, genbuf ), exit( 2 )
#endif

/* Diagnostic message texts (the last two are printf-style format strings). */
/* FRENUL is printed by match() before exit( 2 ) when the RE is CEOF and no
   earlier RE has been remembered. */
static char const       FRENUL[] = "sed: first RE must be non-null\n";
/* NOROOM takes an int (%d) and a long (%ld) argument. */
static char const       NOROOM[] = "sed: can only fit %d bytes at line %ld\n";
/* INTERR takes one string (%s) argument. */
static char const       INTERR[] = "sed: internal error: %s\n";

/* File-scope state shared by the functions of this module. */
static char     *spend;                 /* current end-of-line-buffer pointer; execute() sets it from getline()'s return value */
static long     lnum = 0L;              /* current source line number; compared with linenum[] entries in selected() */

                                        /* append buffer maintenance */
static sedcmd   *appends[MAXAPPENDS];   /* array of ptrs to a,i,c commands */
static sedcmd   **aptr = appends;       /* ptr to current append */

                                        /* genbuf and its pointers */
static char     genbuf[GENSIZ];         /* scratch buffer; match() copies it back into linebuf when its gf argument is set */
static char     *loc1;                  /* position in the line at which match() attempted (and, on success, found) the RE */
static char     *loc2;                  /* set by advance() to just past the matched text when it reaches CEOF;
                                           match() resumes from here when gf or is_cnt is set.
                                           Original note: also the last character to remove in dosub() */
static char     *locs;                  /* match() sets this (loc2 when gf is set, else NULL) as a backtrack backstop */

                                        /* command-logic flags */
static int      lastline;               /* selected() accepts a CEND address only while this is nonzero;
                                           presumably set while the last input line is being processed -- confirm in getline() */
static int      jump;                   /* jump to cmd's link address if set; also makes execute() read the next line at spend */
static int      delete;                 /* delete command flag: execute() stops running commands and skips the default output */

                                        /* tagged-pattern tracking */
static char     *bracend[MAXTAGS+1];    /* tagged pattern end pointers (stored by advance() at CKET) */
static char     *brastart[MAXTAGS+1];   /* tagged pattern start pointers (stored by advance() at CBRA) */
static sedcmd   *pending = NULL;        /* next command to be executed; execute() resumes with it on entry and clears it */

/* Forward declarations of the module's private (file-scope) helpers. */
static int      selected( sedcmd *ipc );                        /* does the command apply to the current line? */
static int      match( char *expbuf, int gf, int is_cnt );      /* match a compiled RE against linebuf */
static int      advance( register char *lp, register char *ep );/* match RE elements at ep against text at lp */
static int      substitute( sedcmd const *ipc );
static void     dosub( char const *rhsbuf );                    /* right-hand-side substitution */
static char     *place( register char *asp, register char const *al1,
    register char const *al2 );
static void     listto( register char const *p1, FILE *fp );
static void     command( sedcmd *ipc );                         /* execute one compiled command */
static char     *getline( register char *buf );                 /* text input; execute() treats a BAD return as end of input */
static int      memeql( register char const *a, register char const *b, int count );   /* string comparison */
static void     readout( void );                                /* emit any append text */

/*
 * execute -- run the compiled commands in cmds[] over one input source
 *
 * file: name of the text file to filter, or NULL to filter standard input.
 *       A named file is attached to stdin with freopen().
 *
 * Each line is read with getline(), every selected command in cmds[] is run
 * on it through command(), the resulting line is written to stdout unless
 * nflag or the delete flag is set, and readout() is called to emit append
 * text.  The function returns when getline() reports BAD.  If a command was
 * left in 'pending', execution resumes with that command before any new line
 * is read.
 */
void execute( const char *file )        /* name of text source file to filter */
{
    register char const *p1;            /* dummy copy ptrs */
    register sedcmd     *ipc;           /* ptr to current command */
    char                *execp;         /* ptr to source */

    if( file != NULL ) {                /* filter text from a named file */
        /* NOTE(review): on failure only a message is printed and processing
           continues with stdin, which freopen() has closed -- confirm that
           getline() copes with this before changing it */
        if( freopen( file, "r", stdin ) == NULL )
            fprintf( stderr, "sed: can't open %s\n", file );
    } else
        if( isatty( fileno( stdin ) ) ) /* It is easy to be spuriously awaiting input */
            fprintf( stderr, "sed: reading from terminal\n" );

    if( pending ) {                     /* there's a command waiting */
        ipc = pending;                  /* it will be first executed */
        pending = NULL;                 /* turn off the waiting flag */
        goto doit;                      /* enters the command loop below at its 'doit' label */
    }
                                        /* the main command-execution loop */
    for( ;; ) {
                                        /* get next line to filter */
                                        /* jump is set but not cleared by D */
                                        /* with jump set the new text is read at spend, i.e. after
                                           what is already in linebuf; otherwise linebuf is refilled */
        if( ( execp = getline( jump ? spend : linebuf ) ) == BAD ) {
            if( jump ) {                /* no more input: write out what is still held in linebuf */
                for( p1 = linebuf; p1 < spend; p1++ )
                    putc( *p1, stdout );
                putc( '\n', stdout );
            }
            return;
        }
        jump = FALSE;
        spend = execp;                  /* getline() returned the new end of the line buffer */
                                        /* compiled commands execute loop */
                                        /* an entry whose command field is zero ends cmds[] */
        for( ipc = cmds; ipc->command; ipc++ ) {
            if( !selected( ipc ) )      /* skip commands whose addresses do not select this line */
                continue;
        doit:                           /* entry point for a pending command (see goto above) */
            command( ipc );             /* execute the command pointed at */

            if( delete )                /* if delete flag is set */
                break;                  /* don't exec rest of compiled cmds */

            if( jump ) {                /* if jump set, follow cmd's link */
                jump = FALSE;
                                        /* a NULL link ends command processing for this line;
                                           otherwise the loop's ipc++ moves on from the link target */
                if( ( ipc = ipc->u.link ) == NULL )
                    break;
            }
        }
                                        /* all commands now done on the line */
                                        /* output the transformed line */
        if( !nflag && !delete ) {
            for( p1 = linebuf; p1 < spend; p1++ )
                putc( *p1, stdout );
            putc( '\n', stdout );
        }

        readout();                      /* emit any append text */

        delete = FALSE;                 /* clear delete flag; get next cmd */
    }
}

/*
 * selected -- decide whether the command at ipc applies to the current line
 *
 * ipc: command whose addresses (addr1, addr2) and flags are examined.  Its
 *      flags.inrange bit is set when the first address of a two-address
 *      command matches and cleared again when the second address is reached.
 *
 * Returns nonzero if the command is to be executed for the current line.
 * When the command's allbut flag is set the result is inverted.
 */
static int selected( sedcmd *ipc )
{
    register char * const       p1 = ipc->addr1;        /* first address */
    register char * const       p2 = ipc->addr2;        /*   and second */
    unsigned char               c;                      /* index into linenum[] */
    int const                   allbut = ipc->flags.allbut;

    if( !p1 )                           /* no address at all: every line is addressed */
        return( !allbut );

    if( ipc->flags.inrange ) {          /* inside an open range: look for its end */
        if( *p2 == CEND ) {
            ;                           /* CEND end address: nothing to test, the range stays open */
        } else if( *p2 == CLNUM ) {
            /* The byte after CLNUM is an index into linenum[].  Read it as
               unsigned char: plain char may be signed, and a stored index
               above 127 would then become a negative subscript. */
            c = (unsigned char)p2[1];
            if( lnum > linenum[c] ) {   /* already past the end line: close the range */
                ipc->flags.inrange = FALSE;
                return( allbut );       /*   and do not address this line */
            }
            if( lnum == linenum[c] )    /* this is the end line: it is still addressed */
                ipc->flags.inrange = FALSE;
        } else if( match( p2, 0, 0 ) ) {
            ipc->flags.inrange = FALSE; /* end RE matched: this line closes the range */
        }
    } else if( *p1 == CEND ) {
        if( !lastline )                 /* CEND is accepted only while lastline is set */
            return( allbut );
    } else if( *p1 == CLNUM ) {
        c = (unsigned char)p1[1];       /* same unsigned index as above */
        if( lnum != linenum[c] )
            return( allbut );
        if( p2 )                        /* a second address exists: open the range */
            ipc->flags.inrange = TRUE;
    } else if( match( p1, 0, 0 ) ) {
        if( p2 )                        /* a second address exists: open the range */
            ipc->flags.inrange = TRUE;
    } else {
        return( allbut );               /* first address RE did not match */
    }
    return( !allbut );
}

/*
 * match -- try the compiled RE at expbuf against the text in linebuf
 *
 * expbuf: compiled RE.  If it begins with CEOF, the RE used by the previous
 *         call is substituted; with no previous RE, FRENUL is printed and
 *         the program exits with status 2.
 * gf:     nonzero to copy genbuf back into linebuf first and to start the
 *         search at loc2.  In that mode an RE whose first byte is nonzero
 *         fails immediately.
 * is_cnt: with gf zero, nonzero starts the search at loc2 instead of at the
 *         beginning of linebuf.
 *
 * Returns nonzero on a match.  loc1 is left at the position where the match
 * begins; advance() records the end of the match in loc2.
 */
static int match(
    char                *expbuf,
    int                 gf,
    int                 is_cnt )
{
    static char         *prev_re = NULL;    /* RE used by the previous call */
    char                *text;              /* current start position in the line */
    char                *pat;               /* RE elements after the leading flag byte */
    char                *from;
    char                *to;
    char                lead = 0;           /* literal first character, if any */
    int                 have_lead;

    /* remember this RE, or fall back on the remembered one for an empty RE */
    if( *expbuf != CEOF ) {
        prev_re = expbuf;
    } else {
        if( prev_re == NULL ) {             /* no previous RE */
            fprintf( stderr, "%s", FRENUL );
            exit( 2 );
        }
        expbuf = prev_re;
    }

    /* choose where the search starts */
    if( !gf ) {
        locs = NULL;
        text = is_cnt ? loc2 : linebuf;
    } else {
        if( *expbuf != 0 )
            return( FALSE );
        /* copy genbuf into linebuf, terminating NUL included */
        to = linebuf;
        from = genbuf;
        do {
            *to = *from++;
        } while( *to++ != 0 );
        text = loc2;
        locs = text;
    }

    pat = expbuf + 1;

    /* nonzero flag byte: a single attempt at the start position only */
    if( *expbuf != 0 ) {
        loc1 = text;
        if( *pat == CCHR && pat[1] != *text )
            return( FALSE );                /* 1st char is wrong, so fail */
        return( advance( text, pat ) );     /* else try to match rest */
    }

    /* otherwise try every position up to and including the terminating NUL;
       when the RE starts with a literal, only positions holding that
       character are handed to advance() */
    have_lead = ( *pat == CCHR );
    if( have_lead )
        lead = pat[1];

    for( ;; text++ ) {
        if( ( !have_lead || *text == lead ) && advance( text, pat ) ) {
            loc1 = text;
            return( 1 );
        }
        if( *text == 0 )                    /* end of line reached without a match */
            return( FALSE );
    }
}

/* attempt to advance match pointer by one pattern element */
static int advance(
    register char       *lp,            /* source (linebuf) ptr */
    register char       *ep )           /* regular expression element ptr */
{
    register char const *curlp;         /* save ptr for closures */
    char                c;              /* scratch character holder */
    char                *bbeg;
    char                *tep;
    int                 ct;
    int                 i1;
    int                 i2;

    for( ;; )
        switch( *ep++ ) {
        case CCHR:                      /* literal character */
            if( *ep++ != *lp++ )        /* if chars unequal */
                return( FALSE );        /* return false */
            break;                      /* matched */

        case CDOT:                      /* anything but NUL */
            if( *lp++ == 0 )            /* first NUL is at EOL */
                return( FALSE );        /* return false */
            break;                      /* matched */

        case CNL:                       /* start-of-line */
        case CDOL:                      /* end-of-line */
            if( *lp != 0 )              /* found that first NUL? */
                return( FALSE );        /* return false */
            break;                      /* matched */

        case CEOF:                      /* end-of-address mark */
            loc2 = lp;                  /* set second loc */
            return( TRUE );             /* return true */

        case CCL:                       /* a closure */
            c = *lp++ &0177;
            if( !( ep[c >> 3] & bits[c & 07] ) ) /* is char in set? */
                return( FALSE );        /* return false */
            ep += 16;                   /* skip rest of bitmask */
            break;                      /*   and keep going */

        case CBRA:                      /* start of tagged pattern */
            brastart[*ep++] = lp;       /* mark it */
            break;                      /* and go */

        case CKET:                      /* end of tagged pattern */
            bracend[*ep++] = lp;        /* mark it */
            break;                      /* and go */

        case CBACK:
            bbeg = brastart[*ep];
            ct = bracend[*ep++] - bbeg;

            if( !memeql( bbeg, lp, ct ) )
                return( FALSE );        /* return false */
            lp += ct;
            break;                      /* matched */

        case CBACK | STAR:
            bbeg = brastart[*ep];
            if( ( ct = bracend[*ep++] - bbeg ) <= 0 )
                break;                  /* zero (or negative ??) length match */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?