sedcomp.c

来自「开放源码的编译器open watcom 1.6.0版的源代码」· C语言 代码 · 共 1,082 行 · 第 1/4 页

C
1,082
字号
    char            *expbuf,            /* place to compile it to */
    char            redelim )           /* RE end-marker to look for */
                                        /* uses cp, bcount */
{
    register char   *ep = expbuf;       /* current-compiled-char pointer */
    register char   *sp = cp;           /* source-character ptr */
    register int    c;                  /* current-character */
    char            negclass;           /* all-but flag */
    char            *lastep;            /* ptr to last expr compiled */
    char const      *svclass;           /* start of current char class */
    char            brnest[MAXTAGS+1];  /* bracket-nesting array */
    char            *brnestp;           /* ptr to current bracket-nest */
    char const      *pp;                /* scratch pointer */
    int             tags;               /* # of closed tags */
    char            *obr[MAXTAGS+1] = {0}; /* ep values when \( seen */
    int             opentags = 0;       /* Used to index obr */

    if( *cp == redelim )                /* if first char is RE endmarker */
        return( cp++, *ep++ = CEOF, ep ); /* use existing RE. */

    lastep = NULL;                      /* there's no previous RE */
    brnestp = brnest;                   /* initialize ptr to brnest array */
    tags = bcount = 0;                  /* initialize counters */

    if( ( *ep++ = (int)( *sp == '^' ) ) != 0 ) /* check for start-of-line syntax */
        sp++;

    for( ;; ) {
        if( ep >= expbuf + RELIMIT )    /* match is too large */
            return( cp = sp, BAD );     /* Not exercised by sedtest.mak */
        if( ( c = *sp++ ) == redelim ) {/* found the end of the RE */
            cp = sp;
            if( brnestp != brnest )     /* \(, \) unbalanced */
                return( BAD );
            *ep++ = CEOF;               /* write end-of-pattern mark */
            return( ep );               /* return ptr to compiled RE */
        }
        if( c == '*'
        ||  c == '+'
        ||  c == '\\' && *sp == '{' )   /* if we're a postfix op */
            ;
        else
            lastep = ep;                /* get ready to match last */

        switch( c ) {
        case '\\':
            switch( c = *sp++ ) {
            case '(':                   /* start tagged section */
                if( ++bcount <= MAXTAGS ) { /* bump tag count */
                    *brnestp++ = (char)bcount; /* update tag stack */
                    obr[opentags] = ep; /* Remember for /(.../)* */
                }
                opentags++;
                *ep++ = CBRA;           /* enter tag-start */
                *ep++ = (char)bcount;
                break;
            case ')':                   /* end tagged section */
                if( --opentags < 0 )    /* extra \) */
                    return( cp = sp, BAD );
                *ep++ = CKET;           /* enter end-of-tag */
                if( ++tags <= MAXTAGS ) /* count closed tags */
                    *ep++ = *--brnestp; /* pop tag stack */
                else
                    *ep++ = 0;          /* Placeholder - should not be used */
                break;
            case '\n':                  /* escaped newline no good */
                return( cp = sp, BAD );
            case 'n':                   /* match a newline */
                c = '\n';
                goto defchar;
            case 't':                   /* match a tab */
                c = '\t';
                goto defchar;
            case '\\':                  /* match a literal backslash */
                goto defchar;
            default:
                if( c >= '1' && c <= '9' ) { /* tag use */
                    if( ( c -= '0' ) > tags ) /* too few */
                        return( BAD );
                    *ep++ = CBACK;      /* enter tag mark */
                    *ep++ = (char)c;    /* and the number */
                    break;
                }
#if 1
                /* This allows \ to stop "special" even if it is not. */
                goto defchar;           /* else match \c */
#else
                /* This is IEEE 2001 behavior */
                return( cp = sp, BAD ); /* Otherwise /\?/ && /\\?/ synonymous */
#endif
            case '{':                   /* '}' should balance for vi */
                {
                    int i1 = 0;
                    int i2 = 0;

                    if( !lastep )
                        return( cp = sp, BAD ); /* rep error */
                    *lastep |= MTYPE;
                    if( !isdigit( *sp ) )
                        return( cp = sp, BAD );
                    while( isdigit( *sp ) )
                        i1 = i1*10 + *sp++ - '0';
                    if( i1 > 255 )
                        return( cp = sp, BAD );
                    *ep++ = (char)i1;
                    if( *sp == '\\' && sp[1] == /* '{' vi brace balancing */ '}' )
                        sp += 2, *ep++ = 0;
                    else if( *sp == ',' && sp[1] == '\\' && sp[2] == /* '{' vi brace balancing */ '}' )
                        sp += 3, *ep++ = (char)0xFF;
                    else if( *sp++ ==',') {
                      if ( !isdigit( *sp ) )
                          *ep++ = 255;
                      else {
                        while (isdigit( *sp ) )
                            i2 = i2*10 + *sp++ - '0';
                        *ep++ = (char)(i2-i1);
                      }
                      if( *sp != '\\' || sp[1] != /* '{' vi brace balancing */ '}' || i2 < i1 || i2 > 255 )
                          return( cp = sp, BAD );
                      sp += 2;
                    } else
                        return( cp = sp, BAD );
                }
                goto handle_cket;
            }
            break;

        case '\n':                      /* Can not happen? WFB 20040801 */
        case '\0':
            return( cp = sp, BAD );     /* incomplete regular expression */

        case '.':                       /* match any char except newline */
            *ep++ = CDOT;
            break;

        case '+':                       /* 1 to n repeats of previous pattern */
            if( lastep == NULL )        /* if + not first on line */
                goto defchar;           /*   match a literal + */
#if 0                                   /* Removed constraint WFB 20040804 */
            if( *lastep == CKET )       /* can't iterate a tag */
                return( cp = sp, BAD );
#endif
            pp = ep;                    /* else save old ep */
            while( lastep < pp )        /* so we can blt the pattern */
                *ep++ = *lastep++;
            *lastep |= STAR;            /* flag the copy */
            break;

        case '*':                       /* 0..n repeats of previous pattern */
            if( lastep == NULL )        /* if * isn't first on line */
                goto defchar;           /*   match a literal * */
#if 0                                   /* Removed constraint WFB 20040804 */
            if( *lastep == CKET )       /* can't iterate a tag */
                return( cp = sp, BAD );
#endif
            *lastep |= STAR;            /* flag previous pattern */
            goto handle_cket;

        case '$':                       /* match only end-of-line */
            if( *sp != redelim )        /* if we're not at end of RE */
                goto defchar;           /*   match a literal $ */
            *ep++ = CDOL;               /* insert end-symbol mark */
            break;

        case '[':                       /* begin character set pattern */
            {
                int                     classct; /* class element count */
                register unsigned       uc; /* current-character */

                if( ep + 17 >= expbuf + RELIMIT )
                    ABORT( REITL );     /* Not exercised by sedtest.mak */
                *ep++ = CCL;            /* insert class mark */
                if( ( negclass = (int)( ( uc = *sp++ ) == '^' ) ) != 0 )
                    uc = *sp++;
                svclass = sp;           /* save ptr to class start */
                do {
                    switch( uc ) {
                    case '\0':
                        ABORT( CGMSG );
                    case '-':           /* handle character ranges */
                        if( sp > svclass && *sp != ']' ) {
                            unsigned const  lo = (unsigned)*( sp - 2 );
                            unsigned const  hi = (unsigned)*sp;
                            if( lo > hi )
                                ABORT( CGMSG );
                            if( sp[1] == '-' && sp[2] != ']' )
                                ABORT( CGMSG );
                            for( uc = lo; uc <= hi; uc++ )
                                ep[uc >> 3] |= bits[uc & 7];
                            continue;
                        }
                        break;
                    case '\\':          /* handle escape sequences in sets */
                        if( ( uc = *sp++ ) == 'n' )
                            uc = '\n';
                        else if( uc == 't' )
                            uc = '\t';
                        else
                            --sp, uc = '\\'; /* \n and \t are special, \* is not */
                    case '[':           /* Handle named character class */
                        if( *sp == ':' ) {
                            static const struct {
                                const char *s;
                                int ( *isf )( int c );
                            } t[] = {
                                { "alnum:]", isalnum },
                                { "alpha:]", isalpha },
                                { "blank:]", isblank },
                                { "cntrl:]", iscntrl },
                                { "digit:]", isdigit },
                                { "graph:]", isgraph },
                                { "lower:]", islower },
                                { "print:]", isprint },
                                { "punct:]", ispunct },
                                { "space:]", isspace },
                                { "upper:]", isupper },
                                { "xdigit:]", isxdigit },
                            };
                            size_t si;
                            int ( *isf )( int c );

                            for( si = 0; si < sizeof t / sizeof *t; si++ )
                                if( strncmp( sp+1, t[si].s, strlen( t[si].s ) ) == 0 )
                                    break;
                            if( si >= sizeof t / sizeof *t ) /* Add class */
                                ABORT( UNCLS );
                            sp += 1 + strlen( t[si].s );
                            for( isf = t[si].isf, uc = 1; uc < 128; uc++ )
                                if( isf( (int)uc ) )
                                    ep[uc >> 3] |= bits[uc & 7];
                            continue;
                        }
                        break;
                    }
                                        /* add (maybe translated) char to set */
                    ep[uc >> 3] |= bits[uc & 7];
                } while ( ( uc = *sp++ ) != ']' );
                                        /* invert bitmask if all-but needed */
                if( negclass )
                    for( classct = 0; classct < 16; classct++ )
                        ep[classct] ^= 0xFF;
                ep[0] &= 0xFE;          /* never match ASCII 0 */
                ep += 16;               /* advance ep past set mask */
            }
            break;

        defchar:                        /* match literal character */
        default:                        /* which is what we'd do by default */
            *ep++ = CCHR;               /* insert character mark */
            *ep++ = (char)c;
            break;

        handle_cket:
            switch( *lastep & ~STAR & ~MTYPE ) {
            case CCHR:
            case CDOT:
            case CCL:
                break;
            case CBRA:
                return( cp = sp, BAD );
            case CKET: {
                    /* Make room to insert skip marker in expbuf */
                    char * const    firstep = obr[opentags];
                    int const       width = ep - firstep;
                    int             i = width;

                    if( width >= 256 )
                        ABORT( REITL ); /* Not exercised by sedtest.mak */
                    *firstep |= ( *lastep ^ CKET ) ; /* Mark \( as * or \{ terminated */
                    while( --i >= 2 )
                        firstep[i+1] = firstep[i];

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?