sedcomp.c
来自「开放源码的编译器open watcom 1.6.0版的源代码」· C语言 代码 · 共 1,082 行 · 第 1/4 页
C
1,082 行
char *expbuf, /* place to compile it to */
char redelim ) /* RE end-marker to look for */
/* uses cp, bcount */
{
register char *ep = expbuf; /* current-compiled-char pointer */
register char *sp = cp; /* source-character ptr */
register int c; /* current-character */
char negclass; /* all-but flag */
char *lastep; /* ptr to last expr compiled */
char const *svclass; /* start of current char class */
char brnest[MAXTAGS+1]; /* bracket-nesting array */
char *brnestp; /* ptr to current bracket-nest */
char const *pp; /* scratch pointer */
int tags; /* # of closed tags */
char *obr[MAXTAGS+1] = {0}; /* ep values when \( seen */
int opentags = 0; /* Used to index obr */
if( *cp == redelim ) /* if first char is RE endmarker */
return( cp++, *ep++ = CEOF, ep ); /* use existing RE. */
lastep = NULL; /* there's no previous RE */
brnestp = brnest; /* initialize ptr to brnest array */
tags = bcount = 0; /* initialize counters */
if( ( *ep++ = (int)( *sp == '^' ) ) != 0 ) /* check for start-of-line syntax */
sp++;
for( ;; ) {
if( ep >= expbuf + RELIMIT ) /* match is too large */
return( cp = sp, BAD ); /* Not exercised by sedtest.mak */
if( ( c = *sp++ ) == redelim ) {/* found the end of the RE */
cp = sp;
if( brnestp != brnest ) /* \(, \) unbalanced */
return( BAD );
*ep++ = CEOF; /* write end-of-pattern mark */
return( ep ); /* return ptr to compiled RE */
}
if( c == '*'
|| c == '+'
|| c == '\\' && *sp == '{' ) /* if we're a postfix op */
;
else
lastep = ep; /* get ready to match last */
switch( c ) {
case '\\':
switch( c = *sp++ ) {
case '(': /* start tagged section */
if( ++bcount <= MAXTAGS ) { /* bump tag count */
*brnestp++ = (char)bcount; /* update tag stack */
obr[opentags] = ep; /* Remember for /(.../)* */
}
opentags++;
*ep++ = CBRA; /* enter tag-start */
*ep++ = (char)bcount;
break;
case ')': /* end tagged section */
if( --opentags < 0 ) /* extra \) */
return( cp = sp, BAD );
*ep++ = CKET; /* enter end-of-tag */
if( ++tags <= MAXTAGS ) /* count closed tags */
*ep++ = *--brnestp; /* pop tag stack */
else
*ep++ = 0; /* Placeholder - should not be used */
break;
case '\n': /* escaped newline no good */
return( cp = sp, BAD );
case 'n': /* match a newline */
c = '\n';
goto defchar;
case 't': /* match a tab */
c = '\t';
goto defchar;
case '\\': /* match a literal backslash */
goto defchar;
default:
if( c >= '1' && c <= '9' ) { /* tag use */
if( ( c -= '0' ) > tags ) /* too few */
return( BAD );
*ep++ = CBACK; /* enter tag mark */
*ep++ = (char)c; /* and the number */
break;
}
#if 1
/* This allows \ to stop "special" even if it is not. */
goto defchar; /* else match \c */
#else
/* This is IEEE 2001 behavior */
return( cp = sp, BAD ); /* Otherwise /\?/ && /\\?/ synonymous */
#endif
case '{': /* '}' should balance for vi */
{
int i1 = 0;
int i2 = 0;
if( !lastep )
return( cp = sp, BAD ); /* rep error */
*lastep |= MTYPE;
if( !isdigit( *sp ) )
return( cp = sp, BAD );
while( isdigit( *sp ) )
i1 = i1*10 + *sp++ - '0';
if( i1 > 255 )
return( cp = sp, BAD );
*ep++ = (char)i1;
if( *sp == '\\' && sp[1] == /* '{' vi brace balancing */ '}' )
sp += 2, *ep++ = 0;
else if( *sp == ',' && sp[1] == '\\' && sp[2] == /* '{' vi brace balancing */ '}' )
sp += 3, *ep++ = (char)0xFF;
else if( *sp++ ==',') {
if ( !isdigit( *sp ) )
*ep++ = 255;
else {
while (isdigit( *sp ) )
i2 = i2*10 + *sp++ - '0';
*ep++ = (char)(i2-i1);
}
if( *sp != '\\' || sp[1] != /* '{' vi brace balancing */ '}' || i2 < i1 || i2 > 255 )
return( cp = sp, BAD );
sp += 2;
} else
return( cp = sp, BAD );
}
goto handle_cket;
}
break;
case '\n': /* Can not happen? WFB 20040801 */
case '\0':
return( cp = sp, BAD ); /* incomplete regular expression */
case '.': /* match any char except newline */
*ep++ = CDOT;
break;
case '+': /* 1 to n repeats of previous pattern */
if( lastep == NULL ) /* if + not first on line */
goto defchar; /* match a literal + */
#if 0 /* Removed constraint WFB 20040804 */
if( *lastep == CKET ) /* can't iterate a tag */
return( cp = sp, BAD );
#endif
pp = ep; /* else save old ep */
while( lastep < pp ) /* so we can blt the pattern */
*ep++ = *lastep++;
*lastep |= STAR; /* flag the copy */
break;
case '*': /* 0..n repeats of previous pattern */
if( lastep == NULL ) /* if * isn't first on line */
goto defchar; /* match a literal * */
#if 0 /* Removed constraint WFB 20040804 */
if( *lastep == CKET ) /* can't iterate a tag */
return( cp = sp, BAD );
#endif
*lastep |= STAR; /* flag previous pattern */
goto handle_cket;
case '$': /* match only end-of-line */
if( *sp != redelim ) /* if we're not at end of RE */
goto defchar; /* match a literal $ */
*ep++ = CDOL; /* insert end-symbol mark */
break;
case '[': /* begin character set pattern */
{
int classct; /* class element count */
register unsigned uc; /* current-character */
if( ep + 17 >= expbuf + RELIMIT )
ABORT( REITL ); /* Not exercised by sedtest.mak */
*ep++ = CCL; /* insert class mark */
if( ( negclass = (int)( ( uc = *sp++ ) == '^' ) ) != 0 )
uc = *sp++;
svclass = sp; /* save ptr to class start */
do {
switch( uc ) {
case '\0':
ABORT( CGMSG );
case '-': /* handle character ranges */
if( sp > svclass && *sp != ']' ) {
unsigned const lo = (unsigned)*( sp - 2 );
unsigned const hi = (unsigned)*sp;
if( lo > hi )
ABORT( CGMSG );
if( sp[1] == '-' && sp[2] != ']' )
ABORT( CGMSG );
for( uc = lo; uc <= hi; uc++ )
ep[uc >> 3] |= bits[uc & 7];
continue;
}
break;
case '\\': /* handle escape sequences in sets */
if( ( uc = *sp++ ) == 'n' )
uc = '\n';
else if( uc == 't' )
uc = '\t';
else
--sp, uc = '\\'; /* \n and \t are special, \* is not */
case '[': /* Handle named character class */
if( *sp == ':' ) {
static const struct {
const char *s;
int ( *isf )( int c );
} t[] = {
{ "alnum:]", isalnum },
{ "alpha:]", isalpha },
{ "blank:]", isblank },
{ "cntrl:]", iscntrl },
{ "digit:]", isdigit },
{ "graph:]", isgraph },
{ "lower:]", islower },
{ "print:]", isprint },
{ "punct:]", ispunct },
{ "space:]", isspace },
{ "upper:]", isupper },
{ "xdigit:]", isxdigit },
};
size_t si;
int ( *isf )( int c );
for( si = 0; si < sizeof t / sizeof *t; si++ )
if( strncmp( sp+1, t[si].s, strlen( t[si].s ) ) == 0 )
break;
if( si >= sizeof t / sizeof *t ) /* Add class */
ABORT( UNCLS );
sp += 1 + strlen( t[si].s );
for( isf = t[si].isf, uc = 1; uc < 128; uc++ )
if( isf( (int)uc ) )
ep[uc >> 3] |= bits[uc & 7];
continue;
}
break;
}
/* add (maybe translated) char to set */
ep[uc >> 3] |= bits[uc & 7];
} while ( ( uc = *sp++ ) != ']' );
/* invert bitmask if all-but needed */
if( negclass )
for( classct = 0; classct < 16; classct++ )
ep[classct] ^= 0xFF;
ep[0] &= 0xFE; /* never match ASCII 0 */
ep += 16; /* advance ep past set mask */
}
break;
defchar: /* match literal character */
default: /* which is what we'd do by default */
*ep++ = CCHR; /* insert character mark */
*ep++ = (char)c;
break;
handle_cket:
switch( *lastep & ~STAR & ~MTYPE ) {
case CCHR:
case CDOT:
case CCL:
break;
case CBRA:
return( cp = sp, BAD );
case CKET: {
/* Make room to insert skip marker in expbuf */
char * const firstep = obr[opentags];
int const width = ep - firstep;
int i = width;
if( width >= 256 )
ABORT( REITL ); /* Not exercised by sedtest.mak */
*firstep |= ( *lastep ^ CKET ) ; /* Mark \( as * or \{ terminated */
while( --i >= 2 )
firstep[i+1] = firstep[i];
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?