📄 cs_regexp.cpp
字号:
{
/*
* Don't set startp if some later
* invocation of the same parentheses
* already has.
*/
if ( evars.regstartp [ no ] == NULL )
evars.regstartp [ no ] = save;
return ( 1 );
}
else
return ( 0 );
}
break;
case CLOSE + 1:
case CLOSE + 2:
case CLOSE + 3:
case CLOSE + 4:
case CLOSE + 5:
case CLOSE + 6:
case CLOSE + 7:
case CLOSE + 8:
case CLOSE + 9:
{
register int no;
register char *save;
no = OP ( scan ) - CLOSE;
save = evars.reginput;
if ( regmatch ( evars, next ) )
{
/*
* Don't set endp if some later
* invocation of the same parentheses
* already has.
*/
if ( evars.regendp [ no ] == NULL )
evars.regendp [ no ] = save;
return ( 1 );
}
else
return ( 0 );
}
break;
case BRANCH:
{
register char *save;
if ( OP ( next ) != BRANCH ) /* No choice. */
next = OPERAND ( scan ); /* Avoid recursion. */
else
{
do
{
save = evars.reginput;
if ( regmatch ( evars, OPERAND ( scan ) ) )
return ( 1 );
evars.reginput = save;
scan = regnext ( scan );
}
while ( scan != NULL && OP ( scan ) == BRANCH );
return ( 0 );
/* NOTREACHED */
}
}
break;
case STAR:
case PLUS:
{
register char nextch;
register int no;
register char *save;
register int min;
/*
* Lookahead to avoid useless match attempts
* when we know what character comes next.
*/
nextch = '\0';
if ( OP ( next ) == EXACTLY )
nextch = *OPERAND ( next );
min = ( OP ( scan ) == STAR ) ? 0 : 1;
save = evars.reginput;
no = regrepeat ( evars, OPERAND ( scan ) );
while ( no >= min )
{
/* If it could work, try it. */
if ( nextch == '\0' || *evars.reginput == nextch )
if ( regmatch ( evars, next ) )
return ( 1 );
/* Couldn't or didn't -- back up. */
no--;
evars.reginput = save + no;
}
return ( 0 );
}
break;
case END:
return ( 1 ); /* Success! */
break;
default:
FAIL ( "memory corruption" );
return ( 0 );
break;
}
scan = next;
}
/*
* We get here only if there's trouble -- normally "case END" is
* the terminating point.
*/
FAIL ( "corrupted pointers" );
return ( 0 );
}
/*
 - regrepeat - repeatedly match something simple, report how many
 *
 * Starting at evars.reginput, consumes as many characters as the simple
 * node p accepts, leaves evars.reginput just past the last accepted
 * character, and returns the number of characters consumed.
 */
int
regexp::regrepeat ( exec_vars& evars, char* p )
{
    const char *const operand = OPERAND ( p );
    const char *cursor = evars.reginput;
    int matched = 0;

    switch ( OP ( p ) )
    {
    case ANY:
        /* Accepts everything up to the terminating NUL. */
        matched = strlen ( cursor );
        cursor += matched;
        break;
    case EXACTLY:
        /* Only the first operand character is compared. */
        for ( ; *operand == *cursor; ++cursor )
            ++matched;
        break;
    case ANYOF:
        /* Accept while the input character appears in the operand set. */
        for ( ; *cursor != '\0' && strchr ( operand, *cursor ) != NULL; ++cursor )
            ++matched;
        break;
    case ANYBUT:
        /* Accept while the input character is absent from the operand set. */
        for ( ; *cursor != '\0' && strchr ( operand, *cursor ) == NULL; ++cursor )
            ++matched;
        break;
    default: /* Oh dear. Called inappropriately. */
        FAIL ( "internal foulup" );
        matched = 0; /* Best compromise. */
        break;
    }

    evars.reginput = const_cast<char *> ( cursor );
    return ( matched );
}
/*
- regnext - dig the "next" pointer out of a node
*/
char *
regexp::regnext ( char* p )
{
int offset;
if ( p == ®dummy )
return ( NULL );
offset = NEXT ( p );
if ( offset == 0 )
return ( NULL );
if ( OP ( p ) == BACK )
return ( p - offset );
else
return ( p + offset );
}
/*
 - count - number of leading sub-expression slots that hold a match
 *
 * Walks startp/endp from slot 0 and stops at the first slot whose start
 * or end pointer is null.  Returns that index, or NSUBEXP when every slot
 * is populated (the previous code returned 0 in that case).
 */
int
regexp::count()
{
    int filled = 0;
    while ( filled < NSUBEXP && startp [ filled ] != 0 && endp [ filled ] != 0 )
        ++filled;
    return filled;
}
/*
 - operator[] - text captured by sub-expression slot `index`
 *
 * Returns an empty string when index is outside [0, NSUBEXP) or when the
 * slot has no start or end pointer recorded.
 */
string
regexp::operator[] ( int index )
{
    const bool in_range = ( 0 <= index && index < NSUBEXP );

    /* Short-circuit keeps the array accesses inside bounds. */
    if ( in_range && startp [ index ] != 0 && endp [ index ] != 0 )
        return string ( startp [ index ], endp [ index ] - startp [ index ] );

    return string();
}
/*
 - clean - forget all recorded matches and release the compiled program
 *
 * Clears every startp/endp slot, deletes `program`, and resets it to 0 so
 * that a later clean() (for instance from the destructor) does not delete
 * the same pointer twice.
 */
void
regexp::clean()
{
    for ( int i = 0; i < NSUBEXP; i++ )
        startp [ i ] = endp [ i ] = 0;

    /* NOTE(review): the allocation of `program` is not visible here; if it
       is created with new[], this should be delete[] -- confirm. */
    delete program;
    program = 0;
}
/*
 - regexp - default constructor
 *
 * program starts out null, so the delete inside clean() is a no-op and
 * the call only zeroes the startp/endp slots.
 */
regexp::regexp()
    : program ( 0 )
{
    clean();
}
/*
 - ~regexp - destructor; releases everything through clean()
 */
regexp::~regexp()
{
    clean();
}
/* Storage for the static sentinel; regnext() appears to compare node
   pointers against this object's address. */
char regexp::regdummy = '\0';
//|
//| find all possible sub-strings that matches with the regular expression
//|
bool
regexp::split ( const char *str, array<string>& all_matches )
{
// clear up anything in the array
all_matches.clear();
const char *p = str;
bool rc = false; // assume no matches
// match any sub-string
while ( exec ( p ) )
{
rc = true;
string s ( startp [ 0 ], endp [ 0 ] - startp [ 0 ] );
all_matches.push ( s );
p = endp [ 0 ];
// check if we have reached the end of the string.
if ( ( *p == 0 ) || ( *( p + 1 ) == 0 ) )
{
break;
}
}
return rc;
}
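/*
- regsub - perform substitutions after a regexp match
-
- NOTE: this comment is not closed here, so the whole subst() body below
- is commented out.  It still refers to `dest` and `prog`, which it never
- declares, and has bare `return;` statements in a function returning string.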
string regexp::subst ( const char* source )
{
char *src;
string dst;
char c;
int no;
int len;
if ( source == NULL || dest == NULL )
{
FAIL ( "NULL parm to regsub" );
return;
}
if ( UCHARAT ( program ) != MAGIC )
{
FAIL ( "damaged regexp fed to regsub" );
return;
}
src = source;
while ( ( c = *src++ ) != '\0' )
{
if ( c == '&' )
no = 0;
else if ( c == '\\' && '0' <= *src && *src <= '9' )
no = *src++ - '0';
else
no = -1;
if ( no < 0 )
{
// Ordinary character.
if ( c == '\\' && ( *src == '\\' || *src == '&' ) )
c = *src++;
dst += c;
}
else if ( prog->startp [ no ] != NULL && prog->endp [ no ] != NULL )
{
len = prog->endp [ no ] - prog->startp [ no ];
dst.add ( startp [ no ], len );
//if ( len != 0 && *( dst - 1 ) == '\0' ) // strncpy hit NUL.
//{
// FAIL ( "damaged match string" );
// return;
//}
}
}
return dst;
}
*/
#ifdef DEBUG
/* Forward declaration for regdump().  In C++ an empty parameter list means
   "no arguments", so the parameter must be spelled out to match the call. */
STATIC char *regprop ( char* op );
/*
 - regdump - dump a regexp onto stdout in vaguely comprehensible form
 *
 * Walks the compiled program of r node by node, starting one byte past
 * r->program.  For each node it prints the node's offset, its opcode name
 * and the offset of its successor, followed by the literal operand for
 * ANYOF / ANYBUT / EXACTLY nodes.  The header fields regstart, reganch
 * and regmust are printed last.
 *
 * NOTE(review): regnext is defined in this file as a member of regexp and
 * the fields read through r may not be accessible from a free function;
 * confirm that this DEBUG-only code still builds.
 */
void
regdump ( regexp* r )
{
    char *s = r->program + 1;
    char op = EXACTLY; /* Arbitrary non-END op. */

    while ( op != END )
    {
        /* While that wasn't END last time... */
        op = OP ( s );

        /* Pointer differences are ptrdiff_t; convert before passing to %d. */
        printf ( "%2d%s", static_cast<int> ( s - r->program ), regprop ( s ) ); /* Where, what. */

        char *next = regnext ( s );
        if ( next == NULL ) /* Next ptr. */
            printf ( "(0)" );
        else
            printf ( "(%d)", static_cast<int> ( ( s - r->program ) + ( next - s ) ) );

        s += 3; /* Step over the node header to its operand. */
        if ( op == ANYOF || op == ANYBUT || op == EXACTLY )
        {
            /* Literal string, where present. */
            while ( *s != '\0' )
            {
                putchar ( *s );
                s++;
            }
            s++; /* Skip the operand's terminating NUL. */
        }
        putchar ( '\n' );
    }

    /* Header fields of interest. */
    if ( r->regstart != '\0' )
        printf ( "start `%c' ", r->regstart );
    if ( r->reganch )
        printf ( "anchored " );
    if ( r->regmust != NULL )
        printf ( "must have \"%s\"", r->regmust );
    printf ( "\n" );
}
/*
 - regprop - printable representation of opcode
 *
 * Writes ":" followed by the opcode's name into a function-local static
 * buffer and returns that buffer, so the result is overwritten by the next
 * call.  OPEN and CLOSE nodes get their parenthesis number appended.  For
 * an unrecognised opcode FAIL is invoked and, if control continues, only
 * ":" is returned.
 */
static char *
regprop ( char* op )
{
    /* Stays NULL when the name was formatted straight into buf or when the
       opcode is unknown; previously it was read uninitialized in the
       latter case. */
    const char *p = NULL;
    static char buf [ 50 ];

    (void) strcpy ( buf, ":" );
    switch ( OP ( op ) )
    {
    case BOL:
        p = "BOL";
        break;
    case EOL:
        p = "EOL";
        break;
    case ANY:
        p = "ANY";
        break;
    case ANYOF:
        p = "ANYOF";
        break;
    case ANYBUT:
        p = "ANYBUT";
        break;
    case BRANCH:
        p = "BRANCH";
        break;
    case EXACTLY:
        p = "EXACTLY";
        break;
    case NOTHING:
        p = "NOTHING";
        break;
    case BACK:
        p = "BACK";
        break;
    case END:
        p = "END";
        break;
    case OPEN + 1:
    case OPEN + 2:
    case OPEN + 3:
    case OPEN + 4:
    case OPEN + 5:
    case OPEN + 6:
    case OPEN + 7:
    case OPEN + 8:
    case OPEN + 9:
        /* Name is written directly after the ":" already in buf. */
        sprintf ( buf + strlen ( buf ), "OPEN%d", OP ( op ) - OPEN );
        break;
    case CLOSE + 1:
    case CLOSE + 2:
    case CLOSE + 3:
    case CLOSE + 4:
    case CLOSE + 5:
    case CLOSE + 6:
    case CLOSE + 7:
    case CLOSE + 8:
    case CLOSE + 9:
        sprintf ( buf + strlen ( buf ), "CLOSE%d", OP ( op ) - CLOSE );
        break;
    case STAR:
        p = "STAR";
        break;
    case PLUS:
        p = "PLUS";
        break;
    default:
        FAIL ( "corrupted opcode" );
        break;
    }

    if ( p != NULL )
        (void) strcat ( buf, p );
    return ( buf );
}
#endif
};
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -