📄 regexp.cpp
字号:
if (regnarrate)
_ftprintf(stderr, _T( "%s...\n" ), regprop(scan));
#endif
next = regnext(scan);
switch (OP(scan))
{
case BOL:
if (reginput != regbol)
return false;
break;
case EOL:
if (*reginput != '\0')
return false;
break;
case WORDA:
/* Must be looking at a letter, digit, or _ */
if ((!isalnum(*reginput)) && *reginput != '_')
return(0);
/* Prev must be BOL or nonword */
if (reginput > regbol &&
(isalnum(reginput[-1]) || reginput[-1] == '_'))
return(0);
break;
case WORDZ:
/* Must be looking at non letter, digit, or _ */
if (isalnum(*reginput) || *reginput == '_')
return(0);
/* We don't care what the previous char was */
break;
case ANY:
if (*reginput == '\0')
return false;
reginput++;
break;
case EXACTLY:
{
size_t len;
LPTSTR const opnd = OPERAND(scan);
// Inline the first character, for speed.
if (*opnd != *reginput)
return false;
len = _tcslen(opnd);
if (len > 1 && _tcsncmp(opnd, reginput, len) != 0)
return false;
reginput += len;
break;
}
case ANYOF:
if (*reginput == '\0' ||
_tcschr(OPERAND(scan), *reginput) == NULL)
return false;
reginput++;
break;
case ANYBUT:
if (*reginput == '\0' ||
_tcschr(OPERAND(scan), *reginput) != NULL)
return false;
reginput++;
break;
case NOTHING:
break;
case BACK:
break;
case OPEN+1: case OPEN+2: case OPEN+3:
case OPEN+4: case OPEN+5: case OPEN+6:
case OPEN+7: case OPEN+8: case OPEN+9:
{
const int no = OP(scan) - OPEN;
LPTSTR const input = reginput;
if (regmatch(next))
{
// Don't set startp if some later
// invocation of the same parentheses
// already has.
if (regstartp[no] == NULL)
regstartp[no] = input;
return true;
}
else
return false;
break;
}
case CLOSE+1: case CLOSE+2: case CLOSE+3:
case CLOSE+4: case CLOSE+5: case CLOSE+6:
case CLOSE+7: case CLOSE+8: case CLOSE+9:
{
const int no = OP(scan) - CLOSE;
LPTSTR const input = reginput;
if (regmatch(next))
{
// Don't set endp if some later
// invocation of the same parentheses
// already has.
if (regendp[no] == NULL)
regendp[no] = input;
return true;
}
else
return false;
break;
}
case BRANCH:
{
LPTSTR const save = reginput;
if (OP(next) != BRANCH) // No choice.
next = OPERAND(scan); // Avoid recursion.
else
{
while (OP(scan) == BRANCH)
{
if (regmatch(OPERAND(scan)))
return true;
reginput = save;
scan = regnext(scan);
}
return false;
// NOTREACHED
}
break;
}
case STAR: case PLUS:
{
const TCHAR nextch = (OP(next) == EXACTLY) ? *OPERAND(next) : '\0';
size_t no;
LPTSTR const save = reginput;
const size_t min = (OP(scan) == STAR) ? 0 : 1;
for (no = regrepeat(OPERAND(scan)) + 1; no > min; no--)
{
reginput = save + no - 1;
// If it could work, try it.
if (nextch == '\0' || *reginput == nextch)
if (regmatch(next))
return true;
}
return false;
break;
}
case END:
return true; // Success!
break;
default:
regerror( REGERR_CORRUPTION );
return false;
break;
}
}
// We get here only if there's trouble -- normally "case END" is
// the terminating point.
regerror( REGERR_CORRUPTED_POINTERS );
return false;
}
// regrepeat - report how many times something simple would match
//
// Counts, starting at the current reginput position, how many consecutive
// characters the single node 'node' would accept. reginput itself is not
// moved. For an opcode that is not one of the four handled below, the error
// REGERR_BAD_REGREPEAT is reported and 0 is returned.
size_t CRegExecutor::regrepeat( LPTSTR node )
{
	switch (OP(node))
	{
	case ANY:
		// Every remaining character up to the terminating NUL is accepted.
		return _tcslen(reginput);

	case EXACTLY:
	{
		// Only the first operand character is compared: count the run of
		// that character at the front of the input.
		const TCHAR wanted = *OPERAND(node);
		size_t run = 0;
		LPTSTR cursor = reginput;
		while (*cursor == wanted)
		{
			++run;
			++cursor;
		}
		return run;
	}

	case ANYOF:
		// Length of the leading span made only of characters in the set.
		return _tcsspn(reginput, OPERAND(node));

	case ANYBUT:
		// Length of the leading span containing none of the set's characters.
		return _tcscspn(reginput, OPERAND(node));

	default:
		// Oh dear. Called inappropriately.
		regerror( REGERR_BAD_REGREPEAT );
		return 0;	// Best compromise.
	}
	// NOTREACHED
}
#ifdef _RE_DEBUG
// regdump - dump a regexp onto stdout in vaguely comprehensible form
void regexp::regdump()
{
LPTSTR s;
TCHAR op = EXACTLY; // Arbitrary non-END op.
LPTSTR next;
s = _tcsinc(program);
while (op != END)
{ // While that wasn't END last time...
op = OP(s);
_tprintf(_T( "%2d%s" ), s-program, regprop(s)); // Where, what.
next = regnext(s);
if (next == NULL) // Next ptr.
_tprintf(_T( "(0)" ));
else
_tprintf(_T( "(%d)" ), (s-program)+(next-s));
s += 3;
if (op == ANYOF || op == ANYBUT || op == EXACTLY)
{
// Literal string, where present.
while (*s != '\0')
{
_puttchar(*s);
s = _tcsinc(s);
}
s = _tcsinc(s);
}
_puttchar('\n');
}
// Header fields of interest.
if (regstart != '\0')
_tprintf(_T( "start `%c' " ), regstart);
if (reganch)
_tprintf(_T( "anchored " ));
if (regmust != NULL)
_tprintf(_T( "must have \"%s\"" ), regmust);
_tprintf(_T( "\n" ));
}
// regprop - printable representation of opcode
//
// Builds the text ":<NAME>" for the opcode of node 'op' in a function-local
// static buffer and returns a pointer to that buffer, so the result is
// overwritten by the next call. OPEN/CLOSE nodes are rendered with their
// parenthesis number (e.g. ":OPEN3"). For an unknown opcode the error
// REGERR_CORRUPTED_OPCODE is reported and just ":" is returned.
#define OUTPUT(s) case s: p = _T( #s ); break
LPTSTR CRegProgramAccessor::regprop( LPTSTR op )
{
	// Must start out NULL: the default case and the OPEN/CLOSE cases leave
	// it untouched, and it is tested after the switch.
	LPTSTR p = NULL;
	static TCHAR buf[50];

	(void) _tcscpy(buf, _T( ":" ));

	switch (OP(op))
	{
	OUTPUT( BOL );
	OUTPUT( EOL );
	OUTPUT( ANY );
	OUTPUT( ANYOF );
	OUTPUT( ANYBUT );
	OUTPUT( BRANCH );
	OUTPUT( EXACTLY );
	OUTPUT( NOTHING );
	OUTPUT( BACK );
	OUTPUT( END );
	OUTPUT( STAR );
	OUTPUT( PLUS );
	OUTPUT( WORDA );
	OUTPUT( WORDZ );
	case OPEN+1: case OPEN+2: case OPEN+3:
	case OPEN+4: case OPEN+5: case OPEN+6:
	case OPEN+7: case OPEN+8: case OPEN+9:
		// The name is formatted straight into buf, so there is nothing
		// left to append afterwards.
		_stprintf(buf+_tcslen(buf), _T( "OPEN%d" ), OP(op)-OPEN);
		break;
	case CLOSE+1: case CLOSE+2: case CLOSE+3:
	case CLOSE+4: case CLOSE+5: case CLOSE+6:
	case CLOSE+7: case CLOSE+8: case CLOSE+9:
		_stprintf(buf+_tcslen(buf), _T( "CLOSE%d" ), OP(op)-CLOSE);
		break;
	default:
		regerror( REGERR_CORRUPTED_OPCODE );
		break;
	}

	if (p != NULL)
		(void) _tcscat(buf, p);
	return(buf);
}
#endif
///////////////////////////////////////////////////////////////////////////////
// Default constructor: the object starts with no compiled expression (rc)
// and no remembered subject string.
Regexp::Regexp()
	: rc( 0 )
	, string( 0 )
{
}
// Constructs the object around a newly allocated regexp built from 'exp'
// and 'iCase'; no subject string is remembered yet.
Regexp::Regexp( LPCTSTR exp, BOOL iCase )
	: rc( new regexp( exp, iCase ) )
	, string( 0 )
{
}
// Copy constructor: copies the rc pointer, the error text and the subject
// string pointer from 'r', and bumps rc->count when there is an rc so the
// extra holder of the same regexp object is recorded.
Regexp::Regexp( const Regexp &r )
	: rc( r.rc )
	, m_szError( r.m_szError )
	, string( r.string )
{
	if ( rc != 0 )
	{
		rc->count++;
	}
}
// Assignment: gives up the current rc (deleting it when its count was
// already zero before the decrement), then takes over r's rc, subject
// string pointer and error text. Self-assignment is a no-op.
const Regexp & Regexp::operator=( const Regexp & r )
{
	if ( this == &r )
		return *this;

	// Release what we currently hold.
	if ( rc )
	{
		const bool wasLastHolder = ( rc->count == 0 );
		rc->count--;
		if ( wasLastHolder )
			delete rc;
	}

	// Attach to r's regexp object and record the additional holder.
	rc = r.rc;
	if ( rc )
		rc->count++;

	string = r.string;
	m_szError = r.m_szError;
	return *this;
}
// Destructor: decrements rc->count and deletes rc when the count was
// already zero before the decrement. Does nothing when there is no rc.
Regexp::~Regexp()
{
	if ( ! rc )
		return;

	const bool wasLastHolder = ( rc->count == 0 );
	rc->count--;
	if ( wasLastHolder )
		delete rc;
}
// Runs the compiled expression against 's' and remembers 's' as the subject
// string for later SubStart() calls.
//
// Returns the result of regexec. After the call rc->numSubs holds one less
// than the number of leading non-NULL startp entries on a successful match,
// and -1 otherwise. When there is no compiled expression the
// REGERR_NO_REGEXP message is stored and false is returned.
bool Regexp::Match( const TCHAR * s )
{
	ClearErrorString();
	string = s;

	if ( ! rc )
	{
		m_szError = CRegErrorHandler::FindErr( REGERR_NO_REGEXP );
		return false;
	}

	// copy on write: when rc->count is non-zero, drop our reference and
	// carry on with the object returned by getCopy() before regexec
	// modifies the match data.
	if ( rc->count )
	{
		rc->count--;
		rc = rc->getCopy();
	}

	const bool matched = rc->regexec( s );

	// Count the leading start pointers that were filled in by the match.
	int filled = 0;
	if ( matched )
	{
		while ( filled < Regexp::NSUBEXP && rc->startp[filled] )
			++filled;
	}
	rc->numSubs = filled - 1;

	return matched;
}
// Forwards 'source' to the compiled expression's GetReplaceString and
// returns its result. Without a compiled expression the REGERR_NO_REGEXP
// message is stored and an empty string is returned.
CString Regexp::GetReplaceString( LPCTSTR source ) const
{
	ClearErrorString();

	if ( ! rc )
	{
		m_szError = CRegErrorHandler::FindErr( REGERR_NO_REGEXP );
		return _T( "" );
	}

	return rc->GetReplaceString( source );
}
// Returns rc->numSubs as recorded by the last Match() call, or -1 (with the
// REGERR_NO_REGEXP message stored) when there is no compiled expression.
int Regexp::SubStrings() const
{
	ClearErrorString();

	if ( ! rc )
	{
		m_szError = CRegErrorHandler::FindErr( REGERR_NO_REGEXP );
		return -1;
	}

	return rc->numSubs;
}
// Returns the offset of sub-match 'i' (index clamped by safeIndex) measured
// from the start of the subject string remembered by Match(), or -1 (with
// the REGERR_NO_REGEXP message stored) when there is no compiled expression.
int Regexp::SubStart( unsigned int i ) const
{
	ClearErrorString();

	if ( ! rc )
	{
		m_szError = CRegErrorHandler::FindErr( REGERR_NO_REGEXP );
		return -1;
	}

	const int slot = safeIndex( i );
	return static_cast<int>( rc->startp[slot] - string );
}
// Returns the length of sub-match 'i' (index clamped by safeIndex), computed
// as the distance between its start and end pointers, or -1 (with the
// REGERR_NO_REGEXP message stored) when there is no compiled expression.
int Regexp::SubLength( unsigned int i ) const
{
	ClearErrorString();

	if ( ! rc )
	{
		m_szError = CRegErrorHandler::FindErr( REGERR_NO_REGEXP );
		return -1;
	}

	const int slot = safeIndex( i );
	return static_cast<int>( rc->endp[slot] - rc->startp[slot] );
}
// Reports the Status() of the compiled expression; false when there is no
// compiled expression at all.
bool Regexp::CompiledOK() const
{
	if ( ! rc )
		return false;

	return rc->Status();
}
#ifdef _RE_DEBUG
// Dumps the compiled expression via regdump(). When there is nothing to
// dump, a trace message is emitted in _DEBUG builds only.
void Regexp::Dump()
{
	if ( rc )
	{
		rc->regdump();
		return;
	}
#if defined( _DEBUG )
	TRACE0( "No regexp to dump out\n" );
#endif
}
#endif
// Clamps a caller-supplied sub-match index: values below NSUBEXP are
// returned unchanged, anything larger is replaced by NSUBEXP.
// NOTE(review): if startp/endp hold exactly NSUBEXP entries, NSUBEXP itself
// is one past the last valid slot — confirm against the array declarations.
int Regexp::safeIndex( unsigned int i ) const
{
	if ( i < Regexp::NSUBEXP )
		return i;

	return Regexp::NSUBEXP;
}
// Returns a copy of the text of sub-match 'i'.
//
// The index is clamped with safeIndex() once and the same slot is used both
// for the length and for the start pointer, so the two can never refer to
// different sub-matches. An empty string is returned when the slot has no
// start pointer or a non-positive length. When there is no compiled
// expression the call asserts in debug builds, stores the REGERR_NO_REGEXP
// message and returns an empty string.
const CString Regexp::operator[]( unsigned int i ) const
{
	ClearErrorString();
	ASSERT( rc );

	if ( ! rc )
	{
		m_szError = CRegErrorHandler::FindErr( REGERR_NO_REGEXP );
		return _T( "" );
	}

	// Clamp here so the pointer read below uses the slot SubLength measures.
	const int slot = safeIndex( i );
	const int len = SubLength( slot );

	CString buffer;
	if ( len > 0 && rc->startp[slot] != NULL )
	{
		TCHAR * szbuf = buffer.GetBufferSetLength( len );
		memcpy( szbuf, rc->startp[slot], len * sizeof(TCHAR) );
		buffer.ReleaseBuffer( len );
	}
	return buffer;
}
void regexp::ignoreCase( const TCHAR * in, TCHAR * out )
{
// copy in to out making every top level character a [Aa] set
BOOL inRange = FALSE;
while( *in )
{
if ( *in == '[' )
inRange = TRUE;
if ( *in == ']' )
inRange = FALSE;
if ( ! inRange && isalpha( *in ) )
{
*out++ = '[';
*out++ = (TCHAR)toupper( *in );
*out++ = (TCHAR)tolower( *in );
*out++ = ']';
}
else
*out++ = *in;
in++;
}
*out = 0;
}
// GetReplaceString - Converts a replace expression to a string
// - perform substitutions after a regexp match
// Returns - The resultant string
CString regexp::GetReplaceString( const TCHAR* sReplaceExp ) const
{
CString szEmpty( _T( "" ) );
TCHAR *src = (TCHAR *)sReplaceExp;
TCHAR *buf;
TCHAR c;
int no;
size_t len;
if( sReplaceExp == NULL )
{
regerror( REGERR_NULL_TO_REGSUB );
return szEmpty;
}
if ( *program != MAGIC)
{
regerror( REGERR_DAMAGED_REGEXP_REGSUB );
return szEmpty;
}
// First compute the length of the string
int replacelen = 0;
while ((c = *src++) != _T('\0'))
{
if (c == _T('&'))
no = 0;
else if (c == _T('\\') && isdigit(*src))
no = *src++ - _T('0');
else
no = -1;
if (no < 0)
{
// Ordinary character.
if (c == _T('\\') && (*src == _T('\\') || *src == _T('&')))
c = *src++;
replacelen++;
}
else if (startp[no] != NULL && endp[no] != NULL &&
endp[no] > startp[no])
{
// Get tagged expression
len = endp[no] - startp[no];
replacelen += len;
}
}
CString szReplace;
buf = szReplace.GetBufferSetLength( replacelen );
// Now we can create the string
src = (TCHAR *)sReplaceExp;
while ((c = *src++) != _T('\0'))
{
if (c == _T('&'))
no = 0;
else if (c == _T('\\') && isdigit(*src))
no = *src++ - _T('0');
else
no = -1;
if (no < 0)
{
// Ordinary character.
if (c == _T('\\') && (*src == _T('\\') || *src == _T('&')))
c = *src++;
*buf++ = c;
}
else if (startp[no] != NULL && endp[no] != NULL &&
endp[no] > startp[no])
{
// Get tagged expression
len = endp[no] - startp[no];
_tcsncpy(buf, startp[no], len);
buf += len;
if (len != 0 && *(buf-1) == _T( '\0' ))
{ /* strncpy hit NUL. */
regerror( REGERR_DAMAGED_MATCH_STRING );
return szEmpty;
}
}
}
szReplace.ReleaseBuffer( replacelen );
return szReplace;
}
// Returns the current error text: the compiled expression's own error
// string when there is one, otherwise the text stored in this object.
CString Regexp::GetErrorString() const
{
	const CString message = rc ? rc->GetErrorString() : m_szError;

	// make sure that if status == 0 that we have an error string
	ASSERT( CompiledOK() || message.GetLength() != 0 );

	return message;
}
// Resets the error state: clears the compiled expression's error string
// (when there is a compiled expression) and empties the text held here.
void Regexp::ClearErrorString() const
{
	if ( rc != 0 )
	{
		rc->ClearErrorString();
	}
	m_szError.Empty();
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -