📄 regexp.cpp
字号:
(void) memmove(opnd+3, opnd, (size_t)((regcode - opnd)*sizeof(TCHAR)));
regcode += 3;
place = opnd; // Op node, where operand used to be.
*place++ = op;
*place++ = _T('\0');
*place++ = _T('\0');
}
//
// regtail - patch the "next" link of the last node in a chain
//
// Starting at p, follows regnext() until a node with no successor is
// reached, then stores the distance between that node and val in the slot
// that follows the node's opcode.  A BACK node stores (node - val); every
// other opcode stores (val - node).  Nothing is written unless bEmitCode
// is set.
void CRegExp::regtail(TCHAR *p, TCHAR *val)
{
    if (!bEmitCode)
        return;

    // Advance to the node that does not have a successor yet.
    TCHAR *last = p;
    for (;;)
    {
        TCHAR *const following = regnext(last);
        if (following == NULL)
            break;
        last = following;
    }

    // The link is kept immediately after the opcode.
    short *const link = (short *)(last + 1);
    if (OP(last) == BACK)
        *link = last - val;
    else
        *link = val - last;
}
// regoptail - run regtail on the operand of p
//
// Does nothing unless code is being emitted (bEmitCode) and p is a BRANCH
// node; otherwise the chain that starts at OPERAND(p) gets val as its tail.
void CRegExp::regoptail(TCHAR *p, TCHAR *val)
{
    // In practice "has no operand" and "is not a BRANCH" mean the same thing.
    const bool applicable = bEmitCode && OP(p) == BRANCH;
    if (applicable)
        regtail(OPERAND(p), val);
}
// RegFind - match a regexp against a string
// Returns - Returns position of regexp or -1
// if regular expression not found
// Note - The regular expression should have been
// previously compiled using RegComp
int CRegExp::RegFind(const TCHAR *str)
{
TCHAR *string = (TCHAR *)str; // avert const poisoning
TCHAR *s;
// Delete any previously stored found string
delete sFoundText;
sFoundText = NULL;
// Be paranoid.
if(string == NULL)
{
TRACE0("NULL argument to regexec\n");
return(-1);
}
// Check validity of regex
if (!bCompiled)
{
TRACE0("No regular expression provided yet.\n");
return(-1);
}
// If there is a "must appear" string, look for it.
if (regmust != NULL && _tcsstr(string, regmust) == NULL)
return(-1);
// Mark beginning of line for ^
regbol = string;
// Simplest case: anchored match need be tried only once.
if (reganch)
{
if( regtry(string) )
{
// Save the found substring in case we need it
sFoundText = new TCHAR[GetFindLen()+1];
sFoundText[GetFindLen()] = _T('\0');
_tcsncpy(sFoundText, string, GetFindLen() );
return 0;
}
//String not found
return -1;
}
// Messy cases: unanchored match.
if (regstart != _T('\0'))
{
// We know what TCHAR it must start with.
for (s = string; s != NULL; s = _tcschr(s+1, regstart))
if (regtry(s))
{
int nPos = s-str;
// Save the found substring in case we need it later
sFoundText = new TCHAR[GetFindLen()+1];
sFoundText[GetFindLen()] = _T('\0');
_tcsncpy(sFoundText, s, GetFindLen() );
return nPos;
}
return -1;
}
else
{
// We don't -- general case
for (s = string; !regtry(s); s++)
if (*s == _T('\0'))
return(-1);
int nPos = s-str;
// Save the found substring in case we need it later
sFoundText = new TCHAR[GetFindLen()+1];
sFoundText[GetFindLen()] = _T('\0');
_tcsncpy(sFoundText, s, GetFindLen() );
return nPos;
}
// NOTREACHED
}
// regtry - attempt a match that begins exactly at string
//
// Resets the input cursor and every sub-expression boundary, then runs
// regmatch() on the compiled program.  On success slot 0 of startp/endp is
// set to the bounds of the whole match.
// Returns 1 on a match, 0 otherwise.
int CRegExp::regtry(TCHAR *string)
{
    reginput = string;

    // Forget the boundaries recorded by any earlier attempt.
    for (int slot = 0; slot < NSUBEXP; ++slot)
    {
        startp[slot] = NULL;
        endp[slot] = NULL;
    }

    if (!regmatch(program))
        return 0;

    // Slot 0 describes the complete match.
    startp[0] = string;
    endp[0] = reginput;
    return 1;
}
// regmatch - main matching routine
//
// Conceptually the strategy is simple: check to see whether the current
// node matches, call self recursively to see whether the rest matches,
// and then act accordingly. In practice we make some effort to avoid
// recursion, in particular by going through "ordinary" nodes (that don't
// need to know whether the rest of the match failed) by a loop instead of
// by recursion.
//
// Parameters
//   prog - first node of the (sub)program to run against reginput.
// Returns
//   1 when the node chain matches at reginput, 0 otherwise.  reginput is
//   advanced as input is consumed; the OPEN/CLOSE cases record
//   sub-expression bounds in startp/endp.
int CRegExp::regmatch(TCHAR *prog)
{
TCHAR *scan; // Current node.
TCHAR *next; // Next node.
// Walk the node chain; "break" out of the switch means "this node matched,
// continue with next".
for (scan = prog; scan != NULL; scan = next) {
next = regnext(scan);
switch (OP(scan)) {
case BOL:
// Matches only at the position recorded in regbol.
if (reginput != regbol)
return(0);
break;
case EOL:
// Matches only at the string terminator.
if (*reginput != _T('\0'))
return(0);
break;
case ANY:
// Any single character, but not the terminator.
if (*reginput == _T('\0'))
return(0);
reginput++;
break;
case EXACTLY: {
// The operand is a literal string that must appear at reginput.
size_t len;
TCHAR *const opnd = OPERAND(scan);
// Inline the first character, for speed.
if (*opnd != *reginput)
return(0);
len = _tcslen(opnd);
if (len > 1 && _tcsncmp(opnd, reginput, len) != 0)
return(0);
reginput += len;
break;
}
case ANYOF:
// The current character must occur in the operand string.
if (*reginput == _T('\0') ||
_tcschr(OPERAND(scan), *reginput) == NULL)
return(0);
reginput++;
break;
case ANYBUT:
// The current character must NOT occur in the operand string.
if (*reginput == _T('\0') ||
_tcschr(OPERAND(scan), *reginput) != NULL)
return(0);
reginput++;
break;
case NOTHING:
// Consumes no input.
break;
case BACK:
// Consumes no input.
break;
case OPEN+1: case OPEN+2: case OPEN+3:
case OPEN+4: case OPEN+5: case OPEN+6:
case OPEN+7: case OPEN+8: case OPEN+9: {
// Start of sub-expression "no": remember where the input stood, match
// the rest recursively and record the position only if that succeeds.
const int no = OP(scan) - OPEN;
TCHAR *const input = reginput;
if (regmatch(next)) {
// Don't set startp if some later
// invocation of the same parentheses
// already has.
if (startp[no] == NULL)
startp[no] = input;
return(1);
} else
return(0);
break;
}
case CLOSE+1: case CLOSE+2: case CLOSE+3:
case CLOSE+4: case CLOSE+5: case CLOSE+6:
case CLOSE+7: case CLOSE+8: case CLOSE+9: {
// End of sub-expression "no": same scheme as OPEN, recording endp.
const int no = OP(scan) - CLOSE;
TCHAR *const input = reginput;
if (regmatch(next)) {
// Don't set endp if some later
// invocation of the same parentheses
// already has.
if (endp[no] == NULL)
endp[no] = input;
return(1);
} else
return(0);
break;
}
case BRANCH: {
// Remember the input position so each alternative starts from it.
TCHAR *const save = reginput;
if (OP(next) != BRANCH) // No choice.
next = OPERAND(scan); // Avoid recursion.
else {
// Try each consecutive BRANCH in turn, rewinding the input after a
// failed alternative.
while (OP(scan) == BRANCH) {
if (regmatch(OPERAND(scan)))
return(1);
reginput = save;
scan = regnext(scan);
}
return(0);
// NOTREACHED
}
break;
}
case STAR:
case PLUS: {
// Lookahead: when the following node is EXACTLY, its first character is
// used to skip attempts that cannot succeed; '\0' means "no lookahead".
const TCHAR nextch =
(OP(next) == EXACTLY) ? *OPERAND(next) : _T('\0');
size_t no;
TCHAR *const save = reginput;
// STAR accepts zero repetitions, PLUS needs at least one.
const size_t min = (OP(scan) == STAR) ? 0 : 1;
// "no" holds (repetition count + 1), so the unsigned counter can end the
// loop after trying the minimum count.  Longest repetition is tried
// first, then progressively shorter ones.
for (no = regrepeat(OPERAND(scan)) + 1; no > min; no--) {
reginput = save + no - 1;
// If it could work, try it.
if (nextch == _T('\0') || *reginput == nextch)
if (regmatch(next))
return(1);
}
return(0);
break;
}
case END:
return(1); // Success!
break;
default:
// Opcode not handled above.
TRACE0("regexp corruption\n");
return(0);
break;
}
}
// We get here only if there's trouble -- normally "case END" is
// the terminating point.
TRACE0("corrupted pointers\n");
return(0);
}
// regrepeat - count how many consecutive times a simple node matches
//
// Measures from the current reginput without moving it.
//   ANY     - every remaining character.
//   EXACTLY - the run of characters equal to the first operand character.
//   ANYOF   - the leading span made only of operand characters.
//   ANYBUT  - the leading span containing no operand character.
// Any other opcode is reported through TRACE0 and counted as 0.
size_t CRegExp::regrepeat(TCHAR *node)
{
    size_t matched = 0;

    switch (OP(node))
    {
    case ANY:
        matched = _tcslen(reginput);
        break;

    case EXACTLY:
    {
        const TCHAR wanted = *OPERAND(node);
        const TCHAR *cursor = reginput;
        while (*cursor == wanted)
            ++cursor;
        matched = (size_t)(cursor - reginput);
        break;
    }

    case ANYOF:
        matched = _tcsspn(reginput, OPERAND(node));
        break;

    case ANYBUT:
        matched = _tcscspn(reginput, OPERAND(node));
        break;

    default: // Oh dear. Called inappropriately.
        TRACE0("internal error: bad call of regrepeat\n");
        break; // matched stays 0 -- best compromise.
    }

    return matched;
}
// regnext - extract the "next" pointer stored in a node
//
// The link is a short kept right after the opcode.  Zero means the node
// has no successor (NULL is returned).  For a BACK node the distance is
// subtracted from p; for every other opcode it is added.
TCHAR *CRegExp::regnext(TCHAR *p)
{
    const short distance = *((short *)(p + 1));
    if (distance == 0)
        return NULL;

    if (OP(p) == BACK)
        return p - distance;
    return p + distance;
}
// GetReplaceString - expand a replace expression against the last match
//
// Replace expression syntax handled here:
//   &        - the whole matched text
//   \0..\9   - the text of the corresponding tagged sub-expression
//   \\ , \&  - a literal backslash / ampersand
//   anything else is copied unchanged.
// A reference to a sub-expression that has no recorded (non-empty) bounds
// expands to nothing.
//
// Parameters
//   sReplaceExp - NUL-terminated replace expression.
// Returns
//   Pointer to a newly allocated, NUL-terminated string, or NULL when
//   sReplaceExp is NULL or no found text is stored (sFoundText == NULL).
//   The buffer is allocated with new[]; the caller is responsible for
//   releasing it with delete [].
TCHAR* CRegExp::GetReplaceString( const TCHAR* sReplaceExp )
{
    if( sReplaceExp == NULL || sFoundText == NULL )
        return NULL;

    const TCHAR *src;
    TCHAR c;
    int no;

    // First compute the length of the string
    size_t replacelen = 0;
    src = sReplaceExp;
    while ((c = *src++) != _T('\0'))
    {
        // The digit test is an explicit range check rather than isdigit():
        // a TCHAR may be wide or negative, which isdigit() cannot accept,
        // and only '0'..'9' yield a valid index after subtracting '0'.
        if (c == _T('&'))
            no = 0;
        else if (c == _T('\\') && *src >= _T('0') && *src <= _T('9'))
            no = *src++ - _T('0');
        else
            no = -1;

        if (no < 0)
        {
            // Ordinary character; an escaped '\' or '&' counts as one.
            if (c == _T('\\') && (*src == _T('\\') || *src == _T('&')))
                src++;
            replacelen++;
        }
        else if (startp[no] != NULL && endp[no] != NULL &&
                 endp[no] > startp[no])
        {
            // Tagged expression
            replacelen += (size_t)(endp[no] - startp[no]);
        }
    }

    // Now allocate the result
    TCHAR *const sReplaceStr = new TCHAR[replacelen + 1];
    if( sReplaceStr == NULL )
        return NULL;

    // Add null termination
    sReplaceStr[replacelen] = _T('\0');

    // Now we can create the string
    TCHAR *buf = sReplaceStr;
    src = sReplaceExp;
    while ((c = *src++) != _T('\0'))
    {
        if (c == _T('&'))
            no = 0;
        else if (c == _T('\\') && *src >= _T('0') && *src <= _T('9'))
            no = *src++ - _T('0');
        else
            no = -1;

        if (no < 0)
        {
            // Ordinary character.
            if (c == _T('\\') && (*src == _T('\\') || *src == _T('&')))
                c = *src++;
            *buf++ = c;
        }
        else if (startp[no] != NULL && endp[no] != NULL &&
                 endp[no] > startp[no])
        {
            // Get tagged expression.  The text is taken from the saved
            // copy in sFoundText, at the same offset the tag has
            // relative to the start of the whole match.
            const size_t len = (size_t)(endp[no] - startp[no]);
            const int tagpos = (int)(startp[no] - startp[0]);
            _tcsncpy(buf, sFoundText + tagpos, len);
            buf += len;
        }
    }

    return sReplaceStr;
}
// RegSearchFind - compile sSearchExp into r and search string from startPos
//
// Parameters
//   r          - regexp object used for compiling and matching.
//   string     - text to search.
//   sSearchExp - regular expression source.
//   startPos   - offset in string at which the search begins.
//                NOTE(review): no range check is made here; presumably the
//                caller guarantees 0 <= startPos <= length -- confirm.
//   len        - receives the length of the match; left untouched when
//                nothing is found.
// Returns
//   Offset of the match counted from the start of string, or -1.
int RegSearchFind(CRegExp& r, CStringEx& string, LPCTSTR sSearchExp, long startPos, long& len)
{
    LPCTSTR str = (LPCTSTR)string + startPos;

    r.RegComp(sSearchExp);

    const int nPos = r.RegFind(str);
    if (nPos == -1)
        return -1;

    len = r.GetFindLen();
    // RegFind reports the position relative to str; make it relative to
    // the start of string.
    return nPos + startPos;
}
// RegSearchReplaceAll - replace every match of sSearchExp in string
//
// Parameters
//   string      - text that is modified in place.
//   sSearchExp  - regular expression source.
//   sReplaceExp - replace expression, expanded for every match by
//                 CRegExp::GetReplaceString.
// Returns
//   Number of replacements made.  The function stops early, without
//   touching the current match, when no replacement string can be built
//   (GetReplaceString returns NULL, e.g. for a NULL sReplaceExp).
//
// NOTE(review): each search restarts at the text that follows the previous
// replacement, and RegFind marks the start of the text it is given as the
// beginning of line for ^ -- so an anchored expression can match again
// after every replacement.  Confirm whether that is intended.
int RegSearchReplaceAll( CStringEx& string, LPCTSTR sSearchExp,
                         LPCTSTR sReplaceExp )
{
    int nPos = 0;
    int nReplaced = 0;
    CRegExp r;
    LPCTSTR str = (LPCTSTR)string;

    r.RegComp( sSearchExp );
    while( (nPos = r.RegFind(str)) != -1 )
    {
        TCHAR *pReplaceStr = r.GetReplaceString( sReplaceExp );
        if( pReplaceStr == NULL )
            break;  // nothing to substitute; never hand NULL to Replace/_tcslen

        nReplaced++;

        // Remember whether the match was empty before the string changes.
        const bool bEmptyMatch = ( r.GetFindLen() == 0 );

        int offset = str-(LPCTSTR)string+nPos;
        string.Replace( offset, r.GetFindLen(),
                        pReplaceStr );

        // Replace might have caused a reallocation
        str = (LPCTSTR)string + offset + _tcslen(pReplaceStr);

        // The buffer comes from new[], so release it with delete[].
        delete [] pReplaceStr;

        // A zero-length match consumes no input: without stepping over one
        // character the same position would match again forever.
        if( bEmptyMatch )
        {
            if( *str == _T('\0') )
                break;
            str++;
        }
    }
    return nReplaced;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -