// regexp.cpp
// (Code-viewer residue, not part of the source: "font size" control label.)
if (RxMatch(n->fNext) == 1) return 1;
flags = fl;
rex = save;
n = n->fPtr;
continue;
}
}
break;
}
n = n->fNext;
}
/* NOTREACHED */
assert(1 == 0 /* internal regexp error */);
return 0;
}
// Attempts to match the compiled expression `rx` with the match anchored at
// position `s` of the subject text.
//
// All sub-expression slots of the current result (`match`) are reset to -1
// before the attempt. On success slot 0 receives the offsets, relative to the
// start of the subject (`bop`), of `s` and of the cursor `rex` as left by
// RxMatch(), and 1 is returned. On failure `flags` is restored to the value
// it had on entry and 0 is returned.
int RxTry(RxNode *rx, LPCTSTR s) {
    const int savedFlags = flags;

    // The matcher reads its input through the shared cursor.
    rex = s;

    int slot = NSEXPS;
    while (slot-- > 0) {
        match->Close[slot] = -1;
        match->Open[slot] = -1;
    }

    if (!RxMatch(rx)) {
        flags = savedFlags;
        return 0;
    }

    match->Open[0] = (int) (s - bop);
    match->Close[0] = (int) (rex - bop);
    return 1;
}
// Searches the text [Data, Data + Len) for the compiled expression `Regexp`,
// trying successive start positions beginning at `Start`.
//
// Regexp  compiled expression; a null pointer yields 0 immediately
// Data    beginning of the subject text (offsets in `Match` are relative to it)
// Len     number of characters in the subject text
// Start   first position at which a match is attempted
// Match   receives the sub-expression offsets; every slot is reset to -1 first
// RxOpt   option bits, stored into the shared `flags` (RX_CASE is consulted here)
//
// Returns 1 when a match is found, otherwise 0.
int RxExec(RxNode *Regexp, LPCTSTR Data, int Len, LPCTSTR Start, RxMatchRes *Match, unsigned int RxOpt) {
    if (Regexp == 0)
        return 0;

    // Publish the search context used by RxTry()/RxMatch().
    match = Match;
    bop = Data;
    eop = Data + Len;
    flags = RxOpt;

    for (int slot = 0; slot < NSEXPS; slot++) {
        Match->Close[slot] = -1;
        Match->Open[slot] = -1;
    }

    // Expression anchored at beginning of line: a single attempt at Start.
    if (Regexp->fWhat == RE_ATBOL)
        return RxTry(Regexp, Start);

    // Expression begins with a literal character: skip ahead to each
    // occurrence of that character and only attempt a match there.
    if (Regexp->fWhat == RE_CHAR) {
        TCHAR wanted = Regexp->fChar[0];

        if (Start == eop)
            return 0;

        const bool caseSensitive = (flags & RX_CASE) != 0;
        if (!caseSensitive)
            wanted = _totupper(wanted);

        for (;;) {
            while (Start < eop) {
                const TCHAR here = caseSensitive ? *Start : (TCHAR) _totupper(*Start);
                if (here == wanted)
                    break;
                Start++;
            }
            if (Start == eop)
                break;
            if (RxTry(Regexp, Start))
                return 1;
            if (++Start == eop)
                break;
        }
        return 0;
    }

    // General case (slow): attempt a match at every position up to and
    // including the end of the text.
    for (;;) {
        if (RxTry(Regexp, Start))
            return 1;
        if (!(Start++ < eop))
            break;
    }
    return 0;
}
// Case-conversion state bits used while building a replacement string.
// RxReplace() sets and clears them; add() applies them to appended text.

// Upper-case every appended character (set by "\U", cleared by "\E" or "\e").
#define FLAG_UP_CASE 1
// Lower-case every appended character (set by "\L", cleared by "\E" or "\e").
#define FLAG_DOWN_CASE 2
// Upper-case only the first character of the next append (set by "\u");
// add() clears the bit once it has been applied.
#define FLAG_UP_NEXT 4
// Lower-case only the first character of the next append (set by "\l");
// add() clears the bit once it has been applied.
#define FLAG_DOWN_NEXT 8
static int add(int *len, LPTSTR *s, LPCTSTR a, int alen, int &flag) {
int NewLen = *len + alen;
int i;
NewLen = NewLen * 2;
if (alen == 0)
return 0;
if (*s) {
*s = (LPTSTR) realloc(*s, NewLen);
assert(*s);
memcpy(*s + *len, a, alen);
} else {
*s = (LPTSTR) malloc(NewLen);
assert(*s);
memcpy(*s, a, alen);
*len = 0;
}
if (flag & FLAG_UP_CASE) {
LPTSTR p = *s + *len;
for (i = 0; i < alen; i++) {
*p = (TCHAR)_totupper(*p);
p++;
}
} else if (flag & FLAG_DOWN_CASE) {
LPTSTR p = *s + *len;
for (i = 0; i < alen; i++) {
*p = (TCHAR)_totlower(*p);
p++;
}
}
if (flag & FLAG_UP_NEXT) {
LPTSTR p = *s + *len;
*p = (TCHAR)_totupper(*p);
flag &= ~FLAG_UP_NEXT;
} else if (flag & FLAG_DOWN_NEXT) {
LPTSTR p = *s + *len;
*p = (TCHAR)_totlower(*p);
flag &= ~FLAG_DOWN_NEXT;
}
*len += alen;
return 0;
}
int RxReplace(LPCTSTR rep, LPCTSTR Src, int /*len*/, RxMatchRes match, LPTSTR *Dest, int *Dlen) {
int dlen = 0;
LPTSTR dest = 0;
TCHAR Ch;
int n;
int flag = 0;
*Dest = 0;
*Dlen = 0;
// add(&dlen, &dest, Src, match.Open[0]);
while (*rep) {
switch (Ch = *rep++) {
// case _T('&'):
// add(&dlen, &dest, Src + match.Open[0], match.Close[0] - match.Open[0], flag);
// break;
case _T('\\'):
switch (Ch = *rep++) {
case _T('0'):
case _T('1'): case _T('2'): case _T('3'):
case _T('4'): case _T('5'): case _T('6'):
case _T('7'): case _T('8'): case _T('9'):
n = Ch - 48;
if (match.Open[n] != -1 && match.Close[n] != -1) {
add(&dlen, &dest, Src + match.Open[n], match.Close[n] - match.Open[n], flag);
} else return -1;
break;
case 0:
if (dest) free(dest);
return -1; // error
case _T('r'): Ch = _T('\r'); add(&dlen, &dest, &Ch, 1, flag); break;
case _T('n'): Ch = _T('\n'); add(&dlen, &dest, &Ch, 1, flag); break;
case _T('b'): Ch = _T('\b'); add(&dlen, &dest, &Ch, 1, flag); break;
case _T('a'): Ch = _T('\a'); add(&dlen, &dest, &Ch, 1, flag); break;
case _T('t'): Ch = _T('\t'); add(&dlen, &dest, &Ch, 1, flag); break;
case _T('U'): flag |= FLAG_UP_CASE; break;
case _T('u'): flag |= FLAG_UP_NEXT; break;
case _T('L'): flag |= FLAG_DOWN_CASE; break;
case _T('l'): flag |= FLAG_DOWN_NEXT; break;
case _T('E'):
case _T('e'): flag &= ~(FLAG_UP_CASE | FLAG_DOWN_CASE); break;
case _T('x'):
{
int N = 0;
int A = 0;
if (*rep == 0) return 0;
N = _totupper(*rep) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
rep++;
A = N << 4;
if (*rep == 0) return 0;
N = _totupper(*rep) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
rep++;
A = A + N;
Ch = (TCHAR)A;
}
add(&dlen, &dest, &Ch, 1, flag);
break;
case _T('d'):
{
int N = 0;
int A = 0;
if (*rep == 0) return 0;
N = _totupper(*rep) - 48; if (N > 9) return 0;
rep++;
A = N * 100;
if (*rep == 0) return 0;
N = _totupper(*rep) - 48; if (N > 9) return 0;
rep++;
A = N * 10;
if (*rep == 0) return 0;
N = _totupper(*rep) - 48; if (N > 9) return 0;
rep++;
A = A + N;
Ch = (TCHAR)A;
}
add(&dlen, &dest, &Ch, 1, flag);
break;
case _T('o'):
{
int N = 0;
int A = 0;
if (*rep == 0) return 0;
N = _totupper(*rep) - 48; if (N > 7) return 0;
rep++;
A = N * 64;
if (*rep == 0) return 0;
N = _totupper(*rep) - 48; if (N > 7) return 0;
rep++;
A = N * 8;
if (*rep == 0) return 0;
N = _totupper(*rep) - 48; if (N > 7) return 0;
rep++;
A = A + N;
Ch = (TCHAR)A;
}
add(&dlen, &dest, &Ch, 1, flag);
break;
default:
add(&dlen, &dest, &Ch, 1, flag);
break;
}
break;
default:
add(&dlen, &dest, &Ch, 1, flag);
break;
}
}
// add(&dlen, &dest, Src + match.Close[0], len - match.Close[0]);
*Dlen = dlen;
*Dest = dest;
return 0;
}
#if 0
// Debug aid: walks the node list starting at `n` along fNext and prints one
// line per node, each preceded by `N` copies of the indent string.
static void RxDump(int N, RxNode *n) {
    for (; n; n = n->fNext) {
        for (int pad = N; pad > 0; pad--)
            _tprintf(_T(" "));

        switch (n->fWhat) {
        case RE_NOTHING: _tprintf(_T("NOTHING\n")); break;
        case RE_CHAR: _tprintf(_T("CHAR '%.1s'\n"), n->fChar); break;
        case RE_ATBOL: _tprintf(_T("^\n")); break;
        case RE_ATEOL: _tprintf(_T("$\n")); break;
        case RE_ANY: _tprintf(_T(".\n")); break;
        case RE_INSET: _tprintf(_T("[\n")/*, n->fChar*/); break;
        case RE_NOTINSET: _tprintf(_T("[^\n")/*, n->fChar*/); break;
        case RE_ATBOW: _tprintf(_T("<\n")); break;
        case RE_ATEOW: _tprintf(_T(">\n")); break;
        case RE_WSPACE: _tprintf(_T("WSPACE\n")); break;
        case RE_NWSPACE: _tprintf(_T("NWSPACE\n")); break;
        case RE_UPPER: _tprintf(_T("UPPER\n")); break;
        case RE_LOWER: _tprintf(_T("LOWER\n")); break;
        case RE_JUMP: _tprintf(_T("JUMP\n")); break;
        case RE_BREAK: _tprintf(_T("BREAK\n")); break;
        case RE_END: _tprintf(_T("END\n")); break;
        default:
            // Composite node kinds are encoded as bit flags; the low byte
            // carries the group number or the operator character.
            if (n->fWhat & RE_GROUP) {
                const bool remembered = (n->fWhat & RE_MEM) != 0;
                if (n->fWhat & RE_OPEN) {
                    if (remembered)
                        _tprintf(_T("( %d\n"), n->fWhat & 0xFF);
                    else
                        _tprintf(_T("{\n"));
                }
                if (n->fWhat & RE_CLOSE) {
                    if (remembered)
                        _tprintf(_T(") %d\n"), n->fWhat & 0xFF);
                    else
                        _tprintf(_T("}\n"));
                }
            } else if (n->fWhat & RE_BRANCH) {
                // Greedy and non-greedy branches are printed the same way.
                _tprintf(_T("%c\n"), n->fWhat & 0xFF);
            } else {
                _tprintf(_T("???????????????\n"));
            }
            break;
        }
    }
}
// TEST(rc, rx, st): one self-test case.
// Copies the subject `st` into `line`, compiles the pattern `rx` (asserting
// that compilation succeeds), prints a header and the node dump, asserts that
// RxExec() on `line` returns `rc`, then frees the compiled pattern.
// Relies on the variables `line`, `a` and `b` of the enclosing scope, and on
// `rx` and `st` being string literals (they are pasted into the header text).
// NOTE(review): the expansion is several statements, not one, so it is only
// safe as a statement of its own.
// NOTE(review): RxCompile() is called inside assert(), so with NDEBUG defined
// the pattern would never be compiled.
// NOTE(review): RxExec() is called with five arguments here while the
// definition in this file takes six (RxOpt); presumably the declaration
// supplies a default, or this harness is out of date -- confirm.
#define TEST(rc,rx,st) \
_tcscpy(line,st); \
assert((a = RxCompile(rx)) != 0); \
puts(_T("\n--- " rx " -- " st " -- ")); \
RxDump(0,a);\
assert(rc == RxExec(a, line, _tcslen(line), line, &b)); \
RxFree(a);
// Self-test driver (compiled only when the surrounding #if is enabled).
// Runs a fixed list of TEST cases, each giving the expected RxExec() result
// for a pattern/subject pair, then enters an interactive loop that reads a
// pattern and a subject from standard input and prints the result and every
// sub-expression slot that was set. An empty pattern line ends the loop.
int main() {
RxNode *a;
RxMatchRes b;
TCHAR line[1024];
// Literal characters and '.'
TEST(1, _T("a"), _T("a"));
TEST(0, _T("b"), _T("a"));
TEST(1, _T("aaaa"), _T("aaaa"));
TEST(0, _T("bbbb"), _T("aaaa"));
TEST(1, _T("."), _T("a"));
TEST(0, _T("."), _T(""));
TEST(1, _T("a.."), _T("axx"));
TEST(0, _T("a.."), _T("b.."));
// '?' on a character and on a {...} group
TEST(1, _T("a?b"), _T("ab"));
TEST(1, _T("a?b"), _T("xb"));
TEST(0, _T("a?C"), _T("xb"));
TEST(1, _T("{aa}?b"), _T("aab"));
TEST(1, _T("{aa}?b"), _T("xab"));
TEST(0, _T("{aa}?C"), _T("xxb"));
// '^' and '$' anchors
TEST(1, _T("^aa"), _T("aa"));
TEST(0, _T("^aa"), _T("baa"));
TEST(1, _T("^aa$"),_T("aa"));
TEST(0, _T("^aa$"), _T("baab"));
// '*' and '+'
TEST(1, _T("a*b"), _T("aaab"));
TEST(0, _T("a*b"), _T("aaaa"));
TEST(1, _T("{aa}*b"), _T("aaab"));
TEST(0, _T("{aa}*b"), _T("aaaa"));
TEST(1, _T("b+"), _T("bb"));
TEST(1, _T("b+"), _T("b"));
TEST(0, _T("b+"), _T("a"));
TEST(1, _T("^b+$"), _T("b"));
TEST(0, _T("^b+$"), _T("aba"));
// '|' alternation
TEST(1, _T("a|b"), _T(" a"));
TEST(1, _T("a|b"), _T(" b"));
TEST(0, _T("a|b"), _T(" c"));
TEST(1, _T("a|b|c|d|e"), _T(" a "));
TEST(1, _T("a|b|c|d|e"), _T(" c "));
TEST(1, _T("a|b|c|d|e"), _T(" e "));
TEST(0, _T("a|b|c|d|e"), _T(" x "));
TEST(1, _T("{a}|{b}|{c}|{d}|{e}"), _T(" a "));
TEST(1, _T("{a}|{b}|{c}|{d}|{e}"), _T(" c "));
TEST(1, _T("{a}|{b}|{c}|{d}|{e}"), _T(" e "));
TEST(0, _T("{a}|{b}|{c}|{d}|{e}"), _T(" x "));
TEST(1, _T("^xx{alpha}|{beta}xx$"), _T("xxalphaxx"));
TEST(1, _T("^xx{alpha}|{beta}xx$"), _T("xxbetaxx"));
// Character sets and patterns shaped like compiler messages
TEST(1, _T("[a-z]"), _T("aaa"));
TEST(1, _T("^{Error}|{Warning}"), _T("Warning search.cpp 35: Conversion may lose significant digits in function AskReplace()"));
TEST(1, _T("^{Error}|{Warning} (.+)"), _T("Warning search.cpp 35: Conversion may lose significant digits in function AskReplace()"));
TEST(1, _T("^{Error}|{Warning} ([a-z.]#) ([0-9]#)"), _T("Warning search.cpp 35: Conversion may lose significant digits in function AskReplace()"));
TEST(1, _T("^{Error}|{Warning} (.+) ([0-9]+): (.*)$"), _T("Warning search.cpp 35: Conversion may lose significant digits in function AskReplace()"));
TEST(1, _T("^{Error}|{Warning} (.+) ([0-9]+): (.*)$"), _T("Error search.cpp 35: Conversion may lose significant digits in function AskReplace()"));
// Repeated (...) groups, including the '#' and '@' operators
TEST(1, _T("^([a-z]+ +)*\\("), _T("blabla bla bla bla ("));
TEST(1, _T("^([a-z]+\\s+)+\\("), _T("blabla bla bla bla ("));
TEST(1, _T("^([a-z]+\\s*)+\\("), _T("blabla bla bla bla("));
TEST(1, _T("^([a-z]+\\s+)+\\("), _T("blabla bla bla bla ("));
TEST(1, _T("^([a-z]+\\s*)+\\("), _T("blabla bla bla bla("));
TEST(1, _T("^([a-z]# #)*\\("), _T("blabla bla bla bla ("));
TEST(1, _T("^([a-z]+ @)@\\("), _T("blabla bla bla bla ("));
TEST(1, _T("^[\\x20-\\xFF]+$"), _T("blabla"));
TEST(1, _T("{a{a{a{a|a}|{a|a}a}a}a|a}"), _T("aaaaaaaaaaaaaaaaa"));
// Interactive loop: pattern, then subject, until an empty pattern is entered.
// NOTE(review): gets() performs no bounds checking against `line` and was
// removed from the C and C++ standards; it also takes a char buffer while
// `line` is TCHAR. Replace with a bounded read before enabling this code.
while (1) {
_tprintf(_T ("Regexp: ")); fflush(stdout); gets(line);
if (!*line) break;
a = RxCompile(line); RxDump(0, a);
_tprintf(_T ("String: ")); fflush(stdout); gets(line);
_tprintf(_T ("rc = %d\n"), RxExec(a, line, _tcslen(line), line, &b));
// Print only the sub-expression slots that were set by the match.
for (int i = 0; i < NSEXPS; i++) {
if (b.Open[i] != -1) {
_tprintf(_T ("%d: %d %d\n"), i, b.Open[i], b.Close[i]);
}
}
RxFree(a);
}
return 0;
}
#endif
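// (Code-viewer residue, not part of regexp.cpp -- keyboard shortcut help:
//  copy code: Ctrl + C; search code: Ctrl + F; full-screen mode: F11;
//  toggle theme: Ctrl + Shift + D; show shortcuts: ?;
//  increase font size: Ctrl + =; decrease font size: Ctrl + -)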