📄 regexp.cpp
字号:
if (C == 0) break;
P = P->fPrev;
}
//_tprintf(_T("P = %s, c = %d", P ? "ok":"null", C));
if (C != 0) return 0;
}
assert(Last);
if (What != _T('?') && What != _T('|'))
if (CountWidth(Last) == 0) {
// puts(_T("FAILED count"));
return 0;
}
switch (What) {
case _T('?'): /* BRANCH x NOTHING */
New = NewNode(RE_BRANCH | RE_GREEDY | What);
No = NewNode(RE_NOTHING);
if (!New || !No) return 0;
No->fPrev = *N;
if (*N)
(*N)->fNext = No;
New->fNext = Last;
New->fPrev = Last->fPrev;
Last->fPrev = New;
if (New->fPrev) {
New->fPrev->fNext = New;
} else {
*F = New;
}
New->fPtr = No;
No->fPtr = New;
*N = No;
//puts(_T("BRANCH ?"));
break;
case _T('*'):
case _T('@'):
New = NewNode(RE_BRANCH | What | ((What == _T('*')) ? RE_GREEDY : 0));
Jump = NewNode(RE_JUMP);
No = NewNode(RE_NOTHING);
if (!New || !No || !Jump) return 0;
No->fPrev = Jump;
Jump->fNext = No;
Jump->fPrev = *N;
if (*N)
(*N)->fNext = Jump;
New->fNext = Last;
New->fPrev = Last->fPrev;
Last->fPrev = New;
if (New->fPrev) {
New->fPrev->fNext = New;
} else {
*F = New;
}
New->fPtr = No;
No->fPtr = New;
Jump->fPtr = New;
*N = No;
//puts(_T("BRANCH *"));
break;
case _T('#'):
case _T('+'):
New = NewNode(RE_BRANCH | What | ((What == _T('+')) ? RE_GREEDY : 0));
Skip = NewNode(RE_JUMP);
Jump = NewNode(RE_JUMP);
No = NewNode(RE_NOTHING);
if (!New || !No || !Jump) return 0;
No->fPrev = Jump;
Jump->fPrev = *N;
Jump->fNext = No;
Skip->fNext = New;
New->fPrev = Skip;
if (*N)
(*N)->fNext = Jump;
New->fNext = Last;
Skip->fPrev = Last->fPrev;
Last->fPrev = New;
if (Skip->fPrev) {
Skip->fPrev->fNext = Skip;
} else {
*F = Skip;
}
New->fPtr = No;
No->fPtr = New;
Jump->fPtr = New;
Skip->fPtr = Last;
*N = No;
//puts(_T("BRANCH +"));
break;
case _T('|'):
New = NewNode(RE_BRANCH | RE_GREEDY | What);
Jump = NewNode(RE_BREAK);
No = NewNode(RE_NOTHING);
if (!New || !No || !Jump) return 0;
No->fPrev = Jump;
Jump->fNext = No;
Jump->fPrev = *N;
if (*N)
(*N)->fNext = Jump;
New->fNext = Last;
New->fPrev = Last->fPrev;
Last->fPrev = New;
if (New->fPrev) {
New->fPrev->fNext = New;
} else {
*F = New;
}
New->fPtr = No;
No->fPtr = New;
Jump->fPtr = New;
*N = No;
//puts(_T("BRANCH |"));
break;
}
return 1;
}
return 0;
}
/*
 * CHECK(n): leave the enclosing function with a return value of 0 as soon as
 * the expression (n) compares equal to 0. The do { } while (0) wrapper makes
 * the macro behave like a single statement. Note that it returns directly,
 * so nothing acquired by the enclosing function is released on that path.
 */
#define CHECK(n) do { if ((n) == 0) { return 0;} } while (0)
static RxNode *RxComp(LPCTSTR *Regexp) {
RxNode *F = 0;
RxNode *N = 0;
int C;
TCHAR Ch;
while (**Regexp) {
// puts(*Regexp);
switch (Ch = (*(*Regexp)++)) {
case _T('?'):
case _T('*'):
case _T('+'):
case _T('@'):
case _T('#'):
case _T('|'):
CHECK(MakeSub(&F, &N, Ch));
break;
case _T('}'):
case _T(')'):
return F;
case _T('{'):
CHECK(AddNode(&F, &N, NewNode(RE_GROUP | RE_OPEN)));
CHECK(AddNode(&F, &N, RxComp(Regexp)));
while (N->fNext) N = N->fNext;
CHECK(AddNode(&F, &N, NewNode(RE_GROUP | RE_CLOSE)));
break;
case _T('('):
C = ++RegCount;
CHECK(AddNode(&F, &N, NewNode(RE_GROUP | RE_OPEN | RE_MEM | C)));
CHECK(AddNode(&F, &N, RxComp(Regexp)));
while (N->fNext) N = N->fNext;
CHECK(AddNode(&F, &N, NewNode(RE_GROUP | RE_CLOSE | RE_MEM | C)));
break;
case _T('\\'):CHECK(AddNode(&F, &N, NewEscape(Regexp))); break;
case _T('['): CHECK(AddNode(&F, &N, NewSet(Regexp))); break;
case _T('^'): CHECK(AddNode(&F, &N, NewNode(RE_ATBOL))); break;
case _T('$'): CHECK(AddNode(&F, &N, NewNode(RE_ATEOL))); break;
case _T('.'): CHECK(AddNode(&F, &N, NewNode(RE_ANY))); break;
case _T('<'): CHECK(AddNode(&F, &N, NewNode(RE_ATBOW))); break;
case _T('>'): CHECK(AddNode(&F, &N, NewNode(RE_ATEOW))); break;
default:
--*Regexp;
CHECK(AddNode(&F, &N, NewChar(**Regexp)));
++*Regexp;
break;
}
}
return F;
}
/*
 * Optimization hook for a compiled node list.
 * No transformation is performed: the list passed in is handed back as is.
 */
RxNode *RxOptimize(RxNode *rx) {
    RxNode *result = rx;

    return result;
}
RxNode *RxCompile(LPCTSTR Regexp) {
RxNode *n = 0, *x;
if (Regexp == 0) return 0;
RegCount = 0;
n = RxComp(&Regexp);
if (n == 0) return 0;
n = RxOptimize(n);
x = n;
while (x->fNext) x = x->fNext;
x->fNext = NewNode(RE_END);
return n;
}
/*
 * Release a node list.
 *
 * Follows the fNext links starting at n and frees each node. Nodes of kind
 * RE_INSET, RE_NOTINSET and RE_CHAR also have their fChar buffer freed.
 * Passing 0 is allowed and does nothing.
 */
void RxFree(RxNode *n) {
    RxNode *next;

    for (; n != 0; n = next) {
        /* Read the link first: the node is gone after free(n). */
        next = n->fNext;

        if (n->fWhat == RE_INSET ||
            n->fWhat == RE_NOTINSET ||
            n->fWhat == RE_CHAR) {
            free(n->fChar);
        }
        free(n);
    }
}
/*
 * ChClass(x): 1 when x lies in one of the ranges 'A'-'Z', 'a'-'z' or
 * '0'-'9', otherwise 0. RxMatch uses it for the word-boundary tests
 * (RE_ATBOW / RE_ATEOW). The argument is evaluated several times, so it
 * must not have side effects.
 */
#define ChClass(x) (((((x) >= _T('A')) && ((x) <= _T('Z'))) || (((x) >= _T('a')) && ((x) <= _T('z'))) || (((x) >= _T('0')) && ((x) <= _T('9'))))?1:0)
/* File-scope state shared by the recursive RxMatch calls. */
/* Receives group positions: RxMatch stores offsets in match->Open[] / match->Close[]. */
static RxMatchRes *match;
/* Start of the text: RE_ATBOL requires rex == bop, and group offsets are taken relative to it. */
static LPCTSTR bop;
/* End of the text: RE_ATEOL requires rex == eop, and consuming nodes fail once rex reaches it. */
static LPCTSTR eop;
/* Matching flags; the RX_CASE bit is set by RE_CASE nodes and cleared by RE_NCASE nodes. */
static int flags = RX_CASE;
/* Current position in the text; advanced by RxMatch as nodes consume characters. */
static LPCTSTR rex;
int RxMatch(RxNode *rx) {
RxNode *n = rx;
//_tprintf(_T(">>"));
while (n) {
//_tprintf(_T("%-50.50s\n", rex));
//RxDump(1, n);
switch (n->fWhat) {
case RE_NOTHING:
break;
case RE_CASE:
flags |= RX_CASE;
break;
case RE_NCASE:
flags &= ~RX_CASE;
break;
case RE_ATBOL:
if (rex != bop) return 0;
break;
case RE_ATEOL:
if (rex != eop) return 0;
break;
case RE_ANY:
if (rex == eop) return 0;
rex++;
break;
case RE_WSPACE:
if (rex == eop) return 0;
if (*rex != _T(' ') && *rex != _T('\n') && *rex != _T('\r') && *rex != _T('\t')) return 0;
rex++;
break;
case RE_NWSPACE:
if (rex == eop) return 0;
if (*rex == _T(' ') || *rex == _T('\n') || *rex == _T('\r') || *rex == _T('\t')) return 0;
rex++;
break;
case RE_WORD:
if (rex == eop) return 0;
if (!_istalnum(*rex)) return 0;
rex++;
break;
case RE_NWORD:
if (rex == eop) return 0;
if (_istalnum(*rex)) return 0;
rex++;
break;
case RE_DIGIT:
if (rex == eop) return 0;
if (!_istdigit(*rex)) return 0;
rex++;
break;
case RE_NDIGIT:
if (rex == eop) return 0;
if (_istdigit(*rex)) return 0;
rex++;
break;
case RE_UPPER:
if (rex == eop) return 0;
if (!_istupper(*rex)) return 0;
rex++;
break;
case RE_LOWER:
if (rex == eop) return 0;
if (!_istlower(*rex)) return 0;
rex++;
break;
case RE_ATBOW:
if (rex >= eop) return 0;
if (rex > bop) {
if ((ChClass(*rex) != 1) || (ChClass(*(rex-1)) != 0)) return 0;
}
break;
case RE_ATEOW:
if (rex <= bop) return 0;
if (rex < eop) {
if ((ChClass(*rex) != 0) || (ChClass(*(rex-1)) != 1)) return 0;
}
break;
case RE_CHAR:
if (rex == eop) return 0;
if (flags & RX_CASE) {
if (*n->fChar != *rex) return 0;
if (memcmp(rex, n->fChar, n->fLen) != 0) return 0;
} else {
for (int i = 0; i < n->fLen; i++)
if (_totupper(rex[i]) != _totupper(n->fChar[i]))
return 0;
}
rex += n->fLen;
break;
case RE_INSET:
if (rex == eop) return 0;
#ifdef _UNICODE
if ((n->fChar[(TCHAR)(*rex) >> 3] & (1 << ((TCHAR)(*rex) & 7))) == 0) return 0;
#else // _UNICODE
if ((n->fChar[(unsigned char)(*rex) >> 3] & (1 << ((unsigned char)(*rex) & 7))) == 0) return 0;
#endif // _UNICODE
rex++;
break;
case RE_NOTINSET:
if (rex == eop) return 0;
#ifdef _UNICODE
if (n->fChar[(TCHAR)(*rex) >> 3] & (1 << ((TCHAR)(*rex) & 7))) return 0;
#else // _UNICODE
if (n->fChar[(unsigned char)(*rex) >> 3] & (1 << ((unsigned char)(*rex) & 7))) return 0;
#endif // _UNICODE
rex++;
break;
case RE_JUMP:
n = n->fPtr;
continue;
case RE_END:
return 1;
case RE_BREAK:
n = n->fNext;
if (n->fNext == 0) break;
n = n->fNext;
if (n->fWhat & RE_BRANCH) {
while ((n->fWhat & RE_BRANCH) && n->fPtr && ((n->fWhat & 0xFF) == _T('|')))
n = n->fPtr->fNext;
}
if (n->fWhat & RE_GROUP) {
int C = 1;
n = n->fNext;
while ((C > 0) && n) {
if (n->fWhat & RE_GROUP) {
if (n->fWhat & RE_OPEN) C++;
else C--;
}
if (C == 0) break;
n = n->fNext;
}
}
break;
default:
if (n->fWhat & RE_GROUP) {
if (n->fWhat & RE_MEM) {
LPCTSTR save = rex;
int b = n->fWhat & 0xFF;
int fl = flags;
if (RxMatch(n->fNext) == 0) {
flags = fl;
if (n->fWhat & RE_OPEN)
match->Open[b] = -1;
else
match->Close[b] = -1;
return 0;
}
if (n->fWhat & RE_OPEN) {
// if (match->Open[b] == -1)
match->Open[b] = (int) (save - bop);
} else {
// if (match->Close[b] == -1)
match->Close[b] = (int) (save - bop);
}
return 1;
}
} else if (n->fWhat & RE_BRANCH) {
LPCTSTR save = rex;
int fl = flags;
if ((n->fWhat & RE_GREEDY) == 0) {
if (RxMatch(n->fPtr) == 1) return 1;
flags = fl;
rex = save;
} else {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -