📄 wcpattern.cpp
字号:
{
bool useN = 1, useR = 1;
NFAClassUNode * clazz = new NFAClassUNode(1);
if ((flags & WCPattern::UNIX_LINE_MODE) != 0) useR = 0;
if ((flags & WCPattern::DOT_MATCHES_ALL) != 0) useN = useR = 0;
if (useN) clazz->vals[(wchar_t)'\n'] = 1;
if (useR) clazz->vals[(wchar_t)'\r'] = 1;
next = registerNode(clazz);
}
break;
case (wchar_t)'(':
{
NFAUNode * end, * t1, * t2;
t1 = parse(1, 0, &end);
if (!t1) raiseError();
else if (t1->isGroupHeadNode() && (t2 = quantifyGroup(t1, end, grc)) != NULL)
{
cur->next = t2;
cur = t2->next;
}
else
{
cur->next = t1;
cur = end;
}
}
break;
case (wchar_t)')':
if (!inParen) raiseError();
else if (inOr)
{
--curInd;
cur = cur->next = registerNode(new NFAAcceptUNode);
flags = oldFlags;
return start;
}
else
{
if (ahead)
{
cur = cur->next = registerNode(new NFAAcceptUNode);
flags = oldFlags;
return *end = registerNode(new NFALookAheadUNode(start, pos));
}
else if (indep)
{
cur = cur->next = registerNode(new NFAAcceptUNode);
flags = oldFlags;
return *end = registerNode(new NFAPossessiveQuantifierUNode(this, start, 1, 1));
}
else // capping or noncapping, it doesnt matter
{
*end = cur = cur->next = registerNode(new NFAGroupTailUNode(grc));
next = quantifyGroup(start, *end, grc);
if (next)
{
start = next;
*end = next->next;
}
flags = oldFlags;
return start;
}
}
break;
case (wchar_t)'{': // registered pattern
cur->next = parseRegisteredWCPattern(&next);
if (cur->next) cur = next;
break;
case (wchar_t)'*':
case (wchar_t)'+':
case (wchar_t)'?':
case (wchar_t)'}':
case (wchar_t)']':
raiseError();
break;
default:
if ((flags & WCPattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACICharUNode(ch));
else next = registerNode(new NFACharUNode(ch));
break;
}
if (next) cur = cur->next = quantify(next);
}
if (inParen) raiseError();
else
{
if (inOr) cur = cur->next = registerNode(new NFAAcceptUNode);
if (end) *end = cur;
}
flags = oldFlags;
if (error) return NULL;
return start;
}
// Builds a new WCPattern from `pattern` using the flag bits in `mode`.
//
// With WCPattern::LITERAL set, the pattern text is not parsed: the node chain
// is start -> quote node (case-insensitive variant when CASE_INSENSITIVE is
// set) -> end. Otherwise the text is handed to parse(); if parse() yields no
// head node the half-built object is deleted and NULL is returned.
//
// On success the returned object also carries a matcher created over an
// empty string. The caller receives a pointer obtained from `new`.
WCPattern * WCPattern::compile(const std::wstring & pattern, const unsigned long mode)
{
  WCPattern * compiled = new WCPattern(pattern);
  compiled->flags = mode;

  const bool literal = (mode & WCPattern::LITERAL) != 0;
  if (literal)
  {
    // Literal mode: start -> quoted text -> end, no parsing involved.
    compiled->head = compiled->registerNode(new NFAStartUNode);

    NFAUNode * quote;
    if ((mode & WCPattern::CASE_INSENSITIVE) != 0)
    {
      quote = compiled->registerNode(new NFACIQuoteUNode(pattern));
    }
    else
    {
      quote = compiled->registerNode(new NFAQuoteUNode(pattern));
    }
    compiled->head->next = quote;
    quote->next = compiled->registerNode(new NFAEndUNode);
  }
  else
  {
    NFAUNode * tail;
    compiled->head = compiled->parse(0, 0, &tail);
    if (!compiled->head)
    {
      // Parsing failed: discard the pattern and report failure.
      delete compiled;
      return NULL;
    }

    // Make sure the chain begins with a start node unless the parsed head
    // already reports itself as a start-of-input node.
    if (!compiled->head->isStartOfInputNode())
    {
      NFAUNode * startNode = compiled->registerNode(new NFAStartUNode);
      startNode->next = compiled->head;
      compiled->head = startNode;
    }
    tail->next = compiled->registerNode(new NFAEndUNode);
  }

  compiled->matcher = new WCMatcher(compiled, L"");
  return compiled;
}
// Returns the cached WCPattern for `pattern`, compiling and caching it on a
// cache miss.
//
// The cache (compiledWCPatterns) is looked up by pattern text only; `mode` is
// used solely when a new pattern has to be compiled. Whatever compile()
// returns is stored, including NULL for a pattern that failed to compile.
WCPattern * WCPattern::compileAndKeep(const std::wstring & pattern, const unsigned long mode)
{
  std::map<std::wstring, WCPattern*>::iterator cached = compiledWCPatterns.find(pattern);
  if (cached != compiledWCPatterns.end())
  {
    return cached->second;
  }

  WCPattern * fresh = compile(pattern, mode);
  compiledWCPatterns[pattern] = fresh;
  return fresh;
}
std::wstring WCPattern::replace(const std::wstring & pattern, const std::wstring & str,
const std::wstring & replacementText, const unsigned long mode)
{
std::wstring ret;
WCPattern * p = WCPattern::compile(pattern, mode);
if (p)
{
ret = p->replace(str, replacementText);
delete p;
}
return ret;
}
std::vector<std::wstring> WCPattern::split(const std::wstring & pattern, const std::wstring & str, const bool keepEmptys,
const unsigned long limit, const unsigned long mode)
{
std::vector<std::wstring> ret;
WCPattern * p = WCPattern::compile(pattern, mode);
if (p)
{
ret = p->split(str, keepEmptys, limit);
delete p;
}
return ret;
}
std::vector<std::wstring> WCPattern::findAll(const std::wstring & pattern, const std::wstring & str, const unsigned long mode)
{
std::vector<std::wstring> ret;
WCPattern * p = WCPattern::compile(pattern, mode);
if (p)
{
ret = p->findAll(str);
delete p;
}
return ret;
}
bool WCPattern::matches(const std::wstring & pattern, const std::wstring & str, const unsigned long mode)
{
bool ret = 0;
WCPattern * p = compile(pattern, mode);
if (p)
{
ret = p->matches(str);
delete p;
}
return ret;
}
bool WCPattern::registerWCPattern(const std::wstring & name, const std::wstring & pattern, const unsigned long mode)
{
WCPattern * p = WCPattern::compile(pattern, mode);
if (!p) return 0;
WCPattern::registeredWCPatterns[name] = std::make_pair(pattern, mode);
delete p;
return 1;
}
// Removes every entry from the registered-pattern table.
void WCPattern::unregisterWCPatterns()
{
  WCPattern::registeredWCPatterns.clear();
}
void WCPattern::clearWCPatternCache()
{
std::map<std::wstring, WCPattern*>::iterator it;
for (it = compiledWCPatterns.begin(); it != compiledWCPatterns.end(); ++it)
{
delete it->second;
}
compiledWCPatterns.clear();
}
std::pair<std::wstring, int> WCPattern::findNthMatch(const std::wstring & pattern, const std::wstring & str,
const int matchNum, const unsigned long mode)
{
std::pair<std::wstring, int> ret;
WCPattern * p = WCPattern::compile(pattern, mode);
ret.second = -1;
if (p)
{
int i = -1;
p->matcher->setString(str);
while (i < matchNum && p->matcher->findNextMatch()) { ++i; }
if (i == matchNum && p->matcher->getStartingIndex() >= 0)
{
ret.first = p->matcher->getGroup(0);
ret.second = p->matcher->getStartingIndex();
}
delete p;
}
return ret;
}
// Releases the pattern's own matcher and every node recorded as a key of
// the `nodes` map.
WCPattern::~WCPattern()
{
  // Disabled alternative that would rebuild the node set by walking from head:
  //   nodes.clear();
  //   if (head) head->findAllNodes(nodes);

  delete matcher; // deleting a null pointer is a no-op

  std::map<NFAUNode*, bool>::iterator node = nodes.begin();
  while (node != nodes.end())
  {
    delete node->first;
    ++node;
  }
}
// Returns a copy of `str` in which every match found by this pattern's
// matcher is replaced by matcher->replaceWithGroups(replacementText).
// Text between matches, and after the last match, is copied unchanged.
std::wstring WCPattern::replace(const std::wstring & str, const std::wstring & replacementText)
{
  matcher->setString(str);

  std::wstring result;
  int resumeAt = 0; // index in str of the first character not yet copied

  while (matcher->findNextMatch())
  {
    // unmatched text in front of this match
    result.append(str, resumeAt, matcher->getStartingIndex() - resumeAt);
    // the replacement for the match itself
    result.append(matcher->replaceWithGroups(replacementText));
    resumeAt = matcher->getEndingIndex();
  }

  // whatever follows the final match
  result.append(str, resumeAt, std::wstring::npos);
  return result;
}
// Splits `str` at the matches of this pattern.
//
//   keepEmptys - when false, zero-length matches are ignored and empty pieces
//                between matches are dropped; when true, a piece is pushed
//                for every match, and a match starting at index 0 pushes an
//                additional empty string first.
//   limit      - matching stops once the result holds this many pieces;
//                0 selects MAX_QMATCH.
//
// The text after the last processed match is appended only if it is
// non-empty.
std::vector<std::wstring> WCPattern::split(const std::wstring & str, const bool keepEmptys, const unsigned long limit)
{
  const unsigned long maxPieces = (limit != 0 ? limit : MAX_QMATCH);
  std::vector<std::wstring> pieces;
  int prevEnd = 0; // end index of the last match that was consumed

  matcher->setString(str);
  while (matcher->findNextMatch() && pieces.size() < maxPieces)
  {
    if (matcher->getStartingIndex() == 0 && keepEmptys)
    {
      pieces.push_back(L"");
    }

    // A zero-length match is skipped entirely unless empties are kept.
    const bool zeroLength = (matcher->getStartingIndex() == matcher->getEndingIndex());
    if (zeroLength && !keepEmptys)
    {
      continue;
    }

    if (keepEmptys || prevEnd != matcher->getStartingIndex())
    {
      pieces.push_back(str.substr(prevEnd, matcher->getStartingIndex() - prevEnd));
    }
    prevEnd = matcher->getEndingIndex();
  }

  if (prevEnd < static_cast<int>(str.size()))
  {
    pieces.push_back(str.substr(prevEnd));
  }
  return pieces;
}
// Points this pattern's matcher at `str` and returns what the matcher's
// findAll() reports.
std::vector<std::wstring> WCPattern::findAll(const std::wstring & str)
{
  matcher->setString(str);
  std::vector<std::wstring> found = matcher->findAll();
  return found;
}
// Points this pattern's matcher at `str` and returns the result of the
// matcher's matches() call.
bool WCPattern::matches(const std::wstring & str)
{
  matcher->setString(str);
  const bool matched = matcher->matches();
  return matched;
}
unsigned long WCPattern::getFlags() const
{
return flags;
}
std::wstring WCPattern::getWCPattern() const
{
return pattern;
}
// Allocates a new WCMatcher bound to this pattern and `str`.
// The pointer comes from `new`; this function does not keep it.
WCMatcher * WCPattern::createWCMatcher(const std::wstring & str)
{
  WCMatcher * created = new WCMatcher(this, str);
  return created;
}
// NFAUNode

// A freshly constructed node has no successor.
NFAUNode::NFAUNode()
  : next(NULL)
{
}

// Nothing to release here.
NFAUNode::~NFAUNode()
{
}
// Records this node in `soFar` and continues with the successor chain.
//
//   soFar - set of nodes visited so far (node -> true); it is both read, to
//           avoid revisiting a node, and extended by this call.
//
// The visited check must stop when the node IS already present. The previous
// test (`== soFar.end()`) returned for every node not yet recorded, so the
// map was never filled and the traversal never advanced.
void NFAUNode::findAllNodes(std::map<NFAUNode*, bool> & soFar)
{
  if (soFar.find(this) != soFar.end()) return; // already visited
  soFar[this] = 1;
  if (next) next->findAllNodes(soFar);
}
// NFACharUNode

// Remembers the single character this node accepts.
NFACharUNode::NFACharUNode(const wchar_t c)
{
  this->ch = c;
}

// Succeeds only if str[curInd] exists and equals the stored character; in
// that case matching continues in the next node at curInd + 1.
// Returns -1 otherwise.
int NFACharUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  if (curInd >= static_cast<int>(str.size())) return -1;
  if (str[curInd] != ch) return -1;
  return next->match(str, matcher, curInd + 1);
}
// NFACICharUNode

// Stores the towlower() form of the accepted character.
NFACICharUNode::NFACICharUNode(const wchar_t c)
{
  this->ch = towlower(c);
}

// Like a plain character node, but str[curInd] is passed through towlower()
// before being compared with the stored character.
// Returns -1 on mismatch or when curInd is at or past the end of str.
int NFACICharUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  if (curInd >= static_cast<int>(str.size())) return -1;
  if ((wchar_t)towlower(str[curInd]) != ch) return -1;
  return next->match(str, matcher, curInd + 1);
}
// NFAStartUNode

NFAStartUNode::NFAStartUNode()
{
}

// Entry node of a match attempt; maintains matcher->starts[0].
//
// With MATCH_ENTIRE_STRING set on the matcher, the only attempt made is at
// index 0 (any other curInd fails immediately). Otherwise the rest of the
// chain is tried at curInd, curInd + 1, ... until it returns something other
// than -1 or the end of `str` has been tried; groups are cleared before each
// retry. starts[0] is reset to -1 when the result is negative.
int NFAStartUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  matcher->starts[0] = curInd;

  const bool wholeString =
    (matcher->getFlags() & WCMatcher::MATCH_ENTIRE_STRING) == (unsigned int)WCMatcher::MATCH_ENTIRE_STRING;
  if (wholeString)
  {
    if (curInd == 0)
    {
      return next->match(str, matcher, 0);
    }
    matcher->starts[0] = -1;
    return -1;
  }

  const int length = static_cast<int>(str.size());
  int from = curInd;
  int result;
  for (;;)
  {
    result = next->match(str, matcher, from);
    if (result != -1 || from >= length) break;

    // Failed at this offset: forget captured groups and slide forward by one.
    matcher->clearGroups();
    ++from;
    matcher->starts[0] = from;
  }

  if (result < 0)
  {
    matcher->starts[0] = -1;
  }
  return result;
}
// NFAEndUNode

NFAEndUNode::NFAEndUNode()
{
}

// Final node of the chain; records curInd in matcher->ends[0] and returns it.
// When MATCH_ENTIRE_STRING is set on the matcher, that only succeeds if
// curInd equals str.size(); otherwise ends[0] is reset to -1 and -1 is
// returned.
int NFAEndUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  matcher->ends[0] = curInd;

  const bool wholeString = (matcher->getFlags() & WCMatcher::MATCH_ENTIRE_STRING) != 0;
  if (wholeString && curInd != static_cast<int>(str.size()))
  {
    matcher->ends[0] = -1;
    return -1;
  }
  return curInd;
}
// NFAQuantifierUNode

// Runs the node traversal over the quantified inner chain first, then hands
// over to the base NFAUNode implementation for this node.
void NFAQuantifierUNode::findAllNodes(std::map<NFAUNode*, bool> & soFar)
{
  inner->findAllNodes(soFar);
  this->NFAUNode::findAllNodes(soFar);
}
// Wraps `internal` as the quantified sub-expression.
//
//   pat      - owning pattern; used to register the accept node that is
//              attached as the successor of `internal`.
//   internal - node to be repeated; its `next` pointer is overwritten.
//   minMatch - lower repetition bound, raised to WCPattern::MIN_QMATCH if
//              it is smaller than that.
//   maxMatch - upper repetition bound, lowered to WCPattern::MAX_QMATCH if
//              it is larger than that.
NFAQuantifierUNode::NFAQuantifierUNode(WCPattern * pat, NFAUNode * internal, const int minMatch, const int maxMatch)
{
  inner = internal;
  inner->next = pat->registerNode(new NFAAcceptUNode);

  if (minMatch < WCPattern::MIN_QMATCH) min = WCPattern::MIN_QMATCH;
  else                                  min = minMatch;

  if (maxMatch > WCPattern::MAX_QMATCH) max = WCPattern::MAX_QMATCH;
  else                                  max = maxMatch;
}
// Applies the inner chain `min` times in a row, starting at curInd.
//
// Returns the index reached after the last mandatory repetition (curInd
// itself when min is 0). If a repetition does not move past the position it
// started from, its result is returned immediately; that covers both a
// failure (-1) and a repetition that consumed nothing.
int NFAQuantifierUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  int pos = curInd;     // where the next repetition begins
  int reached = curInd; // result of the most recent repetition

  for (int rep = 0; rep < min; ++rep)
  {
    reached = inner->match(str, matcher, pos);
    if (reached <= pos) return reached;
    pos = reached;
  }
  return reached;
}
// NFAGreedyQuantifierUNode

// All construction work is done by the NFAQuantifierUNode base.
NFAGreedyQuantifierUNode::NFAGreedyQuantifierUNode(WCPattern * pat, NFAUNode * internal, const int minMatch, const int maxMatch)
  : NFAQuantifierUNode(pat, internal, minMatch, maxMatch)
{
}

// First runs the base-class match for the mandatory repetitions. If that
// returns -1 the failure is passed on; otherwise matchInternal() takes over
// from the index reached, with `min` as the count so far.
int NFAGreedyQuantifierUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  const int afterMin = NFAQuantifierUNode::match(str, matcher, curInd);
  if (afterMin == -1)
  {
    return afterMin;
  }
  return matchInternal(str, matcher, afterMin, min);
}
int NFAGreedyQuantifierUNode::matchInternal(const std::wstring & str, WCMatcher * matcher, const int curInd, const int soFar) const
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -