📄 pattern.cpp
字号:
{
cur->next = t2;
cur = t2->next;
}
else
{
cur->next = t1;
cur = end;
}
}
break;
case ')':
if (!inParen) raiseError();
else if (inOr)
{
--curInd;
cur = cur->next = registerNode(new NFAAcceptNode);
flags = oldFlags;
return start;
}
else
{
if (ahead)
{
cur = cur->next = registerNode(new NFAAcceptNode);
flags = oldFlags;
return *end = registerNode(new NFALookAheadNode(start, pos));
}
else if (indep)
{
cur = cur->next = registerNode(new NFAAcceptNode);
flags = oldFlags;
return *end = registerNode(new NFAPossessiveQuantifierNode(this, start, 1, 1));
}
else // capping or noncapping, it doesnt matter
{
*end = cur = cur->next = registerNode(new NFAGroupTailNode(grc));
next = quantifyGroup(start, *end, grc);
if (next)
{
start = next;
*end = next->next;
}
flags = oldFlags;
return start;
}
}
break;
case '{': // registered pattern
cur->next = parseRegisteredPattern(&next);
if (cur->next) cur = next;
break;
case '*':
case '+':
case '?':
case '}':
case ']':
raiseError();
break;
default:
if ((flags & Pattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACICharNode(ch));
else next = registerNode(new NFACharNode(ch));
break;
}
if (next)
{
cur = cur->next = quantify(next);
}
}
if (inParen) raiseError();
else
{
if (inOr) cur = cur->next = registerNode(new NFAAcceptNode);
if (end) *end = cur;
}
flags = oldFlags;
if (error) return NULL;
return start;
}
Pattern * Pattern::compile(const std::string & pattern, const unsigned long mode)
{
Pattern * p = new Pattern(pattern);
NFANode * end;
p->flags = mode;
if ((mode & Pattern::LITERAL) != 0)
{
p->head = p->registerNode(new NFAStartNode);
if ((mode & Pattern::CASE_INSENSITIVE) != 0) p->head->next = p->registerNode(new NFACIQuoteNode(pattern));
else p->head->next = p->registerNode(new NFAQuoteNode(pattern));
p->head->next->next = p->registerNode(new NFAEndNode);
}
else
{
p->head = p->parse(0, 0, &end);
if (!p->head)
{
delete p;
p = NULL;
}
else
{
if (!(p->head && p->head->isStartOfInputNode()))
{
NFANode * n = p->registerNode(new NFAStartNode);
n->next = p->head;
p->head = n;
}
end->next = p->registerNode(new NFAEndNode);
}
}
if (p != NULL)
{
p->matcher = new Matcher(p, "");
}
return p;
}
Pattern * Pattern::compileAndKeep(const std::string & pattern, const unsigned long mode)
{
Pattern * ret = NULL;
std::map<std::string, Pattern*>::iterator it = compiledPatterns.find(pattern);
if (it != compiledPatterns.end())
{
ret = it->second;
}
else
{
ret = compile(pattern, mode);
compiledPatterns[pattern] = ret;
}
return ret;
}
std::string Pattern::replace(const std::string & pattern, const std::string & str,
const std::string & replacementText, const unsigned long mode)
{
std::string ret;
Pattern * p = Pattern::compile(pattern, mode);
if (p)
{
ret = p->replace(str, replacementText);
delete p;
}
return ret;
}
std::vector<std::string> Pattern::split(const std::string & pattern, const std::string & str, const bool keepEmptys,
const unsigned long limit, const unsigned long mode)
{
std::vector<std::string> ret;
Pattern * p = Pattern::compile(pattern, mode);
if (p)
{
ret = p->split(str, keepEmptys, limit);
delete p;
}
return ret;
}
std::vector<std::string> Pattern::findAll(const std::string & pattern, const std::string & str, const unsigned long mode)
{
std::vector<std::string> ret;
Pattern * p = Pattern::compile(pattern, mode);
if (p)
{
ret = p->findAll(str);
delete p;
}
return ret;
}
bool Pattern::matches(const std::string & pattern, const std::string & str, const unsigned long mode)
{
bool ret = 0;
Pattern * p = compile(pattern, mode);
if (p)
{
ret = p->matches(str);
delete p;
}
return ret;
}
bool Pattern::registerPattern(const std::string & name, const std::string & pattern, const unsigned long mode)
{
Pattern * p = Pattern::compile(pattern, mode);
if (!p) return 0;
Pattern::registeredPatterns[name] = std::make_pair(pattern, mode);
delete p;
return 1;
}
void Pattern::unregisterPatterns()
{
registeredPatterns.clear();
}
void Pattern::clearPatternCache()
{
std::map<std::string, Pattern*>::iterator it;
for (it = compiledPatterns.begin(); it != compiledPatterns.end(); ++it)
{
delete it->second;
}
compiledPatterns.clear();
}
std::pair<std::string, int> Pattern::findNthMatch(const std::string & pattern, const std::string & str,
const int matchNum, const unsigned long mode)
{
std::pair<std::string, int> ret;
Pattern * p = Pattern::compile(pattern, mode);
ret.second = -1;
if (p)
{
int i = -1;
p->matcher->setString(str);
while (i < matchNum && p->matcher->findNextMatch()) { ++i; }
if (i == matchNum && p->matcher->getStartingIndex() >= 0)
{
ret.first = p->matcher->getGroup(0);
ret.second = p->matcher->getStartingIndex();
}
delete p;
}
return ret;
}
Pattern::~Pattern()
{
if (matcher) delete matcher;
for (std::map<NFANode*, bool>::iterator it = nodes.begin(); it != nodes.end(); ++it)
{
delete it->first;
}
}
std::string Pattern::replace(const std::string & str, const std::string & replacementText)
{
int li = 0;
std::string ret = "";
matcher->setString(str);
while (matcher->findNextMatch())
{
ret += str.substr(li, matcher->getStartingIndex() - li);
ret += matcher->replaceWithGroups(replacementText);
li = matcher->getEndingIndex();
}
ret += str.substr(li);
return ret;
}
std::vector<std::string> Pattern::split(const std::string & str, const bool keepEmptys, const unsigned long limit)
{
unsigned long lim = (limit == 0 ? MAX_QMATCH : limit);
int li = 0;
std::vector<std::string> ret;
matcher->setString(str);
while (matcher->findNextMatch() && ret.size() < lim)
{
if (matcher->getStartingIndex() == 0 && keepEmptys) ret.push_back("");
if ((matcher->getStartingIndex() != matcher->getEndingIndex()) || keepEmptys)
{
if (li != matcher->getStartingIndex() || keepEmptys)
{
ret.push_back(str.substr(li, matcher->getStartingIndex() - li));
}
li = matcher->getEndingIndex();
}
}
if (li < (int)str.size()) ret.push_back(str.substr(li));
return ret;
}
std::vector<std::string> Pattern::findAll(const std::string & str)
{
matcher->setString(str);
return matcher->findAll();
}
bool Pattern::matches(const std::string & str)
{
matcher->setString(str);
return matcher->matches();
}
unsigned long Pattern::getFlags() const
{
return flags;
}
std::string Pattern::getPattern() const
{
return pattern;
}
Matcher * Pattern::createMatcher(const std::string & str)
{
return new Matcher(this, str);
}
// NFANode
NFANode::NFANode() { next = NULL; }
NFANode::~NFANode() { }
void NFANode::findAllNodes(std::map<NFANode*, bool> & soFar)
{
if (soFar.find(this) == soFar.end()) return;
soFar[this] = 1;
if (next) next->findAllNodes(soFar);
}
// NFACharNode
NFACharNode::NFACharNode(const char c) { ch = c; }
int NFACharNode::match(const std::string & str, Matcher * matcher, const int curInd) const
{
if (curInd < (int)str.size() && str[curInd] == ch) return next->match(str, matcher, curInd + 1);
return -1;
}
// NFACICharNode
NFACICharNode::NFACICharNode(const char c) { ch = tolower(c); }
int NFACICharNode::match(const std::string & str, Matcher * matcher, const int curInd) const
{
if (curInd < (int)str.size() && tolower(str[curInd]) == ch) return next->match(str, matcher, curInd + 1);
return -1;
}
// NFAStartNode
NFAStartNode::NFAStartNode() { }
int NFAStartNode::match(const std::string & str, Matcher * matcher, const int curInd) const
{
int ret = -1, ci = curInd;
matcher->starts[0] = curInd;
if ((matcher->getFlags() & Matcher::MATCH_ENTIRE_STRING) == (unsigned int)Matcher::MATCH_ENTIRE_STRING)
{
if (curInd != 0)
{
matcher->starts[0] = -1;
return -1;
}
return next->match(str, matcher, 0);
}
while ((ret = next->match(str, matcher, ci)) == -1 && ci < (int)str.size())
{
matcher->clearGroups();
matcher->starts[0] = ++ci;
}
if (ret < 0) matcher->starts[0] = -1;
return ret;
}
// NFAEndNode
NFAEndNode::NFAEndNode() { }
int NFAEndNode::match(const std::string & str, Matcher * matcher, const int curInd) const
{
matcher->ends[0] = curInd;
if ((matcher->getFlags() & Matcher::MATCH_ENTIRE_STRING) != 0)
{
if (curInd == (int)str.size()) return curInd;
matcher->ends[0] = -1;
return -1;
}
return curInd;
}
// NFAQuantifierNode
void NFAQuantifierNode::findAllNodes(std::map<NFANode*, bool> & soFar)
{
inner->findAllNodes(soFar);
NFANode::findAllNodes(soFar);
}
NFAQuantifierNode::NFAQuantifierNode(Pattern * pat, NFANode * internal, const int minMatch, const int maxMatch)
{
inner = internal;
inner->next = pat->registerNode(new NFAAcceptNode);
min = (minMatch < Pattern::MIN_QMATCH) ? Pattern::MIN_QMATCH : minMatch;
max = (maxMatch > Pattern::MAX_QMATCH) ? Pattern::MAX_QMATCH : maxMatch;
}
int NFAQuantifierNode::match(const std::string & str, Matcher * matcher, const int curInd) const
{
int i0, i1, i2 = 0;
i0 = i1 = curInd;
while (i2 < min)
{
++i2;
i1 = inner->match(str, matcher, i0);
if (i1 <= i0) return i1; // i1 < i0 means i1 is -1
i0 = i1;
}
return i1;
}
// NFAGreedyQuantifierNode
NFAGreedyQuantifierNode::NFAGreedyQuantifierNode(Pattern * pat, NFANode * internal, const int minMatch, const int maxMatch)
: NFAQuantifierNode(pat, internal, minMatch, maxMatch) { }
int NFAGreedyQuantifierNode::match(const std::string & str, Matcher * matcher, const int curInd) const
{
int t = NFAQuantifierNode::match(str, matcher, curInd);
if (t != -1) return matchInternal(str, matcher, t, min);
return t;
}
int NFAGreedyQuantifierNode::matchInternal(const std::string & str, Matcher * matcher, const int curInd, const int soFar) const
{
if (soFar >= max) return next->match(str, matcher, curInd);
int i, j;
i = inner->match(str, matcher, curInd);
if (i != -1)
{
j = matchInternal(str, matcher, i, soFar + 1);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -