📄 pattern.cpp
字号:
for (i = 0; i <= 0x1F; ++i) s[i] = i;
s[0x20] = 0x7F;
curInd += 7;
return s;
}
if (s7 == "{ASCII}")
{
std::string s(0x80, ' ');
for (int i = 0; i < 0x80; ++i) s[i] = i;
curInd += 7;
return s;
}
if (pattern.substr(curInd, 8) == "{XDigit}") { curInd += 8; return "abcdefABCDEF0123456789"; }
raiseError();
return "";
}
NFANode * Pattern::parseBackref()
{
#define is_dig(x) ((x) >= '0' && (x) <= '9')
#define to_int(x) ((x) - '0')
int ci = curInd;
int oldRef = 0, ref = 0;
while (ci < (int)pattern.size() && is_dig(pattern[ci]) && (ref < 10 || ref < groupCount))
{
oldRef = ref;
ref = ref * 10 + to_int(pattern[ci++]);
}
if (ci == (int)pattern.size())
{
oldRef = ref;
++ci;
}
if (oldRef < 0 || ci <= curInd)
{
raiseError();
return registerNode(new NFAReferenceNode(-1));
}
curInd = ci;
return registerNode(new NFAReferenceNode(ref));
#undef is_dig
#undef to_int
}
std::string Pattern::parseOctal()
{
#define islowoc(x) ((x) >= '0' && (x) <= '3')
#define isoc(x) ((x) >= '0' && (x) <= '7')
#define fromoc(x) ((x) - '0')
int ci = curInd;
char ch1 = (ci + 0 < (int)pattern.size()) ? pattern[ci + 0] : -1;
char ch2 = (ci + 1 < (int)pattern.size()) ? pattern[ci + 1] : -1;
char ch3 = (ci + 2 < (int)pattern.size()) ? pattern[ci + 2] : -1;
std::string s = " ";
if (islowoc(ch1) && isoc(ch2))
{
curInd += 2;
s[0] = fromoc(ch1) * 8 + fromoc(ch2);
if (isoc(ch3))
{
++curInd;
s[0] = s[0] * 8 + fromoc(ch3);
}
}
else if (isoc(ch1) && isoc(ch2))
{
curInd += 2;
s[0] = fromoc(ch1) * 8 + fromoc(ch2);
}
else raiseError();
return s;
#undef islowoc
#undef isoc
#undef fromoc
}
std::string Pattern::parseHex()
{
#define to_low(x) (((x) >= 'A' && (x) <= 'Z') ? ((x) - 'A' + 'a') : (x))
#define is_dig(x) ((x) >= '0' && (x) <= '9')
#define is_hex(x) (is_dig(x) || (to_low(x) >= 'a' && to_low(x) <= 'f'))
#define to_int(x) ((is_dig(x)) ? ((x) - '0') : (to_low(x) - 'a' + 10))
int ci = curInd;
char ch1 = (ci + 0 < (int)pattern.size()) ? pattern[ci + 0] : -1;
char ch2 = (ci + 1 < (int)pattern.size()) ? pattern[ci + 1] : -1;
std::string s = " ";
if (is_hex(ch1) && is_hex(ch2))
{
curInd += 2;
s[0] = (to_int(ch1) << 4 & 0xF0) | (to_int(ch2) & 0x0F);
}
return s;
#undef to_low
#undef is_dig
#undef is_hex
#undef to_int
}
std::string Pattern::parseEscape(bool & inv, bool & quo)
{
char ch = pattern[curInd++];
std::string classes = "";
if (curInd > (int)pattern.size())
{
raiseError();
return NULL;
}
quo = 0;
inv = 0;
switch (ch)
{
case 'p': classes = parsePosix(); break;
case 'P': classes = "!!"; classes += parsePosix(); break;
case 'd': classes = "0123456789"; break;
case 'D': classes = "!!0123456789"; break;
case 's': classes = " \t\r\n\f"; break;
case 'S': classes = "!! \t\r\n\f"; break;
case 'w': classes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; break;
case 'W': classes = "!!abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; break;
case '0': classes = parseOctal(); break;
case 'x': classes = parseHex(); break;
case 'Q': quo = 1; break;
case 't': classes = "\t"; break;
case 'r': classes = "\r"; break;
case 'n': classes = "\n"; break;
case 'f': classes = "\f"; break;
case 'a': classes = "\a"; break;
case 'e': classes = "\r"; break;
default: classes = " "; classes[0] = ch; break;
}
if (classes.substr(0, 2) == "!!")
{
classes = classes.substr(2);
inv = 1;
}
return classes;
}
NFANode * Pattern::parseRegisteredPattern(NFANode ** end)
{
int i, j;
std::string s;
NFANode * ret = NULL;
for (i = curInd; i < (int)pattern.size() && pattern[i] != '}'; ++i) { }
if (pattern[i] != '}') { raiseError(); return NULL; }
if (i == curInd + 1) { raiseError(); return NULL; } // {}
if (
!(
(pattern[curInd] >= 'a' && pattern[curInd] <= 'z') ||
(pattern[curInd] >= 'A' && pattern[curInd] <= 'Z') ||
(pattern[curInd] == '_')
)
)
{
raiseError();
return NULL;
}
for (j = curInd; !error && j < i; ++j)
{
if (
!(
(pattern[j] >= 'a' && pattern[j] <= 'z') ||
(pattern[j] >= 'A' && pattern[j] <= 'Z') ||
(pattern[j] >= '0' && pattern[j] <= '9') ||
(pattern[j] == '_')
)
)
{
raiseError();
return NULL;
}
}
s = pattern.substr(curInd, i - curInd);
if (registeredPatterns.find(s) == registeredPatterns.end()) raiseError();
else
{
unsigned long oflags = flags;
std::string op = pattern;
int ci = i + 1;
pattern = registeredPatterns[s].first;
curInd = 0;
flags = registeredPatterns[s].second;
--groupCount;
ret = parse(0, 0, end);
pattern = op;
curInd = ci;
flags = oflags;
}
if (error) { *end = ret = NULL; }
return ret;
}
// look behind should interpret everything as a literal (except \\) since the
// pattern must have a concrete length
NFANode * Pattern::parseBehind(const bool pos, NFANode ** end)
{
std::string t = "";
while (curInd < (int)pattern.size() && pattern[curInd] != ')')
{
char ch = pattern[curInd++];
t += " ";
if (ch == '\\')
{
if (curInd + 1 >= (int)pattern.size())
{
raiseError();
return *end = registerNode(new NFACharNode(' '));
}
ch = pattern[curInd++];
}
t[t.size() - 1] = ch;
}
if (curInd >= (int)pattern.size() || pattern[curInd] != ')') raiseError();
else ++curInd;
return *end = registerNode(new NFALookBehindNode(t, pos));
}
NFANode * Pattern::parseQuote()
{
bool done = 0;
std::string s = "";
while (!done)
{
if (curInd >= (int)pattern.size())
{
raiseError();
done = 1;
}
else if (pattern.substr(curInd, 2) == "\\E")
{
curInd += 2;
done = 1;
}
else if (pattern[curInd] == '\\')
{
s += " ";
s[s.size() - 1] = pattern[++curInd];
++curInd;
}
else
{
s += " ";
s[s.size() - 1] = pattern[curInd++];
}
}
if ((flags & Pattern::CASE_INSENSITIVE) != 0) return registerNode(new NFACIQuoteNode(s));
return registerNode(new NFAQuoteNode(s));
}
NFANode * Pattern::parse(const bool inParen, const bool inOr, NFANode ** end)
{
NFANode * start, * cur, * next = NULL;
std::string t;
int grc = groupCount++;
bool inv, quo;
bool ahead = 0, pos = 0, noncap = 0, indep = 0;
unsigned long oldFlags = flags;
if (inParen)
{
if (pattern[curInd] == '?')
{
++curInd;
--groupCount;
if (pattern[curInd] == ':') { noncap = 1; ++curInd; grc = --nonCapGroupCount; }
else if (pattern[curInd] == '=') { ++curInd; ahead = 1; pos = 1; }
else if (pattern[curInd] == '!') { ++curInd; ahead = 1; pos = 0; }
else if (pattern.substr(curInd, 2) == "<=") { curInd += 2; return parseBehind(1, end); }
else if (pattern.substr(curInd, 2) == "<!") { curInd += 2; return parseBehind(0, end); }
else if (pattern[curInd] == '>') { ++curInd; indep = 1; }
else
{
bool negate = false, done = false;
while (!done)
{
if (curInd >= (int)pattern.size())
{
raiseError();
return NULL;
}
else if (negate)
{
switch (pattern[curInd])
{
case 'i': flags &= ~Pattern::CASE_INSENSITIVE; break;
case 'd': flags &= ~Pattern::UNIX_LINE_MODE; break;
case 'm': flags &= ~Pattern::MULTILINE_MATCHING; break;
case 's': flags &= ~Pattern::DOT_MATCHES_ALL; break;
case ':': done = true; break;
case ')':
++curInd;
*end = registerNode(new NFALookBehindNode("", true));
return *end;
case '-':
default: raiseError(); return NULL;
}
}
else
{
switch (pattern[curInd])
{
case 'i': flags |= Pattern::CASE_INSENSITIVE; break;
case 'd': flags |= Pattern::UNIX_LINE_MODE; break;
case 'm': flags |= Pattern::MULTILINE_MATCHING; break;
case 's': flags |= Pattern::DOT_MATCHES_ALL; break;
case ':': done = true; break;
case '-': negate = true; break;
case ')':
++curInd;
*end = registerNode(new NFALookBehindNode("", true));
return *end;
default: raiseError(); return NULL;
}
}
++curInd;
}
noncap = 1;
grc = --nonCapGroupCount;
}
if (noncap) cur = start = registerNode(new NFAGroupHeadNode(grc));
else cur = start = registerNode(new NFASubStartNode);
}
else cur = start = registerNode(new NFAGroupHeadNode(grc));
}
else cur = start = registerNode(new NFASubStartNode);
while (curInd < (int)pattern.size())
{
char ch = pattern[curInd++];
next = NULL;
if (error) return NULL;
switch (ch)
{
case '^':
if ((flags & Pattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAStartOfLineNode);
else next = registerNode(new NFAStartOfInputNode);
break;
case '$':
if ((flags & Pattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAEndOfLineNode);
else next = registerNode(new NFAEndOfInputNode(0));
break;
case '|':
--groupCount;
cur->next = registerNode(new NFAAcceptNode);
cur = start = registerNode(new NFAOrNode(start, parse(inParen, 1)));
break;
case '\\':
if (curInd < (int)pattern.size())
{
bool eoi = 0;
switch (pattern[curInd])
{
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': next = parseBackref(); break;
case 'A': ++curInd; next = registerNode(new NFAStartOfInputNode); break;
case 'B': ++curInd; next = registerNode(new NFAWordBoundaryNode(0)); break;
case 'b': ++curInd; next = registerNode(new NFAWordBoundaryNode(1)); break;
case 'G': ++curInd; next = registerNode(new NFAEndOfMatchNode); break;
case 'Z': eoi = 1;
case 'z': ++curInd; next = registerNode(new NFAEndOfInputNode(eoi)); break;
default:
t = parseEscape(inv, quo);
if (!quo)
{
if (t.size() > 1 || inv)
{
if ((flags & Pattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACIClassNode(t, inv));
else next = registerNode(new NFAClassNode(t, inv));
}
else
{
next = registerNode(new NFACharNode(t[0]));
}
}
else
{
next = parseQuote();
}
}
}
else raiseError();
break;
case '[':
if ((flags & Pattern::CASE_INSENSITIVE) == 0)
{
NFAClassNode * clazz = new NFAClassNode();
std::string s = parseClass();
for (int i = 0; i < (int)s.size(); ++i) clazz->vals[s[i]] = 1;
next = registerNode(clazz);
}
else
{
NFACIClassNode * clazz = new NFACIClassNode();
std::string s = parseClass();
for (int i = 0; i < (int)s.size(); ++i) clazz->vals[tolower(s[i])] = 1;
next = registerNode(clazz);
}
break;
case '.':
{
bool useN = 1, useR = 1;
NFAClassNode * clazz = new NFAClassNode(1);
if ((flags & Pattern::UNIX_LINE_MODE) != 0) useR = 0;
if ((flags & Pattern::DOT_MATCHES_ALL) != 0) useN = useR = 0;
if (useN) clazz->vals['\n'] = 1;
if (useR) clazz->vals['\r'] = 1;
next = registerNode(clazz);
}
break;
case '(':
{
NFANode * end, * t1, * t2;
t1 = parse(1, 0, &end);
if (!t1) raiseError();
else if (t1->isGroupHeadNode() && (t2 = quantifyGroup(t1, end, grc)) != NULL)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -