📄 recompiler.java
字号:
// Loop while we've got input
atomLoop:
while (idx < len)
{
// Is there a next char?
if ((idx + 1) < len)
{
char c = pattern.charAt(idx + 1);
// If the next 'char' is an escape, look past the whole escape
if (pattern.charAt(idx) == '\\')
{
int idxEscape = idx;
escape();
if (idx < len)
{
c = pattern.charAt(idx);
}
idx = idxEscape;
}
// Switch on next char
switch (c)
{
case '{':
case '?':
case '*':
case '+':
// If the next character is a closure operator and our atom is non-empty, the
// current character should bind to the closure operator rather than the atom
if (lenAtom != 0)
{
break atomLoop;
}
}
}
// Switch on current char
switch (pattern.charAt(idx))
{
case ']':
case '^':
case '$':
case '.':
case '[':
case '(':
case ')':
case '|':
break atomLoop;
case '{':
case '?':
case '*':
case '+':
// We should have an atom by now
if (lenAtom == 0)
{
// No atom before closure
syntaxError("Missing operand to closure");
}
break atomLoop;
case '\\':
{
// Get the escaped character (advances input automatically)
int idxBeforeEscape = idx;
char c = escape();
// Check if it's a simple escape (as opposed to, say, a backreference)
if ((c & ESC_MASK) == ESC_MASK)
{
// Not a simple escape, so backup to where we were before the escape.
idx = idxBeforeEscape;
break atomLoop;
}
// Add escaped char to atom
emit(c);
lenAtom++;
}
break;
default:
// Add normal character to atom
emit(pattern.charAt(idx++));
lenAtom++;
break;
}
}
// This "shouldn't" happen
if (lenAtom == 0)
{
internalError();
}
// Emit the atom length into the program
instruction[ret + RE.offsetOpdata] = (char)lenAtom;
return ret;
}
/**
* Match a terminal node.
* @param flags Flags
* @return Index of terminal node (closeable)
* @exception RESyntaxException Thrown if the regular expression has invalid syntax.
*/
int terminal(int[] flags) throws RESyntaxException
{
switch (pattern.charAt(idx))
{
case RE.OP_EOL:
case RE.OP_BOL:
case RE.OP_ANY:
return node(pattern.charAt(idx++), 0);
case '[':
return characterClass();
case '(':
return expr(flags);
case ')':
syntaxError("Unexpected close paren");
case '|':
internalError();
case ']':
syntaxError("Mismatched class");
case 0:
syntaxError("Unexpected end of input");
case '?':
case '+':
case '{':
case '*':
syntaxError("Missing operand to closure");
case '\\':
{
// Don't forget, escape() advances the input stream!
int idxBeforeEscape = idx;
// Switch on escaped character
switch (escape())
{
case ESC_CLASS:
case ESC_COMPLEX:
flags[0] &= ~NODE_NULLABLE;
return node(RE.OP_ESCAPE, pattern.charAt(idx - 1));
case ESC_BACKREF:
{
char backreference = (char)(pattern.charAt(idx - 1) - '0');
if (parens <= backreference)
{
syntaxError("Bad backreference");
}
flags[0] |= NODE_NULLABLE;
return node(RE.OP_BACKREF, backreference);
}
default:
// We had a simple escape and we want to have it end up in
// an atom, so we back up and fall though to the default handling
idx = idxBeforeEscape;
flags[0] &= ~NODE_NULLABLE;
break;
}
}
}
// Everything above either fails or returns.
// If it wasn't one of the above, it must be the start of an atom.
flags[0] &= ~NODE_NULLABLE;
return atom();
}
/**
* Compile a possibly closured terminal
* @param flags Flags passed by reference
* @return Index of closured node
* @exception RESyntaxException Thrown if the regular expression has invalid syntax.
*/
int closure(int[] flags) throws RESyntaxException
{
// Before terminal
int idxBeforeTerminal = idx;
// Values to pass by reference to terminal()
int[] terminalFlags = { NODE_NORMAL };
// Get terminal symbol
int ret = terminal(terminalFlags);
// Or in flags from terminal symbol
flags[0] |= terminalFlags[0];
// Advance input, set NODE_NULLABLE flag and do sanity checks
if (idx >= len)
{
return ret;
}
boolean greedy = true;
char closureType = pattern.charAt(idx);
switch (closureType)
{
case '?':
case '*':
// The current node can be null
flags[0] |= NODE_NULLABLE;
case '+':
// Eat closure character
idx++;
case '{':
// Don't allow blantant stupidity
int opcode = instruction[ret + RE.offsetOpcode];
if (opcode == RE.OP_BOL || opcode == RE.OP_EOL)
{
syntaxError("Bad closure operand");
}
if ((terminalFlags[0] & NODE_NULLABLE) != 0)
{
syntaxError("Closure operand can't be nullable");
}
break;
}
// If the next character is a '?', make the closure non-greedy (reluctant)
if (idx < len && pattern.charAt(idx) == '?')
{
idx++;
greedy = false;
}
if (greedy)
{
// Actually do the closure now
switch (closureType)
{
case '{':
{
// We look for our bracket in the list
boolean found = false;
int i;
allocBrackets();
for (i = 0; i < brackets; i++)
{
if (bracketStart[i] == idx)
{
found = true;
break;
}
}
// If its not in the list we parse the {m,n}
if (!found)
{
if (brackets >= maxBrackets)
{
syntaxError("Too many bracketed closures (limit is 10)");
}
bracketStart[brackets] = idx;
bracket();
bracketEnd[brackets] = idx;
i = brackets++;
}
// If there's a min, rewind stream and reparse
if (--bracketMin[i] > 0)
{
// Rewind stream and run it through again
idx = idxBeforeTerminal;
break;
}
// Do the right thing for maximum ({m,})
if (bracketOpt[i] == bracketFinished)
{
// Drop through now and closure expression.
// We are done with the {m,} expr, so skip rest
closureType = '*';
bracketOpt[i] = 0;
idx = bracketEnd[i];
}
else
if (bracketOpt[i] == bracketUnbounded)
{
idx = idxBeforeTerminal;
bracketOpt[i] = bracketFinished;
break;
}
else
if (bracketOpt[i]-- > 0)
{
// Drop through to optionally close and then 'play it again sam!'
idx = idxBeforeTerminal;
closureType = '?';
}
else
{
// We are done. skip the rest of {m,n} expr
idx = bracketEnd[i];
break;
}
}
// Fall through!
case '?':
case '*':
if (!greedy)
{
break;
}
if (closureType == '?')
{
// X? is compiled as (X|)
nodeInsert(RE.OP_BRANCH, 0, ret); // branch before X
setNextOfEnd(ret, node (RE.OP_BRANCH, 0)); // inserted branch to option
int nothing = node (RE.OP_NOTHING, 0); // which is OP_NOTHING
setNextOfEnd(ret, nothing); // point (second) branch to OP_NOTHING
setNextOfEnd(ret + RE.nodeSize, nothing); // point the end of X to OP_NOTHING node
}
if (closureType == '*')
{
// X* is compiled as (X{gotoX}|)
nodeInsert(RE.OP_BRANCH, 0, ret); // branch before X
setNextOfEnd(ret + RE.nodeSize, node(RE.OP_BRANCH, 0)); // end of X points to an option
setNextOfEnd(ret + RE.nodeSize, node(RE.OP_GOTO, 0)); // to goto
setNextOfEnd(ret + RE.nodeSize, ret); // the start again
setNextOfEnd(ret, node(RE.OP_BRANCH, 0)); // the other option is
setNextOfEnd(ret, node(RE.OP_NOTHING, 0)); // OP_NOTHING
}
break;
case '+':
{
// X+ is compiled as X({gotoX}|)
int branch;
branch = node(RE.OP_BRANCH, 0); // a new branch
setNextOfEnd(ret, branch); // is added to the end of X
setNextOfEnd(node(RE.OP_GOTO, 0), ret); // one option is to go back to the start
setNextOfEnd(branch, node(RE.OP_BRANCH, 0)); // the other option
setNextOfEnd(ret, node(RE.OP_NOTHING, 0)); // is OP_NOTHING
}
break;
}
}
else
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -