📄 recompiler.java
字号:
// Add end after closured subexpr
setNextOfEnd(ret, node(RE.OP_END, 0));
// Actually do the closure now
switch (closureType)
{
case '?':
nodeInsert(RE.OP_RELUCTANTMAYBE, 0, ret);
break;
case '*':
nodeInsert(RE.OP_RELUCTANTSTAR, 0, ret);
break;
case '+':
nodeInsert(RE.OP_RELUCTANTPLUS, 0, ret);
break;
}
// Point to the expr after the closure
setNextOfEnd(ret, lenInstruction);
}
return ret;
}
/**
* Compile one branch of an or operator (implements concatenation)
* @param flags Flags passed by reference
* @return Pointer to branch node
* @exception RESyntaxException Thrown if the regular expression has invalid syntax.
*/
int branch(int[] flags) throws RESyntaxException
{
// Get each possibly closured piece and concat
int node;
int ret = node(RE.OP_BRANCH, 0);
int chain = -1;
int[] closureFlags = new int[1];
boolean nullable = true;
while (idx < len && pattern.charAt(idx) != '|' && pattern.charAt(idx) != ')')
{
// Get new node
closureFlags[0] = NODE_NORMAL;
node = closure(closureFlags);
if (closureFlags[0] == NODE_NORMAL)
{
nullable = false;
}
// If there's a chain, append to the end
if (chain != -1)
{
setNextOfEnd(chain, node);
}
// Chain starts at current
chain = node;
}
// If we don't run loop, make a nothing node
if (chain == -1)
{
node(RE.OP_NOTHING, 0);
}
// Set nullable flag for this branch
if (nullable)
{
flags[0] |= NODE_NULLABLE;
}
return ret;
}
/**
* Compile an expression with possible parens around it. Paren matching
* is done at this level so we can tie the branch tails together.
* @param flags Flag value passed by reference
* @return Node index of expression in instruction array
* @exception RESyntaxException Thrown if the regular expression has invalid syntax.
*/
int expr(int[] flags) throws RESyntaxException
{
// Create open paren node unless we were called from the top level (which has no parens)
boolean paren = false;
int ret = -1;
int closeParens = parens;
if ((flags[0] & NODE_TOPLEVEL) == 0 && pattern.charAt(idx) == '(')
{
idx++;
paren = true;
ret = node(RE.OP_OPEN, parens++);
}
flags[0] &= ~NODE_TOPLEVEL;
// Create a branch node
int branch = branch(flags);
if (ret == -1)
{
ret = branch;
}
else
{
setNextOfEnd(ret, branch);
}
// Loop through branches
while (idx < len && pattern.charAt(idx) == '|')
{
idx++;
branch = branch(flags);
setNextOfEnd(ret, branch);
}
// Create an ending node (either a close paren or an OP_END)
int end;
if (paren)
{
if (idx < len && pattern.charAt(idx) == ')')
{
idx++;
}
else
{
syntaxError("Missing close paren");
}
end = node(RE.OP_CLOSE, closeParens);
}
else
{
end = node(RE.OP_END, 0);
}
// Append the ending node to the ret nodelist
setNextOfEnd(ret, end);
// Hook the ends of each branch to the end node
for (int next = -1, i = ret; next != 0; next = instruction[i + RE.offsetNext], i += next)
{
// If branch, make the end of the branch's operand chain point to the end node.
if (instruction[i + RE.offsetOpcode] == RE.OP_BRANCH)
{
setNextOfEnd(i + RE.nodeSize, end);
}
}
// Return the node list
return ret;
}
/**
* Compiles a regular expression pattern into a program runnable by the pattern
* matcher class 'RE'.
* @param pattern Regular expression pattern to compile (see RECompiler class
* for details).
* @return A compiled regular expression program.
* @exception RESyntaxException Thrown if the regular expression has invalid syntax.
* @see RECompiler
* @see RE
*/
public REProgram compile(String pattern) throws RESyntaxException
{
// Initialize variables for compilation
this.pattern = pattern; // Save pattern in instance variable
len = pattern.length(); // Precompute pattern length for speed
idx = 0; // Set parsing index to the first character
lenInstruction = 0; // Set emitted instruction count to zero
parens = 1; // Set paren level to 1 (the implicit outer parens)
brackets = 0; // No bracketed closures yet
// Initialize pass by reference flags value
int[] flags = { NODE_TOPLEVEL };
// Parse expression
expr(flags);
// Should be at end of input
if (idx != len)
{
if (pattern.charAt(idx) == ')')
{
syntaxError("Unmatched close paren");
}
syntaxError("Unexpected input remains");
}
// Return the result
char[] ins = new char[lenInstruction];
System.arraycopy(instruction, 0, ins, 0, lenInstruction);
return new REProgram(ins);
}
/**
* Local, nested class for maintaining character ranges for character classes.
*/
class RERange
{
int size = 16; // Capacity of current range arrays
int[] minRange = new int[size]; // Range minima
int[] maxRange = new int[size]; // Range maxima
int num = 0; // Number of range array elements in use
/**
* Deletes the range at a given index from the range lists
* @param index Index of range to delete from minRange and maxRange arrays.
*/
void delete(int index)
{
// Return if no elements left or index is out of range
if (num == 0 || index >= num)
{
return;
}
// Move elements down
while (index++ < num)
{
if (index - 1 >= 0)
{
minRange[index-1] = minRange[index];
maxRange[index-1] = maxRange[index];
}
}
// One less element now
num--;
}
/**
* Merges a range into the range list, coalescing ranges if possible.
* @param min Minimum end of range
* @param max Maximum end of range
*/
void merge(int min, int max)
{
// Loop through ranges
for (int i = 0; i < num; i++)
{
// Min-max is subsumed by minRange[i]-maxRange[i]
if (min >= minRange[i] && max <= maxRange[i])
{
return;
}
// Min-max subsumes minRange[i]-maxRange[i]
else if (min <= minRange[i] && max >= maxRange[i])
{
delete(i);
merge(min, max);
return;
}
// Min is in the range, but max is outside
else if (min >= minRange[i] && min <= maxRange[i])
{
delete(i);
min = minRange[i];
merge(min, max);
return;
}
// Max is in the range, but min is outside
else if (max >= minRange[i] && max <= maxRange[i])
{
delete(i);
max = maxRange[i];
merge(min, max);
return;
}
}
// Must not overlap any other ranges
if (num >= size)
{
size *= 2;
int[] newMin = new int[size];
int[] newMax = new int[size];
System.arraycopy(minRange, 0, newMin, 0, num);
System.arraycopy(maxRange, 0, newMax, 0, num);
minRange = newMin;
maxRange = newMax;
}
minRange[num] = min;
maxRange[num] = max;
num++;
}
/**
* Removes a range by deleting or shrinking all other ranges
* @param min Minimum end of range
* @param max Maximum end of range
*/
void remove(int min, int max)
{
// Loop through ranges
for (int i = 0; i < num; i++)
{
// minRange[i]-maxRange[i] is subsumed by min-max
if (minRange[i] >= min && maxRange[i] <= max)
{
delete(i);
i--;
return;
}
// min-max is subsumed by minRange[i]-maxRange[i]
else if (min >= minRange[i] && max <= maxRange[i])
{
int minr = minRange[i];
int maxr = maxRange[i];
delete(i);
if (minr < min - 1)
{
merge(minr, min - 1);
}
if (max + 1 < maxr)
{
merge(max + 1, maxr);
}
return;
}
// minRange is in the range, but maxRange is outside
else if (minRange[i] >= min && minRange[i] <= max)
{
minRange[i] = max + 1;
return;
}
// maxRange is in the range, but minRange is outside
else if (maxRange[i] >= min && maxRange[i] <= max)
{
maxRange[i] = min - 1;
return;
}
}
}
/**
* Includes (or excludes) the range from min to max, inclusive.
* @param min Minimum end of range
* @param max Maximum end of range
* @param include True if range should be included. False otherwise.
*/
void include(int min, int max, boolean include)
{
if (include)
{
merge(min, max);
}
else
{
remove(min, max);
}
}
/**
* Includes a range with the same min and max
* @param minmax Minimum and maximum end of range (inclusive)
* @param include True if range should be included. False otherwise.
*/
void include(char minmax, boolean include)
{
include(minmax, minmax, include);
}
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -