📄 recompiler.java
字号:
idx = idxEscape; } // Switch on next char switch (c) { case '{': case '?': case '*': case '+': // If the next character is a closure operator and our atom is non-empty, the // current character should bind to the closure operator rather than the atom if (lenAtom != 0) { break atomLoop; } } } // Switch on current char switch (pattern.charAt(idx)) { case ']': case '^': case '$': case '.': case '[': case '(': case ')': case '|': break atomLoop; case '{': case '?': case '*': case '+': // We should have an atom by now if (lenAtom == 0) { // No atom before closure syntaxError("Missing operand to closure"); } break atomLoop; case '\\': { // Get the escaped character (advances input automatically) int idxBeforeEscape = idx; int c = escape(); // Check if it's a simple escape (as opposed to, say, a backreference) if ((c & ESC_MASK) == ESC_MASK) { // Not a simple escape, so backup to where we were before the escape. idx = idxBeforeEscape; break atomLoop; } // Add escaped char to atom emit((char) c); lenAtom++; } break; default: // Add normal character to atom emit(pattern.charAt(idx++)); lenAtom++; break; } } // This "shouldn't" happen if (lenAtom == 0) { internalError(); } // Emit the atom length into the program instruction[ret + RE.offsetOpdata] = (char)lenAtom; return ret; } /** * Match a terminal node. * @param flags Flags * @return Index of terminal node (closeable) * @exception RESyntaxException Thrown if the regular expression has invalid syntax. */ int terminal(int[] flags) throws RESyntaxException { switch (pattern.charAt(idx)) { case RE.OP_EOL: case RE.OP_BOL: case RE.OP_ANY: return node(pattern.charAt(idx++), 0); case '[': return characterClass(); case '(': return expr(flags); case ')': syntaxError("Unexpected close paren"); case '|': internalError(); case ']': syntaxError("Mismatched class"); case 0: syntaxError("Unexpected end of input"); case '?': case '+': case '{': case '*': syntaxError("Missing operand to closure"); case '\\': { // Don't forget, escape() advances the input stream! int idxBeforeEscape = idx; // Switch on escaped character switch (escape()) { case ESC_CLASS: case ESC_COMPLEX: flags[0] &= ~NODE_NULLABLE; return node(RE.OP_ESCAPE, pattern.charAt(idx - 1)); case ESC_BACKREF: { char backreference = (char)(pattern.charAt(idx - 1) - '0'); if (parens <= backreference) { syntaxError("Bad backreference"); } flags[0] |= NODE_NULLABLE; return node(RE.OP_BACKREF, backreference); } default: // We had a simple escape and we want to have it end up in // an atom, so we back up and fall though to the default handling idx = idxBeforeEscape; flags[0] &= ~NODE_NULLABLE; break; } } } // Everything above either fails or returns. // If it wasn't one of the above, it must be the start of an atom. flags[0] &= ~NODE_NULLABLE; return atom(); } /** * Compile a possibly closured terminal * @param flags Flags passed by reference * @return Index of closured node * @exception RESyntaxException Thrown if the regular expression has invalid syntax. */ int closure(int[] flags) throws RESyntaxException { // Before terminal int idxBeforeTerminal = idx; // Values to pass by reference to terminal() int[] terminalFlags = { NODE_NORMAL }; // Get terminal symbol int ret = terminal(terminalFlags); // Or in flags from terminal symbol flags[0] |= terminalFlags[0]; // Advance input, set NODE_NULLABLE flag and do sanity checks if (idx >= len) { return ret; } boolean greedy = true; char closureType = pattern.charAt(idx); switch (closureType) { case '?': case '*': // The current node can be null flags[0] |= NODE_NULLABLE; case '+': // Eat closure character idx++; case '{': // Don't allow blantant stupidity int opcode = instruction[ret + RE.offsetOpcode]; if (opcode == RE.OP_BOL || opcode == RE.OP_EOL) { syntaxError("Bad closure operand"); } if ((terminalFlags[0] & NODE_NULLABLE) != 0) { syntaxError("Closure operand can't be nullable"); } break; } // If the next character is a '?', make the closure non-greedy (reluctant) if (idx < len && pattern.charAt(idx) == '?') { idx++; greedy = false; } if (greedy) { // Actually do the closure now switch (closureType) { case '{': { // We look for our bracket in the list boolean found = false; int i; allocBrackets(); for (i = 0; i < brackets; i++) { if (bracketStart[i] == idx) { found = true; break; } } // If its not in the list we parse the {m,n} if (!found) { if (brackets >= maxBrackets) { reallocBrackets(); } bracketStart[brackets] = idx; bracket(); bracketEnd[brackets] = idx; i = brackets++; } // Process min first if (bracketMin[i]-- > 0) { if (bracketMin[i] > 0 || bracketOpt[i] != 0) { // Rewind stream and run it through again - more matchers coming for (int j = 0; j < brackets; j++) { if (j != i && bracketStart[j] < idx && bracketStart[j] >= idxBeforeTerminal) { brackets--; bracketStart[j] = bracketStart[brackets]; bracketEnd[j] = bracketEnd[brackets]; bracketMin[j] = bracketMin[brackets]; bracketOpt[j] = bracketOpt[brackets]; } } idx = idxBeforeTerminal; } else { // Bug #1030: No optinal matches - no need to rewind idx = bracketEnd[i]; } break; } // Do the right thing for maximum ({m,}) if (bracketOpt[i] == bracketUnbounded) { // Drop through now and closure expression. // We are done with the {m,} expr, so skip rest closureType = '*'; bracketOpt[i] = 0; idx = bracketEnd[i]; } else if (bracketOpt[i]-- > 0) { if (bracketOpt[i] > 0) { // More optional matchers - 'play it again sam!' idx = idxBeforeTerminal; } else { // Bug #1030: We are done - this one is last and optional idx = bracketEnd[i]; } // Drop through to optionally close closureType = '?'; } else { // Rollback terminal - neither min nor opt matchers present lenInstruction = ret; node(RE.OP_NOTHING, 0); // We are done. skip the rest of {m,n} expr idx = bracketEnd[i]; break; } } // Fall through! case '?': case '*': if (!greedy) { break; } if (closureType == '?') { // X? is compiled as (X|) nodeInsert(RE.OP_BRANCH, 0, ret); // branch before X setNextOfEnd(ret, node (RE.OP_BRANCH, 0)); // inserted branch to option int nothing = node (RE.OP_NOTHING, 0); // which is OP_NOTHING setNextOfEnd(ret, nothing); // point (second) branch to OP_NOTHING setNextOfEnd(ret + RE.nodeSize, nothing); // point the end of X to OP_NOTHING node } if (closureType == '*') { // X* is compiled as (X{gotoX}|) nodeInsert(RE.OP_BRANCH, 0, ret); // branch before X setNextOfEnd(ret + RE.nodeSize, node(RE.OP_BRANCH, 0)); // end of X points to an option setNextOfEnd(ret + RE.nodeSize, node(RE.OP_GOTO, 0)); // to goto setNextOfEnd(ret + RE.nodeSize, ret); // the start again setNextOfEnd(ret, node(RE.OP_BRANCH, 0)); // the other option is setNextOfEnd(ret, node(RE.OP_NOTHING, 0)); // OP_NOTHING } break; case '+': { // X+ is compiled as X({gotoX}|) int branch; branch = node(RE.OP_BRANCH, 0); // a new branch setNextOfEnd(ret, branch); // is added to the end of X setNextOfEnd(node(RE.OP_GOTO, 0), ret); // one option is to go back to the start setNextOfEnd(branch, node(RE.OP_BRANCH, 0)); // the other option setNextOfEnd(ret, node(RE.OP_NOTHING, 0)); // is OP_NOTHING } break; } } else { // Add end after closured subexpr setNextOfEnd(ret, node(RE.OP_END, 0)); // Actually do the closure now switch (closureType) { case '?': nodeInsert(RE.OP_RELUCTANTMAYBE, 0, ret); break; case '*':
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -