📄 perl5matcher.java
字号:
if(current == OpCode._NULL_OFFSET) return false; if(__endMatchOffsets[arg] == OpCode._NULL_OFFSET) return false; if(current == __endMatchOffsets[arg]) break; if(__input[current] != nextChar) return false; line = __endMatchOffsets[arg] - current; if(input + line > __eol) return false; if(line > 1 && !__compare(__input, current, __input, input, line)) return false; input+=line; inputRemains = (input < __endOffset); nextChar = (inputRemains ? __input[input] : __EOS); break; case OpCode._NOTHING: break; case OpCode._BACK: break; case OpCode._OPEN: arg = OpCode._getArg1(__program, scan); __beginMatchOffsets[arg] = input; if(arg > __expSize) __expSize = arg; break; case OpCode._CLOSE: arg = OpCode._getArg1(__program, scan); __endMatchOffsets[arg] = input; if(arg > __lastParen) __lastParen = arg; break; case OpCode._CURLYX: rep = new Perl5Repetition(); rep._lastRepetition = __currentRep; __currentRep = rep; rep._parenFloor = __lastParen; rep._numInstances = -1; rep._min = OpCode._getArg1(__program, scan); rep._max = OpCode._getArg2(__program, scan); rep._scan = OpCode._getNextOperator(scan) + 2; rep._next = next; rep._minMod = minMod; // Must initialize to -1 because if we initialize to 0 and are // at the beginning of the input the OpCode._WHILEM case will // not work right. rep._lastLocation = -1; __inputOffset = input; // use minMod as temporary minMod = __match(OpCode._getPrevOperator(next)); // leave scope call not pertinent? __currentRep = rep._lastRepetition; return minMod; case OpCode._WHILEM: rep = __currentRep; arg = rep._numInstances + 1; __inputOffset = input; if(input == rep._lastLocation) { __currentRep = rep._lastRepetition; line = __currentRep._numInstances; if(__match(rep._next)) return true; __currentRep._numInstances = line; __currentRep = rep; return false; } if(arg < rep._min) { rep._numInstances = arg; rep._lastLocation = input; if(__match(rep._scan)) return true; rep._numInstances = arg - 1; return false; } if(rep._minMod) { __currentRep = rep._lastRepetition; line = __currentRep._numInstances; if(__match(rep._next)) return true; __currentRep._numInstances = line; __currentRep = rep; if(arg >= rep._max) return false; __inputOffset = input; rep._numInstances = arg; rep._lastLocation = input; if(__match(rep._scan)) return true; rep._numInstances = arg - 1; return false; } if(arg < rep._max) { __pushState(rep._parenFloor); rep._numInstances = arg; rep._lastLocation = input; if(__match(rep._scan)) return true; __popState(); __inputOffset = input; } __currentRep = rep._lastRepetition; line = __currentRep._numInstances; if(__match(rep._next)) return true; rep._numInstances = line; __currentRep = rep; rep._numInstances = arg - 1; return false; case OpCode._BRANCH: if(__program[next] != OpCode._BRANCH) next = OpCode._getNextOperator(scan); else { int lastParen; lastParen = __lastParen; do { __inputOffset = input; if(__match(OpCode._getNextOperator(scan))) return true; for(arg = __lastParen; arg > lastParen; --arg) //__endMatchOffsets[arg] = 0; __endMatchOffsets[arg] = OpCode._NULL_OFFSET; __lastParen = arg; scan = OpCode._getNext(__program, scan); } while(scan != OpCode._NULL_OFFSET && __program[scan] == OpCode._BRANCH); return false; } break; case OpCode._MINMOD: minMod = true; break; case OpCode._CURLY: case OpCode._STAR: case OpCode._PLUS: if(op == OpCode._CURLY) { line = OpCode._getArg1(__program, scan); arg = OpCode._getArg2(__program, scan); scan = OpCode._getNextOperator(scan) + 2; } else if(op == OpCode._STAR) { line = 0; arg = Character.MAX_VALUE; scan = OpCode._getNextOperator(scan); } else { line = 1; arg = Character.MAX_VALUE; scan = OpCode._getNextOperator(scan); } if(__program[next] == OpCode._EXACTLY) { nextChar = __program[OpCode._getOperand(next) + 1]; current = 0; } else { nextChar = __EOS; current = -1000; } __inputOffset = input; if(minMod) { minMod = false; if(line > 0 && __repeat(scan, line) < line) return false; while(arg >= line || (arg == Character.MAX_VALUE && line > 0)) { // there may be a bug here with respect to // __inputOffset >= __input.length, but it seems to be right for // now. the issue is with __inputOffset being reset later. // is this test really supposed to happen here? if(current == -1000 || __inputOffset >= __endOffset || __input[__inputOffset] == nextChar) { if(__match(next)) return true; } __inputOffset = input + line; if(__repeat(scan, 1) != 0) { ++line; __inputOffset = input + line; } else return false; } } else { arg = __repeat(scan, arg); if(line < arg && OpCode._opType[__program[next]] == OpCode._EOL && (!__multiline || __program[next] == OpCode._SEOL)) line = arg; while(arg >= line) { // there may be a bug here with respect to // __inputOffset >= __input.length, but it seems to be right for // now. the issue is with __inputOffset being reset later. // is this test really supposed to happen here? if(current == -1000 || __inputOffset >= __endOffset || __input[__inputOffset] == nextChar) { if(__match(next)) return true; } --arg; __inputOffset = input + arg; } } return false; case OpCode._SUCCEED: case OpCode._END: __inputOffset = input; // This enforces the rule that two consecutive matches cannot have // the same end offset. if(__inputOffset == __lastMatchInputEndOffset) return false; return true; case OpCode._IFMATCH: __inputOffset = input; scan = OpCode._getNextOperator(scan); if(!__match(scan)) return false; break; case OpCode._UNLESSM: __inputOffset = input; scan = OpCode._getNextOperator(scan); if(__match(scan)) return false; break; default: // todo: Need to throw an exception here. } // end switch //scan = (next > 0 ? next : 0); scan = next; } // end while scan return false; } /** * Set whether or not subsequent calls to {@link #matches matches()} * or {@link #contains contains()} should treat the input as * consisting of multiple lines. The default behavior is for * input to be treated as consisting of multiple lines. This method * should only be called if the Perl5Pattern used for a match was * compiled without either of the Perl5Compiler.MULTILINE_MASK or * Perl5Compiler.SINGLELINE_MASK flags, and you want to alter the * behavior of how the <b>^</b>, <b>$</b>, and <b>.</b> metacharacters are * interpreted on the fly. The compilation options used when compiling * a pattern ALWAYS override the behavior specified by setMultiline(). See * {@link Perl5Compiler} for more details. * <p> * @param multiline If set to true treats the input as consisting of * multiple lines with respect to the <b>^</b> and <b>$</b> * metacharacters. If set to false treats the input as consisting * of a single line with respect to the <b>^</b> and <b>$</b> * metacharacters. */ public void setMultiline(boolean multiline) { __multiline = multiline; } /** * @return True if the matcher is treating input as consisting of multiple * lines with respect to the <b>^</b> and <b>$</b> metacharacters, * false otherwise. */ public boolean isMultiline() { return __multiline; } char[] _toLower(char[] input) { int current; char[] inp; // todo: // Certainly not the best way to do case insensitive matching. // Must definitely change this in some way, but for now we // do what Perl does and make a copy of the input, converting // it all to lowercase. This is truly better handled in the // compilation phase. inp = new char[input.length]; System.arraycopy(input, 0, inp, 0, input.length); input = inp; // todo: Need to inline toLowerCase() for(current = 0; current < input.length; current++) if(Character.isUpperCase(input[current])) input[current] = Character.toLowerCase(input[current]); return input; } /** * Determines if a prefix of a string (represented as a char[]) * matches a given pattern, starting from a given offset into the string. * If a prefix of the string matches the pattern, a MatchResult instance * representing the match is made accesible via * {@link #getMatch()}. * <p> * This method is useful for certain common token identification tasks * that are made more difficult without this functionality. * <p> * @param input The char[] to test for a prefix match. * @param pattern The Pattern to be matched. * @param offset The offset at which to start searching for the prefix. * @return True if input matches pattern, false otherwise. */ public boolean matchesPrefix(char[] input, Pattern pattern, int offset) { Perl5Pattern expression; expression = (Perl5Pattern)pattern; __originalInput = input; if(expression._isCaseInsensitive) input = _toLower(input); __initInterpreterGlobals(expression, input, offset, input.length); __lastSuccess = __tryExpression(expression, offset); __lastMatchResult = null; return __lastSuccess; } /** * Determines if a prefix of a string (represented as a char[]) * matches a given pattern. * If a prefix of the string matches the pattern, a MatchResult instance * representing the match is made accesible via * {@link #getMatch()}. * <p> * This method is useful for certain common token identification tasks * that are made more difficult without this functionality. * <p> * @param input The char[] to test for a prefix match. * @param pattern The Pattern to be matched. * @return True if input matches pattern, false otherwise. */ public boolean matchesPrefix(char[] input, Pattern pattern) { return matchesPrefix(input, pattern, 0); } /** * Determines if a prefix of a string matches a given pattern. * If a prefix of the string matches the pattern, a MatchResult instance * representing the match is made accesible via * {@link #getMatch()}. * <p> * This method is useful for certain common token identification tasks * that are made more difficult without this functionality. * <p> * @param input The String to test for a prefix match. * @param pattern The Pattern to be matched. * @return True if input matches pattern, false otherwise. */ public boolean matchesPrefix(String input, Pattern pattern) { return matchesPrefix(input.toCharArray(), pattern, 0); } /** * Determines if a prefix of a PatternMatcherInput instance * matches a given pattern. If there is a match, a MatchResult instance * representing the match is made accesible via * {@link #getMatch()}. Unlike the * {@link #contains(PatternMatcherInput, Pattern)} * method, the current offset of the PatternMatcherInput argument * is not updated. However, unlike the * {@link #matches matches(PatternMatcherInput, Pattern)} method, * matchesPrefix() will start its search from the current offset * rather than the begin offset of the PatternMatcherInput. * <p> * This method is useful for certain common token identification tasks * that are made more difficult without this functionality. * <p> * @param input The PatternMatcherInput to test for a prefix match. * @param pattern The Pattern to be matched. * @return True if input matches pattern, false otherwise. */ public boolean matchesPrefix(PatternMatcherInput input, Pattern pattern) { char[] inp; Perl5Pattern expression; expression = (Perl5Pattern)pattern; __originalInput = input._originalBuffer; if(expression._isCaseInsensitive) { if(input._toLowerBuffer == null) input._toLowerBuffer = _toLower(__originalInput); inp = input._toLowerBuffer;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -