📄 perl5matcher.java
字号:
} else inp = __originalInput; __initInterpreterGlobals(expression, inp, input._currentOffset, input._endOffset); __lastSuccess = __tryExpression(expression, input._currentOffset); __lastMatchResult = null; return __lastSuccess; } /** * Determines if a string (represented as a char[]) exactly * matches a given pattern. If * there is an exact match, a MatchResult instance * representing the match is made accesible via * {@link #getMatch()}. The pattern must be * a Perl5Pattern instance, otherwise a ClassCastException will * be thrown. You are not required to, and indeed should NOT try to * (for performance reasons), catch a ClassCastException because it * will never be thrown as long as you use a Perl5Pattern as the pattern * parameter. * <p> * <b>Note:</b> matches() is not the same as sticking a ^ in front of * your expression and a $ at the end of your expression in Perl5 * and using the =~ operator, even though in many cases it will be * equivalent. matches() literally looks for an exact match according * to the rules of Perl5 expression matching. Therefore, if you have * a pattern <em>foo|foot</em> and are matching the input <em>foot</em> * it will not produce an exact match. But <em>foot|foo</em> will * produce an exact match for either <em>foot</em> or <em>foo</em>. * Remember, Perl5 regular expressions do not match the longest * possible match. From the perlre manpage: * <blockquote> * Alternatives are tried from left to right, so the first * alternative found for which the entire expression matches, * is the one that is chosen. This means that alternatives * are not necessarily greedy. For example: when matching * foo|foot against "barefoot", only the "foo" part will * match, as that is the first alternative tried, and it * successfully matches the target string. * </blockquote> * <p> * @param input The char[] to test for an exact match. * @param pattern The Perl5Pattern to be matched. * @return True if input matches pattern, false otherwise. 
* @exception ClassCastException If a Pattern instance other than a * Perl5Pattern is passed as the pattern parameter. */ public boolean matches(char[] input, Pattern pattern) { Perl5Pattern expression; expression = (Perl5Pattern)pattern; __originalInput = input; if(expression._isCaseInsensitive) input = _toLower(input); /* if(__interpret(expression, input, 0, input.length)) { if(__lastMatchResult.beginOffset(0) == 0 && __lastMatchResult.endOffset(0) == input.length) return true; } */ __initInterpreterGlobals(expression, input, 0, input.length); __lastSuccess = (__tryExpression(expression, 0) && __endMatchOffsets[0] == input.length); __lastMatchResult = null; return __lastSuccess; } /** * Determines if a string exactly matches a given pattern. If * there is an exact match, a MatchResult instance * representing the match is made accesible via * {@link #getMatch()}. The pattern must be * a Perl5Pattern instance, otherwise a ClassCastException will * be thrown. You are not required to, and indeed should NOT try to * (for performance reasons), catch a ClassCastException because it * will never be thrown as long as you use a Perl5Pattern as the pattern * parameter. * <p> * <b>Note:</b> matches() is not the same as sticking a ^ in front of * your expression and a $ at the end of your expression in Perl5 * and using the =~ operator, even though in many cases it will be * equivalent. matches() literally looks for an exact match according * to the rules of Perl5 expression matching. Therefore, if you have * a pattern <em>foo|foot</em> and are matching the input <em>foot</em> * it will not produce an exact match. But <em>foot|foo</em> will * produce an exact match for either <em>foot</em> or <em>foo</em>. * Remember, Perl5 regular expressions do not match the longest * possible match. From the perlre manpage: * <blockquote> * Alternatives are tried from left to right, so the first * alternative found for which the entire expression matches, * is the one that is chosen. 
This means that alternatives * are not necessarily greedy. For example: when matching * foo|foot against "barefoot", only the "foo" part will * match, as that is the first alternative tried, and it * successfully matches the target string. * </blockquote> * <p> * @param input The String to test for an exact match. * @param pattern The Perl5Pattern to be matched. * @return True if input matches pattern, false otherwise. * @exception ClassCastException If a Pattern instance other than a * Perl5Pattern is passed as the pattern parameter. */ public boolean matches(String input, Pattern pattern) { return matches(input.toCharArray(), pattern); } /** * Determines if the contents of a PatternMatcherInput instance * exactly matches a given pattern. If * there is an exact match, a MatchResult instance * representing the match is made accesible via * {@link #getMatch()}. Unlike the * {@link #contains(PatternMatcherInput, Pattern)} * method, the current offset of the PatternMatcherInput argument * is not updated. You should remember that the region between * the begin (NOT the current) and end offsets of the PatternMatcherInput * will be tested for an exact match. * <p> * The pattern must be a Perl5Pattern instance, otherwise a * ClassCastException will be thrown. You are not required to, and * indeed should NOT try to (for performance reasons), catch a * ClassCastException because it will never be thrown as long as you use * a Perl5Pattern as the pattern parameter. * <p> * <b>Note:</b> matches() is not the same as sticking a ^ in front of * your expression and a $ at the end of your expression in Perl5 * and using the =~ operator, even though in many cases it will be * equivalent. matches() literally looks for an exact match according * to the rules of Perl5 expression matching. Therefore, if you have * a pattern <em>foo|foot</em> and are matching the input <em>foot</em> * it will not produce an exact match. 
But <em>foot|foo</em> will * produce an exact match for either <em>foot</em> or <em>foo</em>. * Remember, Perl5 regular expressions do not match the longest * possible match. From the perlre manpage: * <blockquote> * Alternatives are tried from left to right, so the first * alternative found for which the entire expression matches, * is the one that is chosen. This means that alternatives * are not necessarily greedy. For example: when matching * foo|foot against "barefoot", only the "foo" part will * match, as that is the first alternative tried, and it * successfully matches the target string. * </blockquote> * <p> * @param input The PatternMatcherInput to test for a match. * @param pattern The Perl5Pattern to be matched. * @return True if input matches pattern, false otherwise. * @exception ClassCastException If a Pattern instance other than a * Perl5Pattern is passed as the pattern parameter. */ public boolean matches(PatternMatcherInput input, Pattern pattern) { char[] inp; Perl5Pattern expression; expression = (Perl5Pattern)pattern; __originalInput = input._originalBuffer; if(expression._isCaseInsensitive) { if(input._toLowerBuffer == null) input._toLowerBuffer = _toLower(__originalInput); inp = input._toLowerBuffer; } else inp = __originalInput; /* if(__interpret(expression, inp, input._beginOffset, input._endOffset)) { // debug //System.err.println("contains: " + getMatch()); //System.err.println(__lastMatchResult.beginOffset(0) + "-" + //__lastMatchResult.endOffset(0)); //System.err.println(input._beginOffset + "-" + //input._endOffset); if(__lastMatchResult.beginOffset(0) == input._beginOffset && __lastMatchResult.endOffset(0) == input._endOffset) return true; // Handle special case. 
if(input.length() == 0 || (input._beginOffset == input._endOffset)) return true; } */ __initInterpreterGlobals(expression, inp, input._beginOffset, input._endOffset); __lastMatchResult = null; if(__tryExpression(expression, input._beginOffset)) { if(__endMatchOffsets[0] == input._endOffset || input.length() == 0 || input._beginOffset == input._endOffset) { __lastSuccess = true; return true; } } __lastSuccess = false; return false; } /** * Determines if a string contains a pattern. If the pattern is * matched by some substring of the input, a MatchResult instance * representing the <b> first </b> such match is made acessible via * {@link #getMatch()}. If you want to access * subsequent matches you should either use a PatternMatcherInput object * or use the offset information in the MatchResult to create a substring * representing the remaining input. Using the MatchResult offset * information is the recommended method of obtaining the parts of the * string preceeding the match and following the match. * <p> * The pattern must be a Perl5Pattern instance, otherwise a * ClassCastException will be thrown. You are not required to, and * indeed should NOT try to (for performance reasons), catch a * ClassCastException because it will never be thrown as long as you use * a Perl5Pattern as the pattern parameter. * <p> * @param input The String to test for a match. * @param pattern The Perl5Pattern to be matched. * @return True if the input contains a pattern match, false otherwise. * @exception ClassCastException If a Pattern instance other than a * Perl5Pattern is passed as the pattern parameter. 
*/
  public boolean contains(String input, Pattern pattern) {
    // Hand the string's characters to the char[] overload, which performs
    // the cast, optional lower-casing and the search itself.
    final char[] chars = input.toCharArray();
    return contains(chars, pattern);
  }

  /**
   * Determines if a string (represented as a char[]) contains a pattern.
   * If the pattern is
   * matched by some substring of the input, a MatchResult instance
   * representing the <b> first </b> such match is made accessible via
   * {@link #getMatch()}.  If you want to access
   * subsequent matches you should either use a PatternMatcherInput object
   * or use the offset information in the MatchResult to create a substring
   * representing the remaining input.  Using the MatchResult offset
   * information is the recommended method of obtaining the parts of the
   * string preceding the match and following the match.
   * <p>
   * The pattern must be a Perl5Pattern instance, otherwise a
   * ClassCastException will be thrown.  You are not required to, and
   * indeed should NOT try to (for performance reasons), catch a
   * ClassCastException because it will never be thrown as long as you use
   * a Perl5Pattern as the pattern parameter.
   * <p>
   * @param input  The char[] to test for a match.
   * @param pattern  The Perl5Pattern to be matched.
   * @return True if the input contains a pattern match, false otherwise.
   * @exception ClassCastException If a Pattern instance other than a
   *         Perl5Pattern is passed as the pattern parameter.
*/
  public boolean contains(char[] input, Pattern pattern) {
    final Perl5Pattern perl5 = (Perl5Pattern) pattern;

    // Record the caller's buffer before the optional lower-casing step.
    __originalInput = input;
    final char[] subject = perl5._isCaseInsensitive ? _toLower(input) : input;

    // Search the whole buffer, from offset 0 up to its length.
    return __interpret(perl5, subject, 0, subject.length);
  }

  // Value that __lastMatchInputEndOffset holds initially and is reset to at
  // the end of each contains(PatternMatcherInput, Pattern) call.
  private static final int __DEFAULT_LAST_MATCH_END_OFFSET = -100;

  // Set from PatternMatcherInput.getMatchEndOffset() for the duration of a
  // contains(PatternMatcherInput, Pattern) search.
  private int __lastMatchInputEndOffset = __DEFAULT_LAST_MATCH_END_OFFSET;

  /**
   * Determines if the contents of a PatternMatcherInput, starting from the
   * current offset of the input contains a pattern.
   * If a pattern match is found, a MatchResult
   * instance representing the <b>first</b> such match is made accessible via
   * {@link #getMatch()}.  The current offset of the
   * PatternMatcherInput is set to the offset corresponding to the end
   * of the match, so that a subsequent call to this method will continue
   * searching where the last call left off.  You should remember that the
   * region between the begin and end offsets of the PatternMatcherInput are
   * considered the input to be searched, and that the current offset
   * of the PatternMatcherInput reflects where a search will start from.
   * Matches extending beyond the end offset of the PatternMatcherInput
   * will not be matched.  In other words, a match must occur entirely
   * between the begin and end offsets of the input.  See
   * {@link PatternMatcherInput} for more details.
   * <p>
   * As a side effect, if a match is found, the PatternMatcherInput match
   * offset information is updated.  See the
   * {@link PatternMatcherInput#setMatchOffsets(int, int)}
   * method for more details.
   * <p>
   * The pattern must be a Perl5Pattern instance, otherwise a
   * ClassCastException will be thrown.  You are not required to, and
   * indeed should NOT try to (for performance reasons), catch a
   * ClassCastException because it will never be thrown as long as you use
   * a Perl5Pattern as the pattern parameter.
* <p> * This method is usually used in a loop as follows: * <blockquote><pre> * PatternMatcher matcher; * PatternCompiler compiler; * Pattern pattern; * PatternMatcherInput input; * MatchResult result; * * compiler = new Perl5Compiler(); * matcher = new Perl5Matcher(); * * try { * pattern = compiler.compile(somePatternString); * } catch(MalformedPatternException e) { * System.err.println("Bad pattern."); * System.err.println(e.getMessage()); * return; * } * * input = new PatternMatcherInput(someStringInput); * * while(matcher.contains(input, pattern)) { * result = matcher.getMatch(); * // Perform whatever processing on the result you want. * } * * </pre></blockquote> * <p> * @param input The PatternMatcherInput to test for a match. * @param pattern The Pattern to be matched. * @return True if the input contains a pattern match, false otherwise. * @exception ClassCastException If a Pattern instance other than a * Perl5Pattern is passed as the pattern parameter. */ public boolean contains(PatternMatcherInput input, Pattern pattern) { char[] inp; Perl5Pattern expression; boolean matchFound; //if(input.length() > 0) { // We want to allow a null string to match at the end of the input // which is why we don't check endOfInput. Not sure if this is a // safe thing to do or not. if(input._currentOffset > input._endOffset) return false; //} /* else if(input._endOfInput()) return false; */ expression = (Perl5Pattern)pattern; __originalInput = input._originalBuffer; // Todo: // Really should only reduce to lowercase that part of the // input that is necessary, instead of the whole thing. // Adjust MatchResult offsets accordingly. Actually, pass an adjustment // value to __interpret. 
__originalInput = input._originalBuffer; if(expression._isCaseInsensitive) { if(input._toLowerBuffer == null) input._toLowerBuffer = _toLower(__originalInput); inp = input._toLowerBuffer; } else inp = __originalInput; __lastMatchInputEndOffset = input.getMatchEndOffset(); matchFound = __interpret(expression, inp, input._currentOffset, input._endOffset); if(matchFound) { input.setCurrentOffset(__endMatchOffsets[0]); input.setMatchOffsets(__beginMatchOffsets[0], __endMatchOffsets[0]); } else { input.setCurrentOffset(input._endOffset + 1); } // Restore so it doesn't interfere with other unrelated matches. __lastMatchInputEndOffset = __DEFAULT_LAST_MATCH_END_OFFSET; return matchFound; } /** * Fetches the last match found by a call to a matches() or contains() * method. If you plan on modifying the original search input, you * must call this method BEFORE you modify the original search input, * as a lazy evaluation technique is used to create the MatchResult. * This reduces the cost of pattern matching when you don't care about * the actual match and only care if the pattern occurs in the input. * Otherwise, a MatchResult would be created for every match found, * whether or not the MatchResult was later used by a call to getMatch(). * <p> * @return A MatchResult instance containing the pattern match found * by the last call to any one of the matches() or contains() * methods. If no match was found by the last call, returns * null. */ public MatchResult getMatch() { if(!__lastSuccess) return null; if(__lastMatchResult == null) __setLastMatchResult(); return __lastMatchResult; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -