📄 perl5util.java
字号:
* The <code>m</code> prefix is optional and the meaning of the optional * trailing options are: * <dl compact> * <dt> i <dd> case insensitive match * <dt> m <dd> treat the input as consisting of multiple lines * <dt> s <dd> treat the input as consisting of a single line * <dt> x <dd> enable extended expression syntax incorporating whitespace * and comments * </dl> * As with Perl, any non-alphanumeric character can be used in lieu of * the slashes. * <p> * If the input contains the pattern, the org.apache.oro.text.regex.MatchResult * can be obtained by calling {@link #getMatch()}. * However, Perl5Util implements the MatchResult interface as a wrapper * around the last MatchResult found, so you can call its methods to * access match information. * After the call to this method, the PatternMatcherInput current offset * is advanced to the end of the match, so you can use it to repeatedly * search for expressions in the entire input using a while loop as * explained in the OROMatcher<font size="-2"><sup>TM</sup></font> package. * <p> * @param pattern The pattern to search for. * @param input The PatternMatcherInput to search. * @return True if the input contains the pattern, false otherwise. * @exception MalformedPerl5PatternException If there is an error in * the pattern. You are not forced to catch this exception * because it is derived from RuntimeException. */ public synchronized boolean match(String pattern, PatternMatcherInput input) throws MalformedPerl5PatternException { boolean result; result = __matcher.contains(input, __parseMatchExpression(pattern)); if(result) { __lastMatch = __matcher.getMatch(); __originalInput = input.getInput(); __inputBeginOffset = input.getBeginOffset(); __inputEndOffset = input.getEndOffset(); } return result; } /** * Returns the last match found by a call to a match(), substitute(), or * split() method. This method is only intended for use to retrieve a match * found by the last match found by a match() method. 
This method should * be used when you want to save MatchResult instances. Otherwise, for * simply accessing match information, it is more convenient to use the * Perl5Util methods implementing the MatchResult interface. * <p> * @return The org.apache.oro.text.regex.MatchResult instance containing the * last match found. */ public synchronized MatchResult getMatch() { return __lastMatch; } /** * Substitutes a pattern in a given input with a replacement string. * The substitution expression is specified in Perl5 native format: * <blockquote><pre> * s/pattern/replacement/[g][i][m][o][s][x] * </pre></blockquote> * The <code>s</code> prefix is mandatory and the meaning of the optional * trailing options are: * <dl compact> * <dt> g <dd> Substitute all occurrences of pattern with replacement. * The default is to replace only the first occurrence. * <dt> i <dd> perform a case insensitive match * <dt> m <dd> treat the input as consisting of multiple lines * <dt> o <dd> If variable interopolation is used, only evaluate the * interpolation once (the first time). This is equivalent * to using a numInterpolations argument of 1 in the * OROMatcher<font size="-2"><sup>TM</sup></font> * Util.substitute() method. The default is to compute * each interpolation independently. See the * OROMatcher<font size="-2"><sup>TM</sup></font> * Util.substitute() method for more details on variable * interpolation in substitutions. * <dt> s <dd> treat the input as consisting of a single line * <dt> x <dd> enable extended expression syntax incorporating whitespace * and comments * </dl> * As with Perl, any non-alphanumeric character can be used in lieu of * the slashes. This is helpful to avoid backslashing. 
For example, * using slashes you would have to do: * <blockquote><pre> * result = util.substitute("s/foo\\/bar/goo\\/\\/baz/", input); * </pre></blockquote> * when you could more easily write: * <blockquote><pre> * result = util.substitute("s#foo/bar#goo//baz#", input); * </pre></blockquote> * where the hashmarks are used instead of slashes. * <p> * There is a special case of backslashing that you need to pay attention * to. As demonstrated above, to denote a delimiter in the substituted * string it must be backslashed. However, this can be a problem * when you want to denote a backslash at the end of the substituted * string. As of PerlTools 1.3, a new means of handling this * situation has been implemented. * In previous versions, the behavior was that * <blockquote> * "... a double backslash (quadrupled in the Java String) always * represents two backslashes unless the second backslash is followed * by the delimiter, in which case it represents a single backslash." * </blockquote> * <p> * The new behavior is that a backslash is always a backslash * in the substitution portion of the expression unless it is used to * escape a delimiter. A backslash is considered to escape a delimiter * if an even number of contiguous backslashes preceed the backslash * and the delimiter following the backslash is not the FINAL delimiter * in the expression. Therefore, backslashes preceding final delimiters * are never considered to escape the delimiter. The following, which * used to be an invalid expression and require a special-case extra * backslash, will now replace all instances of / with \: * <blockquote><pre> * result = util.substitute("s#/#\\#g", input); * </pre></blockquote> * <p> * @param expression The substitution expression. * @param input The input. * @return The input after substitutions have been performed. * @exception MalformedPerl5PatternException If there is an error in * the expression. 
You are not forced to catch this exception * because it is derived from RuntimeException. */ // Expression parsing will have to be moved into a separate method if // there are going to be variations of this method. public synchronized String substitute(String expression, String input) throws MalformedPerl5PatternException { boolean backslash, finalDelimiter; int index, compileOptions, numSubstitutions, numInterpolations; int firstOffset, secondOffset, thirdOffset; String result; StringBuffer replacement; Pattern compiledPattern; char exp[], delimiter; ParsedSubstitutionEntry entry; Perl5Substitution substitution; Object obj; obj = __expressionCache.getElement(expression); __nullTest: if(obj != null) { // Must catch ClassCastException because someone might incorrectly // pass an m// expression. try block is cheaper than checking // instanceof. We want to go ahead with parsing just in case so // we break. try { entry = (ParsedSubstitutionEntry)obj; } catch(ClassCastException e) { break __nullTest; } result = Util.substitute(__matcher, entry._pattern, entry._substitution, input, entry._numSubstitutions); __lastMatch = __matcher.getMatch(); return result; } exp = expression.toCharArray(); // Make sure basic conditions for a valid substitution expression hold. 
if(exp.length < 4 || exp[0] != 's' || Character.isLetterOrDigit(exp[1]) || exp[1] == '-') throw new MalformedPerl5PatternException("Invalid expression: " + expression); delimiter = exp[1]; firstOffset = 2; secondOffset = thirdOffset = -1; backslash = false; // Parse pattern for(index = firstOffset; index < exp.length; index++) { if(exp[index] == '\\') backslash = !backslash; else if(exp[index] == delimiter && !backslash) { secondOffset = index; break; } else if(backslash) backslash = !backslash; } if(secondOffset == -1 || secondOffset == exp.length - 1) throw new MalformedPerl5PatternException("Invalid expression: " + expression); // Parse replacement string backslash = false; finalDelimiter = true; replacement = new StringBuffer(exp.length - secondOffset); for(index = secondOffset + 1; index < exp.length; index++) { if(exp[index] == '\\') { backslash = !backslash; // 05/05/99 dfs // We unbackslash backslashed delimiters in the replacement string // only if we're on an odd backslash and there is another occurrence // of a delimiter later in the string. 
if(backslash && index + 1 < exp.length && exp[index + 1] == delimiter && expression.lastIndexOf(delimiter, exp.length - 1) != (index + 1)) { finalDelimiter = false; continue; } } else if(exp[index] == delimiter && finalDelimiter) { thirdOffset = index; break; } else { backslash = false; finalDelimiter = true; } replacement.append(exp[index]); } if(thirdOffset == -1) throw new MalformedPerl5PatternException("Invalid expression: " + expression); compileOptions = Perl5Compiler.DEFAULT_MASK; numSubstitutions = 1; // Single quotes cause no interpolations to be performed in replacement if(delimiter != '\'') numInterpolations = Perl5Substitution.INTERPOLATE_ALL; else numInterpolations = Perl5Substitution.INTERPOLATE_NONE; // Parse options for(index = thirdOffset + 1; index < exp.length; index++) { switch(exp[index]) { case 'i' : compileOptions |= Perl5Compiler.CASE_INSENSITIVE_MASK; break; case 'm' : compileOptions |= Perl5Compiler.MULTILINE_MASK; break; case 's' : compileOptions |= Perl5Compiler.SINGLELINE_MASK; break; case 'x' : compileOptions |= Perl5Compiler.EXTENDED_MASK; break; case 'g' : numSubstitutions = Util.SUBSTITUTE_ALL; break; case 'o' : numInterpolations = 1; break; default : throw new MalformedPerl5PatternException("Invalid option: " + exp[index]); } } compiledPattern = __patternCache.getPattern(new String(exp, firstOffset, secondOffset - firstOffset), compileOptions); substitution = new Perl5Substitution(replacement.toString(), numInterpolations); entry = new ParsedSubstitutionEntry(compiledPattern, substitution, numSubstitutions); __expressionCache.addElement(expression, entry); result = Util.substitute(__matcher, compiledPattern, substitution, input, numSubstitutions); __lastMatch = __matcher.getMatch(); return result; } /** * Splits a String into strings that are appended to a List, but no more * than a specified limit. The String is split using a regular expression * as the delimiter. 
The regular expression is a pattern specified * in Perl5 native format: * <blockquote><pre> * [m]/pattern/[i][m][s][x] * </pre></blockquote> * The <code>m</code> prefix is optional and the meaning of the optional * trailing options are: * <dl compact> * <dt> i <dd> case insensitive match * <dt> m <dd> treat the input as consisting of multiple lines * <dt> s <dd> treat the input as consisting of a single line * <dt> x <dd> enable extended expression syntax incorporating whitespace * and comments * </dl> * As with Perl, any non-alphanumeric character can be used in lieu of * the slashes. * <p> * The limit parameter causes the string to be split on at most the first * <b>limit - 1</b> number of pattern occurences. * <p> * Of special note is that this split method performs EXACTLY the same * as the Perl split() function. In other words, if the split pattern * contains parentheses, additional Vector elements are created from * each of the matching subgroups in the pattern. Using an example * similar to the one from the Camel book: * <blockquote><pre> * split(list, "/([,-])/", "8-12,15,18") * </pre></blockquote> * produces the Vector containing: * <blockquote><pre> * { "8", "-", "12", ",", "15", ",", "18" } * </pre></blockquote> * The Util.split() method in the * OROMatcher<font size="-2"><sup>TM</sup></font> package does NOT * implement this particular behavior because it is intended to * be usable with Pattern instances other than Perl5Pattern. * <p> * @param results * A <code> List </code> to which the substrings of the input * that occur between the regular expression delimiter occurences * are appended. The input will not be split into any more substrings * than the specified * limit. A way of thinking of this is that only the first * <b>limit - 1</b> * matches of the delimiting regular expression will be used to split the * input. * @param pattern The regular expression to use as a split delimiter. * @param input The String to split. 
* @param limit The limit on the size of the returned <code>Vector</code>. * Values <= 0 produce the same behavior as the SPLIT_ALL constant which * causes the limit to be ignored and splits to be performed on all * occurrences of the pattern. You should use the SPLIT_ALL constant * to achieve this behavior instead of relying on the default behavior * associated with non-positive limit values. * @exception MalformedPerl5PatternException If there is an error in * the expression. You are not forced to catch this exception * because it is derived from RuntimeException. */ public synchronized void split(List results, String pattern, String input, int limit) throws MalformedPerl5PatternException { int beginOffset, groups, index; String group; MatchResult currentResult = null; PatternMatcherInput pinput; Pattern compiledPattern; compiledPattern = __parseMatchExpression(pattern); pinput = new PatternMatcherInput(input); beginOffset = 0; while(--limit != 0 && __matcher.contains(pinput, compiledPattern)) { currentResult = __matcher.getMatch(); results.add(input.substring(beginOffset, currentResult.beginOffset(0))); if((groups = currentResult.groups()) > 1) { for(index = 1; index < groups; ++index) { group = currentResult.group(index); if(group != null && group.length() > 0) results.add(group); } } beginOffset = currentResult.endOffset(0); } results.add(input.substring(beginOffset, input.length())); // Just for the sake of completeness __lastMatch = currentResult; } /** * This method is identical to calling: * <blockquote><pre> * split(results, pattern, input, SPLIT_ALL); * </pre></blockquote> */ public synchronized void split(List results, String pattern, String input) throws MalformedPerl5PatternException { split(results, pattern, input, SPLIT_ALL); } /** * Splits a String into strings contained in a Vector of size no greater * than a specified limit. The String is split using a regular expression * as the delimiter. 
The regular expression is a pattern specified * in Perl5 native format: * <blockquote><pre> * [m]/pattern/[i][m][s][x] * </pre></blockquote> * The <code>m</code> prefix is optional and the meanings of the optional * trailing options are: * <dl compact> * <dt> i <dd> case insensitive match * <dt> m <dd> treat the input as consisting of multiple lines * <dt> s <dd> treat the input as consisting of a single line * <dt> x <dd> enable extended expression syntax incorporating whitespace * and comments * </dl> * As with Perl, any non-alphanumeric character can be used in lieu of * the slashes. * <p> * The limit parameter causes the string to be split on at most the first * <b>limit - 1</b> number of pattern occurrences.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -