📄 perl5util.java
字号:
// only if we're on an odd backslash and there is another occurrence // of a delimiter later in the string. if(backslash && index + 1 < exp.length && exp[index + 1] == delimiter && expression.lastIndexOf(delimiter, exp.length - 1) != (index + 1)) { finalDelimiter = false; continue; } } else if(exp[index] == delimiter && finalDelimiter) { thirdOffset = index; break; } else { backslash = false; finalDelimiter = true; } replacement.append(exp[index]); } if(thirdOffset == -1) throw new MalformedPerl5PatternException("Invalid expression: " + expression); compileOptions = Perl5Compiler.DEFAULT_MASK; numSubstitutions = 1; // Single quotes cause no interpolations to be performed in replacement if(delimiter != '\'') numInterpolations = Perl5Substitution.INTERPOLATE_ALL; else numInterpolations = Perl5Substitution.INTERPOLATE_NONE; // Parse options for(index = thirdOffset + 1; index < exp.length; index++) { switch(exp[index]) { case 'i' : compileOptions |= Perl5Compiler.CASE_INSENSITIVE_MASK; break; case 'm' : compileOptions |= Perl5Compiler.MULTILINE_MASK; break; case 's' : compileOptions |= Perl5Compiler.SINGLELINE_MASK; break; case 'x' : compileOptions |= Perl5Compiler.EXTENDED_MASK; break; case 'g' : numSubstitutions = Util.SUBSTITUTE_ALL; break; case 'o' : numInterpolations = 1; break; default : throw new MalformedPerl5PatternException("Invalid option: " + exp[index]); } } compiledPattern = __patternCache.getPattern(new String(exp, firstOffset, secondOffset - firstOffset), compileOptions); substitution = new Perl5Substitution(replacement.toString(), numInterpolations); entry = new ParsedSubstitutionEntry(compiledPattern, substitution, numSubstitutions); __expressionCache.addElement(expression, entry); subCount = Util.substitute(result, __matcher, compiledPattern, substitution, input, numSubstitutions); __lastMatch = __matcher.getMatch(); return subCount; } /** * Substitutes a pattern in a given input with a replacement string. 
* The substitution expression is specified in Perl5 native format. * <dl compact> * <dt>Calling this method is the same as:</dt> * <dd> * <blockquote><pre> * String result; * StringBuffer buffer = new StringBuffer(); * perl.substitute(buffer, expression, input); * result = buffer.toString(); * </pre></blockquote> * </dd> * </dl> * @param expression The Perl5 substitution regular expression. * @param input The input on which to perform substitutions. * @return The input as a String after substitutions have been performed. * @exception MalformedPerl5PatternException If there is an error in * the expression. You are not forced to catch this exception * because it is derived from RuntimeException. * @since 1.0 * @see #substitute */ public synchronized String substitute(String expression, String input) throws MalformedPerl5PatternException { StringBuffer result = new StringBuffer(); substitute(result, expression, input); return result.toString(); } /** * Splits a String into strings that are appended to a List, but no more * than a specified limit. The String is split using a regular expression * as the delimiter. The regular expression is a pattern specified * in Perl5 native format: * <blockquote><pre> * [m]/pattern/[i][m][s][x] * </pre></blockquote> * The <code>m</code> prefix is optional and the meaning of the optional * trailing options are: * <dl compact> * <dt> i <dd> case insensitive match * <dt> m <dd> treat the input as consisting of multiple lines * <dt> s <dd> treat the input as consisting of a single line * <dt> x <dd> enable extended expression syntax incorporating whitespace * and comments * </dl> * As with Perl, any non-alphanumeric character can be used in lieu of * the slashes. * <p> * The limit parameter causes the string to be split on at most the first * <b>limit - 1</b> number of pattern occurences. * <p> * Of special note is that this split method performs EXACTLY the same * as the Perl split() function. 
In other words, if the split pattern
   * contains parentheses, additional elements are created from
   * each of the matching subgroups in the pattern.  Using an example
   * similar to the one from the Camel book:
   * <blockquote><pre>
   * split(list, "/([,-])/", "8-12,15,18")
   * </pre></blockquote>
   * produces a list containing:
   * <blockquote><pre>
   * { "8", "-", "12", ",", "15", ",", "18" }
   * </pre></blockquote>
   * Furthermore, the following Perl behavior is observed: "leading empty
   * fields are preserved, and empty trailing ones are deleted."  This
   * has the effect that a split on a zero length string returns an empty
   * list.
   * The {@link org.apache.oro.text.regex.Util#split Util.split()} method
   * does NOT implement these behaviors because it is intended to
   * be a general self-consistent and predictable split function usable
   * with Pattern instances other than Perl5Pattern.
   * <p>
   * @param results
   *    A <code> Collection </code> to which the substrings of the input
   *    that occur between the regular expression delimiter occurrences
   *    are appended.  The input will not be split into any more substrings
   *    than the specified limit.  A way of thinking of this is that only
   *    the first <b>limit - 1</b> matches of the delimiting regular
   *    expression will be used to split the input.  The Collection must
   *    support the <code>addAll(Collection)</code> operation.
   * @param pattern The regular expression to use as a split delimiter.
   * @param input The String to split.
   * @param limit The limit on the number of substrings appended to
   *  <code>results</code>.
   *  Values &lt;= 0 produce the same behavior as the SPLIT_ALL constant which
   *  causes the limit to be ignored and splits to be performed on all
   *  occurrences of the pattern.  You should use the SPLIT_ALL constant
   *  to achieve this behavior instead of relying on the default behavior
   *  associated with non-positive limit values.
   * @exception MalformedPerl5PatternException If there is an error in
   *  the expression.
You are not forced to catch this exception * because it is derived from RuntimeException. */ public synchronized void split(Collection results, String pattern, String input, int limit) throws MalformedPerl5PatternException { int beginOffset, groups, index; String group; MatchResult currentResult = null; PatternMatcherInput pinput; Pattern compiledPattern; compiledPattern = __parseMatchExpression(pattern); pinput = new PatternMatcherInput(input); beginOffset = 0; while(--limit != 0 && __matcher.contains(pinput, compiledPattern)) { currentResult = __matcher.getMatch(); __splitList.add(input.substring(beginOffset, currentResult.beginOffset(0))); if((groups = currentResult.groups()) > 1) { for(index = 1; index < groups; ++index) { group = currentResult.group(index); if(group != null && group.length() > 0) __splitList.add(group); } } beginOffset = currentResult.endOffset(0); } __splitList.add(input.substring(beginOffset, input.length())); // Remove all trailing empty fields. for(int i = __splitList.size() - 1; i >= 0; --i) { String str; str = (String)__splitList.get(i); if(str.length() == 0) __splitList.remove(i); else break; } results.addAll(__splitList); __splitList.clear(); // Just for the sake of completeness __lastMatch = currentResult; } /** * This method is identical to calling: * <blockquote><pre> * split(results, pattern, input, SPLIT_ALL); * </pre></blockquote> */ public synchronized void split(Collection results, String pattern, String input) throws MalformedPerl5PatternException { split(results, pattern, input, SPLIT_ALL); } /** * Splits input in the default Perl manner, splitting on all whitespace. * This method is identical to calling: * <blockquote><pre> * split(results, "/\\s+/", input); * </pre></blockquote> */ public synchronized void split(Collection results, String input) throws MalformedPerl5PatternException { split(results, "/\\s+/", input); } /** * Splits a String into strings contained in a Vector of size no greater * than a specified limit. 
The String is split using a regular expression
   * as the delimiter.  The regular expression is a pattern specified
   * in Perl5 native format:
   * <blockquote><pre>
   * [m]/pattern/[i][m][s][x]
   * </pre></blockquote>
   * The <code>m</code> prefix is optional and the meanings of the optional
   * trailing options are:
   * <dl compact>
   * <dt> i <dd> case insensitive match
   * <dt> m <dd> treat the input as consisting of multiple lines
   * <dt> s <dd> treat the input as consisting of a single line
   * <dt> x <dd> enable extended expression syntax incorporating whitespace
   *             and comments
   * </dl>
   * As with Perl, any non-alphanumeric character can be used in lieu of
   * the slashes.
   * <p>
   * The limit parameter causes the string to be split on at most the first
   * <b>limit - 1</b> number of pattern occurrences.
   * <p>
   * Of special note is that this split method behaves EXACTLY the same
   * as the Perl split() function.  In other words, if the split pattern
   * contains parentheses, additional Vector elements are created from
   * each of the matching subgroups in the pattern.  Using an example
   * similar to the one from the Camel book:
   * <blockquote><pre>
   * split("/([,-])/", "8-12,15,18")
   * </pre></blockquote>
   * produces the Vector containing:
   * <blockquote><pre>
   * { "8", "-", "12", ",", "15", ",", "18" }
   * </pre></blockquote>
   * The {@link org.apache.oro.text.regex.Util#split Util.split()} method
   * does NOT implement this particular behavior because it is intended to
   * be usable with Pattern instances other than Perl5Pattern.
   * <p>
   * @deprecated Use
   * {@link #split(Collection results, String pattern, String input, int limit)}
   *  instead.
   * @param pattern The regular expression to use as a split delimiter.
   * @param input The String to split.
   * @param limit The limit on the size of the returned <code>Vector</code>.
   *  Values &lt;= 0 produce the same behavior as the SPLIT_ALL constant which
   *  causes the limit to be ignored and splits to be performed on all
   *  occurrences of the pattern.
You should use the SPLIT_ALL constant
   * to achieve this behavior instead of relying on the default behavior
   * associated with non-positive limit values.
   * @return A <code> Vector </code> containing the substrings of the input
   * that occur between the regular expression delimiter occurences.  The
   * input will not be split into any more substrings than the specified
   * limit. A way of thinking of this is that only the first
   * <b>limit - 1</b>
   * matches of the delimiting regular expression will be used to split the
   * input.
   * @exception MalformedPerl5PatternException If there is an error in
   * the expression.  You are not forced to catch this exception
   * because it is derived from RuntimeException.
   */
  public synchronized Vector split(String pattern, String input, int limit)
       throws MalformedPerl5PatternException
  {
    // Gather the fields in a new Vector (initial capacity 20) by delegating
    // to the split overload that takes a results collection as its first
    // argument.
    Vector results = new Vector(20);
    split(results, pattern, input, limit);
    return results;
  }

  /**
   * Splits on every occurrence of the pattern and returns the fields in a
   * Vector.
   * This method is identical to calling:
   * <blockquote><pre>
   * split(pattern, input, SPLIT_ALL);
   * </pre></blockquote>
   * @deprecated Use
   * {@link #split(Collection results, String pattern, String input)} instead.
   * @param pattern The regular expression to use as a split delimiter.
   * @param input The String to split.
   * @return The <code>Vector</code> produced by
   *  <code>split(pattern, input, SPLIT_ALL)</code>.
   * @exception MalformedPerl5PatternException If there is an error in
   *  the expression.
   */
  public synchronized Vector split(String pattern, String input)
       throws MalformedPerl5PatternException
  {
    return split(pattern, input, SPLIT_ALL);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -