📄 perl5util.java

📁 java实现正则表达式的代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
	// only if we're on an odd backslash and there is another occurrence	// of a delimiter later in the string.	if(backslash && index + 1 < exp.length && exp[index + 1] == delimiter	  && expression.lastIndexOf(delimiter, exp.length - 1) != (index + 1))	{	  finalDelimiter = false;	  continue;	}      } else if(exp[index] == delimiter && finalDelimiter) {	thirdOffset = index;	break;      } else {	backslash      = false;	finalDelimiter = true;      }      replacement.append(exp[index]);    }    if(thirdOffset == -1)      throw new	MalformedPerl5PatternException("Invalid expression: " + expression);    compileOptions    = Perl5Compiler.DEFAULT_MASK;    numSubstitutions  = 1;    // Single quotes cause no interpolations to be performed in replacement    if(delimiter != '\'')      numInterpolations = Perl5Substitution.INTERPOLATE_ALL;    else      numInterpolations = Perl5Substitution.INTERPOLATE_NONE;    // Parse options    for(index = thirdOffset + 1; index < exp.length; index++) {      switch(exp[index]) {      case 'i' :	compileOptions |= Perl5Compiler.CASE_INSENSITIVE_MASK;	break;      case 'm' : compileOptions |= Perl5Compiler.MULTILINE_MASK; break;      case 's' : compileOptions |= Perl5Compiler.SINGLELINE_MASK; break;      case 'x' : compileOptions |= Perl5Compiler.EXTENDED_MASK; break;      case 'g' : numSubstitutions = Util.SUBSTITUTE_ALL; break;      case 'o' : numInterpolations = 1; break;      default  :	throw new	  MalformedPerl5PatternException("Invalid option: " + exp[index]);      }    }    compiledPattern =      __patternCache.getPattern(new String(exp, firstOffset,					   secondOffset - firstOffset),				compileOptions);    substitution =      new Perl5Substitution(replacement.toString(), numInterpolations);    entry = new ParsedSubstitutionEntry(compiledPattern, substitution,					numSubstitutions);    __expressionCache.addElement(expression, entry);    subCount =      Util.substitute(result, __matcher, compiledPattern, substitution,		      input, numSubstitutions);    __lastMatch = __matcher.getMatch();    return subCount;  }  /**   * Substitutes a pattern in a given input with a replacement string.   * The substitution expression is specified in Perl5 native format.   * <dl compact>   *   <dt>Calling this method is the same as:</dt>   *   <dd>   *     <blockquote><pre>   *      String result;   *      StringBuffer buffer = new StringBuffer();   *      perl.substitute(buffer, expression, input);   *      result = buffer.toString();   *     </pre></blockquote>   *   </dd>   * </dl>   * @param expression The Perl5 substitution regular expression.   * @param input      The input on which to perform substitutions.   * @return  The input as a String after substitutions have been performed.   * @exception MalformedPerl5PatternException  If there is an error in   *            the expression.  You are not forced to catch this exception   *            because it is derived from RuntimeException.   * @since 1.0   * @see #substitute   */  public synchronized String substitute(String expression, String input)    throws MalformedPerl5PatternException  {    StringBuffer result = new StringBuffer();    substitute(result, expression, input);    return result.toString();  }   /**   * Splits a String into strings that are appended to a List, but no more   * than a specified limit.  The String is split using a regular expression   * as the delimiter.  The regular expression is a pattern specified   * in Perl5 native format:   * <blockquote><pre>   * [m]/pattern/[i][m][s][x]   * </pre></blockquote>   * The <code>m</code> prefix is optional and the meaning of the optional   * trailing options are:   * <dl compact>    * <dt> i <dd> case insensitive match   * <dt> m <dd> treat the input as consisting of multiple lines   * <dt> s <dd> treat the input as consisting of a single line   * <dt> x <dd> enable extended expression syntax incorporating whitespace   *             and comments   * </dl>   * As with Perl, any non-alphanumeric character can be used in lieu of   * the slashes.   * <p>   * The limit parameter causes the string to be split on at most the first   * <b>limit - 1</b> number of pattern occurences.   * <p>   * Of special note is that this split method performs EXACTLY the same   * as the Perl split() function.  In other words, if the split pattern   * contains parentheses, additional Vector elements are created from   * each of the matching subgroups in the pattern.  Using an example   * similar to the one from the Camel book:   * <blockquote><pre>   * split(list, "/([,-])/", "8-12,15,18")   * </pre></blockquote>   * produces the Vector containing:   * <blockquote><pre>   * { "8", "-", "12", ",", "15", ",", "18" }   * </pre></blockquote>   * Furthermore, the following Perl behavior is observed: "leading empty   * fields are preserved, and empty trailing one are deleted."  This   * has the effect that a split on a zero length string returns an empty   * list.   * The {@link org.apache.oro.text.regex.Util#split Util.split()} method   * does NOT implement these behaviors because it is intended to   * be a general self-consistent and predictable split function usable   * with Pattern instances other than Perl5Pattern.   * <p>   * @param results    *    A <code> Collection </code> to which the substrings of the input   *    that occur between the regular expression delimiter occurences   *    are appended. The input will not be split into any more substrings   *    than the specified    *    limit. A way of thinking of this is that only the first   *    <b>limit - 1</b>   *    matches of the delimiting regular expression will be used to split the   *    input.  The Collection must support the   *    <code>addAll(Collection)</code> operation.   * @param pattern The regular expression to use as a split delimiter.   * @param input The String to split.   * @param limit The limit on the size of the returned <code>Vector</code>.   *   Values <= 0 produce the same behavior as the SPLIT_ALL constant which   *   causes the limit to be ignored and splits to be performed on all   *   occurrences of the pattern.  You should use the SPLIT_ALL constant   *   to achieve this behavior instead of relying on the default behavior   *   associated with non-positive limit values.   * @exception MalformedPerl5PatternException  If there is an error in   *            the expression.  You are not forced to catch this exception   *            because it is derived from RuntimeException.   */  public synchronized void split(Collection results, String pattern,				 String input, int limit)       throws MalformedPerl5PatternException   {    int beginOffset, groups, index;    String group;    MatchResult currentResult = null;    PatternMatcherInput pinput;    Pattern compiledPattern;    compiledPattern = __parseMatchExpression(pattern);    pinput = new PatternMatcherInput(input);    beginOffset = 0;    while(--limit != 0 && __matcher.contains(pinput, compiledPattern)) {      currentResult = __matcher.getMatch();      __splitList.add(input.substring(beginOffset,				      currentResult.beginOffset(0)));      if((groups = currentResult.groups()) > 1) {	for(index = 1; index < groups; ++index) {	  group = currentResult.group(index);	  if(group != null && group.length() > 0)	    __splitList.add(group);	}      }      beginOffset = currentResult.endOffset(0);    }    __splitList.add(input.substring(beginOffset, input.length()));    // Remove all trailing empty fields.    for(int i = __splitList.size() - 1; i >= 0; --i) {      String str;      str = (String)__splitList.get(i);      if(str.length() == 0)	__splitList.remove(i);      else	break;    }    results.addAll(__splitList);    __splitList.clear();    // Just for the sake of completeness    __lastMatch = currentResult;  }  /**   * This method is identical to calling:   * <blockquote><pre>   * split(results, pattern, input, SPLIT_ALL);   * </pre></blockquote>   */  public synchronized void split(Collection results, String pattern,				 String input)       throws MalformedPerl5PatternException   {    split(results, pattern, input, SPLIT_ALL);  }  /**   * Splits input in the default Perl manner, splitting on all whitespace.   * This method is identical to calling:   * <blockquote><pre>   * split(results, "/\\s+/", input);   * </pre></blockquote>   */  public synchronized void split(Collection results, String input)       throws MalformedPerl5PatternException  {    split(results, "/\\s+/", input);  }  /**   * Splits a String into strings contained in a Vector of size no greater   * than a specified limit.  The String is split using a regular expression   * as the delimiter.  The regular expression is a pattern specified   * in Perl5 native format:   * <blockquote><pre>   * [m]/pattern/[i][m][s][x]   * </pre></blockquote>   * The <code>m</code> prefix is optional and the meaning of the optional   * trailing options are:   * <dl compact>    * <dt> i <dd> case insensitive match   * <dt> m <dd> treat the input as consisting of multiple lines   * <dt> s <dd> treat the input as consisting of a single line   * <dt> x <dd> enable extended expression syntax incorporating whitespace   *             and comments   * </dl>   * As with Perl, any non-alphanumeric character can be used in lieu of   * the slashes.   * <p>   * The limit parameter causes the string to be split on at most the first   * <b>limit - 1</b> number of pattern occurences.   * <p>   * Of special note is that this split method performs EXACTLY the same   * as the Perl split() function.  In other words, if the split pattern   * contains parentheses, additional Vector elements are created from   * each of the matching subgroups in the pattern.  Using an example   * similar to the one from the Camel book:   * <blockquote><pre>   * split("/([,-])/", "8-12,15,18")   * </pre></blockquote>   * produces the Vector containing:   * <blockquote><pre>   * { "8", "-", "12", ",", "15", ",", "18" }   * </pre></blockquote>   * The {@link org.apache.oro.text.regex.Util#split Util.split()} method   * does NOT implement this particular behavior because it is intended to   * be usable with Pattern instances other than Perl5Pattern.   * <p>   * @deprecated Use   * {@link #split(Collection results, String pattern, String input, int limit)}   *  instead.   * @param pattern The regular expression to use as a split delimiter.   * @param input The String to split.   * @param limit The limit on the size of the returned <code>Vector</code>.   *   Values <= 0 produce the same behavior as the SPLIT_ALL constant which   *   causes the limit to be ignored and splits to be performed on all   *   occurrences of the pattern.  You should use the SPLIT_ALL constant   *   to achieve this behavior instead of relying on the default behavior   *   associated with non-positive limit values.   * @return A <code> Vector </code> containing the substrings of the input   *    that occur between the regular expression delimiter occurences. The   *    input will not be split into any more substrings than the specified    *    limit. A way of thinking of this is that only the first   *    <b>limit - 1</b>   *    matches of the delimiting regular expression will be used to split the   *    input.    * @exception MalformedPerl5PatternException  If there is an error in   *            the expression.  You are not forced to catch this exception   *            because it is derived from RuntimeException.   */  public synchronized Vector split(String pattern, String input, int limit)       throws MalformedPerl5PatternException   {    Vector results = new Vector(20);    split(results, pattern, input, limit);    return results;  }  /**   * This method is identical to calling:   * <blockquote><pre>   * split(pattern, input, SPLIT_ALL);   * </pre></blockquote>   * @deprecated Use   * {@link #split(Collection results, String pattern, String input)} instead.   */  public synchronized Vector split(String pattern, String input)       throws MalformedPerl5PatternException   {    return split(pattern, input, SPLIT_ALL);
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -