📄 perl5util.java

📁 java实现正则表达式的代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
	}      }    }    ret = __patternCache.getPattern(regex, compileOptions);    __expressionCache.addElement(pattern, ret);    return ret;  }  /**   * Searches for the first pattern match somewhere in a character array   * taking a pattern specified in Perl5 native format:   * <blockquote><pre>   * [m]/pattern/[i][m][s][x]   * </pre></blockquote>   * The <code>m</code> prefix is optional and the meaning of the optional   * trailing options are:   * <dl compact>    * <dt> i <dd> case insensitive match   * <dt> m <dd> treat the input as consisting of multiple lines   * <dt> s <dd> treat the input as consisting of a single line   * <dt> x <dd> enable extended expression syntax incorporating whitespace   *             and comments   * </dl>   * As with Perl, any non-alphanumeric character can be used in lieu of   * the slashes.   * <p>   * If the input contains the pattern, the org.apache.oro.text.regex.MatchResult   * can be obtained by calling {@link #getMatch()}.   * However, Perl5Util implements the MatchResult interface as a wrapper   * around the last MatchResult found, so you can call its methods to   * access match information.   * <p>   * @param pattern  The pattern to search for.   * @param input    The char[] input to search.   * @return True if the input contains the pattern, false otherwise.   * @exception MalformedPerl5PatternException  If there is an error in   *            the pattern.  You are not forced to catch this exception   *            because it is derived from RuntimeException.   
*/
  public synchronized boolean match(String pattern, char[] input)
       throws MalformedPerl5PatternException
  {
    boolean result;

    // Parse the expression exactly once.  The value returned by
    // __parseMatchExpression is what gets handed to the matcher, so a
    // separate up-front call whose result is discarded only repeats the
    // lookup.  A malformed expression still raises
    // MalformedPerl5PatternException from this single call.
    result = __matcher.contains(input, __parseMatchExpression(pattern));

    if(result) {
      // Record the match together with the input that was searched and
      // the bounds of the searched region (the whole array here).
      __lastMatch        = __matcher.getMatch();
      __originalInput    = input;
      __inputBeginOffset = 0;
      __inputEndOffset   = input.length;
    }

    return result;
  }

  /**
   * Searches for the first pattern match in a String taking
   * a pattern specified in Perl5 native format:
   * <blockquote><pre>
   * [m]/pattern/[i][m][s][x]
   * </pre></blockquote>
   * The <code>m</code> prefix is optional and the meaning of the optional
   * trailing options are:
   * <dl compact>
   * <dt> i <dd> case insensitive match
   * <dt> m <dd> treat the input as consisting of multiple lines
   * <dt> s <dd> treat the input as consisting of a single line
   * <dt> x <dd> enable extended expression syntax incorporating whitespace
   *             and comments
   * </dl>
   * As with Perl, any non-alphanumeric character can be used in lieu of
   * the slashes.
   * <p>
   * If the input contains the pattern, the
   * {@link org.apache.oro.text.regex.MatchResult MatchResult}
   * can be obtained by calling {@link #getMatch()}.
   * However, Perl5Util implements the MatchResult interface as a wrapper
   * around the last MatchResult found, so you can call its methods to
   * access match information.
   * <p>
   * @param pattern  The pattern to search for.
   * @param input    The String input to search.
   * @return True if the input contains the pattern, false otherwise.
   * @exception MalformedPerl5PatternException  If there is an error in
   *            the pattern.  You are not forced to catch this exception
   *            because it is derived from RuntimeException.
*/
  public synchronized boolean match(String pattern, String input)
       throws MalformedPerl5PatternException
  {
    // The String overload is a thin adapter: copy the characters out of
    // the String and let the char[] overload do the actual matching.
    char[] characters = input.toCharArray();
    return match(pattern, characters);
  }

  /**
   * Searches for the next pattern match somewhere in a
   * org.apache.oro.text.regex.PatternMatcherInput instance, taking
   * a pattern specified in Perl5 native format:
   * <blockquote><pre>
   * [m]/pattern/[i][m][s][x]
   * </pre></blockquote>
   * The <code>m</code> prefix is optional and the meaning of the optional
   * trailing options are:
   * <dl compact>
   * <dt> i <dd> case insensitive match
   * <dt> m <dd> treat the input as consisting of multiple lines
   * <dt> s <dd> treat the input as consisting of a single line
   * <dt> x <dd> enable extended expression syntax incorporating whitespace
   *             and comments
   * </dl>
   * As with Perl, any non-alphanumeric character can be used in lieu of
   * the slashes.
   * <p>
   * If the input contains the pattern, the
   * {@link org.apache.oro.text.regex.MatchResult MatchResult}
   * can be obtained by calling {@link #getMatch()}.
   * However, Perl5Util implements the MatchResult interface as a wrapper
   * around the last MatchResult found, so you can call its methods to
   * access match information.
   * After the call to this method, the PatternMatcherInput current offset
   * is advanced to the end of the match, so you can use it to repeatedly
   * search for expressions in the entire input using a while loop as
   * explained in the {@link org.apache.oro.text.regex.PatternMatcherInput
   * PatternMatcherInput} documentation.
   * <p>
   * @param pattern  The pattern to search for.
   * @param input    The PatternMatcherInput to search.
   * @return True if the input contains the pattern, false otherwise.
   * @exception MalformedPerl5PatternException  If there is an error in
   *            the pattern.  You are not forced to catch this exception
   *            because it is derived from RuntimeException.
*/
  public synchronized boolean match(String pattern, PatternMatcherInput input)
       throws MalformedPerl5PatternException
  {
    // Nothing to record when the matcher finds no occurrence.
    if(!__matcher.contains(input, __parseMatchExpression(pattern))) {
      return false;
    }

    // Keep the match plus the underlying input and the offsets reported
    // by the PatternMatcherInput that was searched.
    __lastMatch        = __matcher.getMatch();
    __originalInput    = input.getInput();
    __inputBeginOffset = input.getBeginOffset();
    __inputEndOffset   = input.getEndOffset();

    return true;
  }

  /**
   * Returns the last match found by a call to a match(), substitute(), or
   * split() method.  It is intended for retrieving the match found by the
   * most recent match() call, and is the method to use when you want to
   * hold on to MatchResult instances.  If you only need to read match
   * information, the Perl5Util methods implementing the MatchResult
   * interface are more convenient.
   * <p>
   * @return The org.apache.oro.text.regex.MatchResult instance containing the
   *         last match found.
   */
  public synchronized MatchResult getMatch() {
    return __lastMatch;
  }

  /**
   * Substitutes a pattern in a given input with a replacement string.
   * The substitution expression is specified in Perl5 native format:
   * <blockquote><pre>
   * s/pattern/replacement/[g][i][m][o][s][x]
   * </pre></blockquote>
   * The <code>s</code> prefix is mandatory and the meaning of the optional
   * trailing options are:
   * <dl compact>
   * <dt> g <dd> Substitute all occurrences of pattern with replacement.
   *             The default is to replace only the first occurrence.
   * <dt> i <dd> perform a case insensitive match
   * <dt> m <dd> treat the input as consisting of multiple lines
   * <dt> o <dd> If variable interpolation is used, only evaluate the
   *             interpolation once (the first time).  This is equivalent
   *             to using a numInterpolations argument of 1 in
   * {@link org.apache.oro.text.regex.Util#substitute Util.substitute()}.
*             The default is to compute each interpolation independently.   *             See   * {@link org.apache.oro.text.regex.Util#substitute Util.substitute()}   * and {@link org.apache.oro.text.regex.Perl5Substitution Perl5Substitution}   *             for more details on variable interpolation in   *             substitutions.   * <dt> s <dd> treat the input as consisting of a single line   * <dt> x <dd> enable extended expression syntax incorporating whitespace   *             and comments   * </dl>   * As with Perl, any non-alphanumeric character can be used in lieu of   * the slashes.  This is helpful to avoid backslashing.  For example,   * using slashes you would have to do:   * <blockquote><pre>   * numSubs = util.substitute(result, "s/foo\\/bar/goo\\/\\/baz/", input);   * </pre></blockquote>   * when you could more easily write:   * <blockquote><pre>   * numSubs = util.substitute(result, "s#foo/bar#goo//baz#", input);   * </pre></blockquote>   * where the hashmarks are used instead of slashes.   * <p>   * There is a special case of backslashing that you need to pay attention   * to.  As demonstrated above, to denote a delimiter in the substituted   * string it must be backslashed.  However, this can be a problem   * when you want to denote a backslash at the end of the substituted   * string.  As of PerlTools 1.3, a new means of handling this   * situation has been implemented.   * In previous versions, the behavior was that   * <blockquote>   * "... a double backslash (quadrupled in the Java String) always   * represents two backslashes unless the second backslash is followed   * by the delimiter, in which case it represents a single backslash."   * </blockquote>   * <p>   * The new behavior is that a backslash is always a backslash   * in the substitution portion of the expression unless it is used to   * escape a delimiter.  
A backslash is considered to escape a delimiter   * if an even number of contiguous backslashes preceed the backslash   * and the delimiter following the backslash is not the FINAL delimiter   * in the expression.  Therefore, backslashes preceding final delimiters   * are never considered to escape the delimiter.  The following, which   * used to be an invalid expression and require a special-case extra   * backslash, will now replace all instances of / with \:   * <blockquote><pre>   * numSubs = util.substitute(result, "s#/#\\#g", input);   * </pre></blockquote>   * <p>   * @param result     The StringBuffer in which to store the result of the   *                   substitutions. The buffer is only appended to.   * @param expression The Perl5 substitution regular expression.   * @param input      The input on which to perform substitutions.   * @return The number of substitutions made.   * @exception MalformedPerl5PatternException  If there is an error in   *            the expression.  You are not forced to catch this exception   *            because it is derived from RuntimeException.   * @since 2.0.6   */  // Expression parsing will have to be moved into a separate method if  // there are going to be variations of this method.  public synchronized int substitute(StringBuffer result, String expression,				     String input)       throws MalformedPerl5PatternException   {    boolean backslash, finalDelimiter;    int index, compileOptions, numSubstitutions, numInterpolations;    int firstOffset, secondOffset, thirdOffset, subCount;    StringBuffer replacement;    Pattern compiledPattern;    char exp[], delimiter;    ParsedSubstitutionEntry entry;    Perl5Substitution substitution;    Object obj;    obj = __expressionCache.getElement(expression);  __nullTest:    if(obj != null) {      // Must catch ClassCastException because someone might incorrectly       // pass an m// expression.  try block is cheaper than checking      // instanceof.  
We want to go ahead with parsing just in case so      // we break.      try {	entry = (ParsedSubstitutionEntry)obj;      } catch(ClassCastException e) {	break __nullTest;      }      subCount =	Util.substitute(result, __matcher, entry._pattern, entry._substitution,			input, entry._numSubstitutions);      __lastMatch = __matcher.getMatch();      return subCount;    }    exp = expression.toCharArray();    // Make sure basic conditions for a valid substitution expression hold.    if(exp.length < 4 || exp[0] != 's' || Character.isLetterOrDigit(exp[1])       || exp[1] == '-')      throw new	MalformedPerl5PatternException("Invalid expression: " + expression);    delimiter    = exp[1];    firstOffset  = 2;    secondOffset = thirdOffset = -1;    backslash    = false;    // Parse pattern    for(index = firstOffset; index < exp.length; index++) {      if(exp[index] == '\\')	backslash = !backslash;      else if(exp[index] == delimiter && !backslash) {	secondOffset = index;	break;      } else if(backslash) 	backslash = !backslash;    }    if(secondOffset == -1 || secondOffset == exp.length - 1)      throw new	MalformedPerl5PatternException("Invalid expression: " + expression);    // Parse replacement string    backslash = false;    finalDelimiter = true;    replacement = new StringBuffer(exp.length - secondOffset);    for(index = secondOffset + 1; index < exp.length; index++) {      if(exp[index] == '\\') {	backslash = !backslash;	// 05/05/99 dfs	// We unbackslash backslashed delimiters in the replacement string
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -