📄 perl5substitution.java
字号:
/* * $Id: Perl5Substitution.java,v 1.13 2003/11/07 20:16:25 dfs Exp $ * * ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2000 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro" * must not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache" * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their * name, without prior written permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */package org.apache.oro.text.regex;import java.util.*;/** * Perl5Substitution implements a Substitution consisting of a * literal string, but allowing Perl5 variable interpolation referencing * saved groups in a match. This class is intended for use with * {@link Util#substitute Util.substitute}. * <p> * The substitution string may contain variable interpolations referring * to the saved parenthesized groups of the search pattern. * A variable interpolation is denoted by <b>$1</b>, or <b>$2</b>, * or <b>$3</b>, etc. If you want such expressions to be * interpreted literally, you should set the <b> numInterpolations </b> * parameter to <b> INTERPOLATE_NONE </b>. It is easiest to explain * what an interpolated variable does by giving an example: * <ul> * Suppose you have the pattern <b>b\d+:</b> and you want to substitute * the <b>b</b>'s for <b>a</b>'s and the colon for a dash in parts of * your input matching the pattern. You can do this by changing the * pattern to <b>b(\d+):</b> and using the substitution expression * <b>a$1-</b>. 
When a substitution is made, the <b>$1</b> means * "Substitute whatever was matched by the first saved group of the * matching pattern." An input of <b>b123:</b> after substitution * would yield a result of <b>a123-</b>. But there's a little more * to be aware of. If you set the <b>numInterpolations</b> parameter to * <b>INTERPOLATE_ALL</b>, then every time a match is found, the * interpolation variables are computed relative to that match. * But if <b>numInterpolations</b> is set to some positive integer, then * only the interpolation variables for the first <b>numInterpolations</b> * matches are computed relative to the most recent match. After that, * the remaining substitutions have their variable interpolations performed * relative to the <b> numInterpolations </b>'th match. So using the * previously mentioned pattern and substitution expression, if you have * an input of <pre><b>Tank b123: 85 Tank b256: 32 Tank b78: 22</b></pre> * and use a <b> numInterpolations </b> value of <b>INTERPOLATE_ALL</b> and * <b> numSubs </b> value (see * {@link Util#substitute Util.substitute}) * of <b> SUBSTITUTE_ALL</b>, then your result will be: * <pre><b>Tank a123- 85 Tank a256- 32 Tank a78- 22</b></pre> * But if you set <b> numInterpolations </b> to 2 and keep * <b> numSubs </b> with a value of <b>SUBSTITUTE_ALL</b>, your result is: * <pre><b>Tank a123- 85 Tank a256- 32 Tank a256- 22</b></pre> * Notice how the last substitution uses the same value for <b>$1</b> * as the second substitution. * </ul> * <p> * A final thing to keep in mind is that if you use an interpolation variable * that corresponds to a group not contained in the match, then it is * interpreted as the empty string. 
So given the regular expression from the * example, and a substitution expression of <b>a$2-</b>, the result * of the last sample input would be: * <pre><b>Tank a- 85 Tank a- 32 Tank a- 22</b></pre> * The special substitution <b>$&</b> will interpolate the entire portion * of the input matched by the regular expression. <b>$0</b> will * do the same, but it is recommended that it be avoided because the * latest versions of Perl use <b>$0</b> to store the program name rather * than duplicate the behavior of <b>$&</b>. * Also, the result of substituting $ followed by a non-positive integer * is undefined. In order to include a $ in a substitution, it should * be escaped with a backslash (e.g., <b>"\\$0"</b>). * <p> * Perl5 double-quoted string case modification is also supported in * the substitution. The following escape sequences are supported: * <dl compact> * <dt> \\U <dd> make substitution uppercase until end of substitution or \\E * <dt> \\u <dd> make next character uppercase * <dt> \\L <dd> make substitution lowercase until end of substitution or \\E * <dt> \\l <dd> make next character lowercase * <dt> \\E <dd> mark the end of the case modification * </dl> * The double backslashes are shown to remind you that to make a * backslash get past Java's string handling and appear as a backslash * to the substitution, you must escape the backslash. * * @version @version@ * @since 1.1 * @see Substitution * @see Util * @see Util#substitute * @see Substitution * @see StringSubstitution */public class Perl5Substitution extends StringSubstitution { /** * A constant used when creating a Perl5Substitution indicating that * interpolation variables should be computed relative to the most * recent pattern match. */ public static final int INTERPOLATE_ALL = 0; /** * A constant used when creating a Perl5Substitution indicating that * interpolation variables should be interpreted literally, effectively * disabling interpolation. 
*/
  public static final int INTERPOLATE_NONE = -1;

  /**
   * The initial size and unit of growth for the
   * {@link #_subOpcodes _subOpcodes} array.
   */
  private static final int __OPCODE_STORAGE_SIZE = 32;

  /**
   * The maximum number of groups supported by interpolation.
   * While parsing a group reference, further digits are only accumulated
   * as long as the value parsed so far does not exceed this limit.
   */
  private static final int __MAX_GROUPS = Character.MAX_VALUE;

  /**
   * A constant declaring opcode for copy operation.
   */
  static final int _OPCODE_COPY = -1;

  /**
   * A constant declaring opcode for lowercase char operation.
   */
  static final int _OPCODE_LOWERCASE_CHAR = -2;

  /**
   * A constant declaring opcode for uppercase char operation.
   */
  static final int _OPCODE_UPPERCASE_CHAR = -3;

  /**
   * A constant declaring opcode for lowercase mode operation.
   */
  static final int _OPCODE_LOWERCASE_MODE = -4;

  /**
   * A constant declaring opcode for uppercase mode operation.
   */
  static final int _OPCODE_UPPERCASE_MODE = -5;

  /**
   * A constant declaring opcode for end case mode operation.
   */
  static final int _OPCODE_ENDCASE_MODE = -6;

  /**
   * NOTE(review): not assigned in the visible part of the file; presumably
   * holds the numInterpolations setting described in the class
   * documentation -- confirm.
   */
  int _numInterpolations;

  /**
   * Storage for the parsed substitution opcodes.  Allocated with
   * {@link #__OPCODE_STORAGE_SIZE} entries by __parseSubs and appended to
   * (and grown) by __addElement.
   */
  int[] _subOpcodes;

  /**
   * The number of entries of {@link #_subOpcodes _subOpcodes} currently
   * in use; also the index at which the next value is stored.
   */
  int _subOpcodesCount;

  /**
   * The characters of the substitution string, as assigned by __parseSubs.
   */
  char[] _substitutionChars;

  /**
   * NOTE(review): not used in the visible part of the file; presumably a
   * cached interpolation result -- confirm.
   */
  transient String _lastInterpolation;

  /**
   * Tests whether a character may follow a '$' to form an interpolation
   * variable.
   *
   * @param ch The character to test.
   * @return true if ch is a digit (as determined by Character.isDigit)
   *         or the character '&amp;', false otherwise.
   */
  private static final boolean __isInterpolationCharacter(char ch) {
    return (Character.isDigit(ch) || ch == '&');
  }

  /**
   * Appends a value to the end of the {@link #_subOpcodes _subOpcodes}
   * array, enlarging the array by {@link #__OPCODE_STORAGE_SIZE} entries
   * first if it is already full.
   *
   * @param value The value to append.
   */
  private void __addElement(int value) {
    int len = _subOpcodes.length;
    if (_subOpcodesCount == len) {
      // Array is full: grow by a fixed increment and copy the old contents.
      int[] newarray = new int[len + __OPCODE_STORAGE_SIZE];
      System.arraycopy(_subOpcodes, 0, newarray, 0, len);
      _subOpcodes = newarray;
    }
    _subOpcodes[_subOpcodesCount++] = value;
  }

  private void __parseSubs(String sub) {
    boolean saveDigits, escapeMode, caseMode;
    int posParam;
    int offset;
    char[] subChars = _substitutionChars = sub.toCharArray();
    int subLength = subChars.length;

    _subOpcodes = new int[__OPCODE_STORAGE_SIZE];
    _subOpcodesCount = 0;

    posParam = 0;
    offset = -1;
    saveDigits = false;
    escapeMode = false;
    caseMode = false;

    for (int current = 0; current < subLength; current++) {
      char c = subChars[current];
      char nextc;
      int next = current + 1;

      /* NOTE(review): in this copy of the file the line comment below has
         absorbed the "if" keyword that should open the following statement;
         this looks like a line-joining artifact -- confirm against the
         upstream source. */
      // Save digits if
(saveDigits) { int digit = Character.digit(c, 10); if (digit > -1) { if (posParam <= __MAX_GROUPS) { posParam *= 10; posParam += digit; } if (next == subLength) { __addElement(posParam); } continue; } else if(c == '&') { if(/*current > 0 &&*/subChars[current - 1] == '$') { __addElement(0); posParam = 0; saveDigits = false; continue; } } __addElement(posParam); posParam = 0; saveDigits = false; } if ((c != '$' && c != '\\') || escapeMode) { escapeMode = false; if (offset < 0) { offset = current;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -