📄 recompiler.java
字号:
package org.apache.regexp;
/*
* ====================================================================
*
* The Apache Software License, Version 1.1
*
* Copyright (c) 1999 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution, if
* any, must include the following acknowlegement:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowlegement may appear in the software itself,
* if and wherever such third-party acknowlegements normally appear.
*
* 4. The names "The Jakarta Project", "Jakarta-Regexp", and "Apache Software
* Foundation" must not be used to endorse or promote products derived
* from this software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache"
* nor may "Apache" appear in their names without prior written
* permission of the Apache Group.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
*/
import org.apache.regexp.RE;
import java.util.Hashtable;
/**
* A regular expression compiler class. This class compiles a pattern string into a
* regular expression program interpretable by the RE evaluator class. The 'recompile'
* command line tool uses this compiler to pre-compile regular expressions for use
* with RE. For a description of the syntax accepted by RECompiler and what you can
* do with regular expressions, see the documentation for the RE matcher class.
*
* @see RE
* @see recompile
*
* @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
* @version $Id: RECompiler.java,v 1.1.1.1 2002/01/31 03:14:36 rcm Exp $
*/
public class RECompiler
{
// The compiled program
char[] instruction; // The compiled RE 'program' instruction buffer
int lenInstruction; // The amount of the program buffer currently in use
// Input state for compiling regular expression
String pattern; // Input string
int len; // Length of the pattern string
int idx; // Current input index into ac
int parens; // Total number of paren pairs
// Node flags
static final int NODE_NORMAL = 0; // No flags (nothing special)
static final int NODE_NULLABLE = 1; // True if node is potentially null
static final int NODE_TOPLEVEL = 2; // True if top level expr
// Special types of 'escapes'
static final char ESC_MASK = 0xfff0; // Escape complexity mask
static final char ESC_BACKREF = 0xffff; // Escape is really a backreference
static final char ESC_COMPLEX = 0xfffe; // Escape isn't really a true character
static final char ESC_CLASS = 0xfffd; // Escape represents a whole class of characters
// {m,n} stacks
static final int maxBrackets = 10; // Maximum number of bracket pairs
static int brackets = 0; // Number of bracket sets
static int[] bracketStart = null; // Starting point
static int[] bracketEnd = null; // Ending point
static int[] bracketMin = null; // Minimum number of matches
static int[] bracketOpt = null; // Additional optional matches
static final int bracketUnbounded = -1; // Unbounded value
static final int bracketFinished = -2; // Unbounded value
// Lookup table for POSIX character class names
static Hashtable hashPOSIX = new Hashtable();
static
{
hashPOSIX.put("alnum", new Character(RE.POSIX_CLASS_ALNUM));
hashPOSIX.put("alpha", new Character(RE.POSIX_CLASS_ALPHA));
hashPOSIX.put("blank", new Character(RE.POSIX_CLASS_BLANK));
hashPOSIX.put("cntrl", new Character(RE.POSIX_CLASS_CNTRL));
hashPOSIX.put("digit", new Character(RE.POSIX_CLASS_DIGIT));
hashPOSIX.put("graph", new Character(RE.POSIX_CLASS_GRAPH));
hashPOSIX.put("lower", new Character(RE.POSIX_CLASS_LOWER));
hashPOSIX.put("print", new Character(RE.POSIX_CLASS_PRINT));
hashPOSIX.put("punct", new Character(RE.POSIX_CLASS_PUNCT));
hashPOSIX.put("space", new Character(RE.POSIX_CLASS_SPACE));
hashPOSIX.put("upper", new Character(RE.POSIX_CLASS_UPPER));
hashPOSIX.put("xdigit", new Character(RE.POSIX_CLASS_XDIGIT));
hashPOSIX.put("javastart", new Character(RE.POSIX_CLASS_JSTART));
hashPOSIX.put("javapart", new Character(RE.POSIX_CLASS_JPART));
}
/**
* Constructor. Creates (initially empty) storage for a regular expression program.
*/
public RECompiler()
{
// Start off with a generous, yet reasonable, initial size
instruction = new char[128];
lenInstruction = 0;
}
/**
* Ensures that n more characters can fit in the program buffer.
* If n more can't fit, then the size is doubled until it can.
* @param n Number of additional characters to ensure will fit.
*/
void ensure(int n)
{
// Get current program length
int curlen = instruction.length;
// If the current length + n more is too much
if (lenInstruction + n >= curlen)
{
// Double the size of the program array until n more will fit
while (lenInstruction + n >= curlen)
{
curlen *= 2;
}
// Allocate new program array and move data into it
char[] newInstruction = new char[curlen];
System.arraycopy(instruction, 0, newInstruction, 0, lenInstruction);
instruction = newInstruction;
}
}
/**
* Emit a single character into the program stream.
* @param c Character to add
*/
void emit(char c)
{
// Make room for character
ensure(1);
// Add character
instruction[lenInstruction++] = c;
}
/**
* Inserts a node with a given opcode and opdata at insertAt. The node relative next
* pointer is initialized to 0.
* @param opcode Opcode for new node
* @param opdata Opdata for new node (only the low 16 bits are currently used)
* @param insertAt Index at which to insert the new node in the program
*/
void nodeInsert(char opcode, int opdata, int insertAt)
{
// Make room for a new node
ensure(RE.nodeSize);
// Move everything from insertAt to the end down nodeSize elements
System.arraycopy(instruction, insertAt, instruction, insertAt + RE.nodeSize, lenInstruction - insertAt);
instruction[insertAt + RE.offsetOpcode] = opcode;
instruction[insertAt + RE.offsetOpdata] = (char)opdata;
instruction[insertAt + RE.offsetNext] = 0;
lenInstruction += RE.nodeSize;
}
/**
* Appends a node to the end of a node chain
* @param node Start of node chain to traverse
* @param pointTo Node to have the tail of the chain point to
*/
void setNextOfEnd(int node, int pointTo)
{
// Traverse the chain until the next offset is 0
int next;
while ((next = instruction[node + RE.offsetNext]) != 0)
{
node += next;
}
// Point the last node in the chain to pointTo.
instruction[node + RE.offsetNext] = (char)(short)(pointTo - node);
}
/**
* Adds a new node
* @param opcode Opcode for node
* @param opdata Opdata for node (only the low 16 bits are currently used)
* @return Index of new node in program
*/
int node(char opcode, int opdata)
{
// Make room for a new node
ensure(RE.nodeSize);
// Add new node at end
instruction[lenInstruction + RE.offsetOpcode] = opcode;
instruction[lenInstruction + RE.offsetOpdata] = (char)opdata;
instruction[lenInstruction + RE.offsetNext] = 0;
lenInstruction += RE.nodeSize;
// Return index of new node
return lenInstruction - RE.nodeSize;
}
/**
* Throws a new internal error exception
* @exception Error Thrown in the event of an internal error.
*/
void internalError() throws Error
{
throw new Error("Internal error!");
}
/**
* Throws a new syntax error exception
* @exception RESyntaxException Thrown if the regular expression has invalid syntax.
*/
void syntaxError(String s) throws RESyntaxException
{
throw new RESyntaxException(s);
}
/**
* Allocate storage for brackets only as needed
*/
void allocBrackets()
{
// Allocate bracket stacks if not already done
if (bracketStart == null)
{
// Allocate storage
bracketStart = new int[maxBrackets];
bracketEnd = new int[maxBrackets];
bracketMin = new int[maxBrackets];
bracketOpt = new int[maxBrackets];
// Initialize to invalid values
for (int i = 0; i < maxBrackets; i++)
{
bracketStart[i] = bracketEnd[i] = bracketMin[i] = bracketOpt[i] = -1;
}
}
}
/**
* Match bracket {m,n} expression put results in bracket member variables
* @exception RESyntaxException Thrown if the regular expression has invalid syntax.
*/
void bracket() throws RESyntaxException
{
// Current character must be a '{'
if (idx >= len || pattern.charAt(idx++) != '{')
{
internalError();
}
// Next char must be a digit
if (idx >= len || !Character.isDigit(pattern.charAt(idx)))
{
syntaxError("Expected digit");
}
// Get min ('m' of {m,n}) number
StringBuffer number = new StringBuffer();
while (idx < len && Character.isDigit(pattern.charAt(idx)))
{
number.append(pattern.charAt(idx++));
}
try
{
bracketMin[brackets] = Integer.parseInt(number.toString());
}
catch (NumberFormatException e)
{
syntaxError("Expected valid number");
}
// If out of input, fail
if (idx >= len)
{
syntaxError("Expected comma or right bracket");
}
// If end of expr, optional limit is 0
if (pattern.charAt(idx) == '}')
{
idx++;
bracketOpt[brackets] = 0;
return;
}
// Must have at least {m,} and maybe {m,n}.
if (idx >= len || pattern.charAt(idx++) != ',')
{
syntaxError("Expected comma");
}
// If out of input, fail
if (idx >= len)
{
syntaxError("Expected comma or right bracket");
}
// If {m,} max is unlimited
if (pattern.charAt(idx) == '}')
{
idx++;
bracketOpt[brackets] = bracketUnbounded;
return;
}
// Next char must be a digit
if (idx >= len || !Character.isDigit(pattern.charAt(idx)))
{
syntaxError("Expected digit");
}
// Get max number
number.setLength(0);
while (idx < len && Character.isDigit(pattern.charAt(idx)))
{
number.append(pattern.charAt(idx++));
}
try
{
bracketOpt[brackets] = Integer.parseInt(number.toString()) - bracketMin[brackets];
}
catch (NumberFormatException e)
{
syntaxError("Expected valid number");
}
// Optional repetitions must be > 0
if (bracketOpt[brackets] <= 0)
{
syntaxError("Bad range");
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -