📄 recompiler.java
字号:
/* * Copyright 1999-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */package com.sun.org.apache.regexp.internal;import com.sun.org.apache.regexp.internal.RE;import java.util.Hashtable;/** * A regular expression compiler class. This class compiles a pattern string into a * regular expression program interpretable by the RE evaluator class. The 'recompile' * command line tool uses this compiler to pre-compile regular expressions for use * with RE. For a description of the syntax accepted by RECompiler and what you can * do with regular expressions, see the documentation for the RE matcher class. * * @see RE * @see recompile * * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a> * @author <a href="mailto:gholam@xtra.co.nz">Michael McCallum</a> * @version $Id: RECompiler.java,v 1.1.2.1 2005/08/01 00:02:55 jeffsuttor Exp $ */public class RECompiler{ // The compiled program char[] instruction; // The compiled RE 'program' instruction buffer int lenInstruction; // The amount of the program buffer currently in use // Input state for compiling regular expression String pattern; // Input string int len; // Length of the pattern string int idx; // Current input index into ac int parens; // Total number of paren pairs // Node flags static final int NODE_NORMAL = 0; // No flags (nothing special) static final int NODE_NULLABLE = 1; // True if node is potentially null static final int NODE_TOPLEVEL = 2; // True if top level expr // Special types of 'escapes' static final int ESC_MASK = 0xffff0; // Escape complexity mask static final int ESC_BACKREF = 0xfffff; // Escape is really a backreference static final int ESC_COMPLEX = 0xffffe; // Escape isn't really a true character static final int ESC_CLASS = 0xffffd; // Escape represents a whole class of characters // {m,n} stacks int maxBrackets = 10; // Maximum number of bracket pairs static final int bracketUnbounded = -1; // Unbounded value int brackets = 0; // Number of bracket sets int[] bracketStart = null; // Starting point int[] bracketEnd = null; // Ending point int[] bracketMin = null; // Minimum number of matches int[] bracketOpt = null; // Additional optional matches // Lookup table for POSIX character class names static Hashtable hashPOSIX = new Hashtable(); static { hashPOSIX.put("alnum", new Character(RE.POSIX_CLASS_ALNUM)); hashPOSIX.put("alpha", new Character(RE.POSIX_CLASS_ALPHA)); hashPOSIX.put("blank", new Character(RE.POSIX_CLASS_BLANK)); hashPOSIX.put("cntrl", new Character(RE.POSIX_CLASS_CNTRL)); hashPOSIX.put("digit", new Character(RE.POSIX_CLASS_DIGIT)); hashPOSIX.put("graph", new Character(RE.POSIX_CLASS_GRAPH)); hashPOSIX.put("lower", new Character(RE.POSIX_CLASS_LOWER)); hashPOSIX.put("print", new Character(RE.POSIX_CLASS_PRINT)); hashPOSIX.put("punct", new Character(RE.POSIX_CLASS_PUNCT)); hashPOSIX.put("space", new Character(RE.POSIX_CLASS_SPACE)); hashPOSIX.put("upper", new Character(RE.POSIX_CLASS_UPPER)); hashPOSIX.put("xdigit", new Character(RE.POSIX_CLASS_XDIGIT)); hashPOSIX.put("javastart", new Character(RE.POSIX_CLASS_JSTART)); hashPOSIX.put("javapart", new Character(RE.POSIX_CLASS_JPART)); } /** * Constructor. Creates (initially empty) storage for a regular expression program. */ public RECompiler() { // Start off with a generous, yet reasonable, initial size instruction = new char[128]; lenInstruction = 0; } /** * Ensures that n more characters can fit in the program buffer. * If n more can't fit, then the size is doubled until it can. * @param n Number of additional characters to ensure will fit. */ void ensure(int n) { // Get current program length int curlen = instruction.length; // If the current length + n more is too much if (lenInstruction + n >= curlen) { // Double the size of the program array until n more will fit while (lenInstruction + n >= curlen) { curlen *= 2; } // Allocate new program array and move data into it char[] newInstruction = new char[curlen]; System.arraycopy(instruction, 0, newInstruction, 0, lenInstruction); instruction = newInstruction; } } /** * Emit a single character into the program stream. * @param c Character to add */ void emit(char c) { // Make room for character ensure(1); // Add character instruction[lenInstruction++] = c; } /** * Inserts a node with a given opcode and opdata at insertAt. The node relative next * pointer is initialized to 0. * @param opcode Opcode for new node * @param opdata Opdata for new node (only the low 16 bits are currently used) * @param insertAt Index at which to insert the new node in the program */ void nodeInsert(char opcode, int opdata, int insertAt) { // Make room for a new node ensure(RE.nodeSize); // Move everything from insertAt to the end down nodeSize elements System.arraycopy(instruction, insertAt, instruction, insertAt + RE.nodeSize, lenInstruction - insertAt); instruction[insertAt + RE.offsetOpcode] = opcode; instruction[insertAt + RE.offsetOpdata] = (char)opdata; instruction[insertAt + RE.offsetNext] = 0; lenInstruction += RE.nodeSize; } /** * Appends a node to the end of a node chain * @param node Start of node chain to traverse * @param pointTo Node to have the tail of the chain point to */ void setNextOfEnd(int node, int pointTo) { // Traverse the chain until the next offset is 0 int next = instruction[node + RE.offsetNext]; // while the 'node' is not the last in the chain // and the 'node' is not the last in the program. while ( next != 0 && node < lenInstruction ) { // if the node we are supposed to point to is in the chain then // point to the end of the program instead. // Michael McCallum <gholam@xtra.co.nz> // FIXME: // This is a _hack_ to stop infinite programs. // I believe that the implementation of the reluctant matches is wrong but // have not worked out a better way yet. if ( node == pointTo ) { pointTo = lenInstruction; } node += next; next = instruction[node + RE.offsetNext]; } // if we have reached the end of the program then dont set the pointTo. // im not sure if this will break any thing but passes all the tests. if ( node < lenInstruction ) { // Point the last node in the chain to pointTo. instruction[node + RE.offsetNext] = (char)(short)(pointTo - node); } } /** * Adds a new node * @param opcode Opcode for node * @param opdata Opdata for node (only the low 16 bits are currently used) * @return Index of new node in program */ int node(char opcode, int opdata) { // Make room for a new node ensure(RE.nodeSize); // Add new node at end instruction[lenInstruction + RE.offsetOpcode] = opcode; instruction[lenInstruction + RE.offsetOpdata] = (char)opdata; instruction[lenInstruction + RE.offsetNext] = 0; lenInstruction += RE.nodeSize; // Return index of new node return lenInstruction - RE.nodeSize; } /** * Throws a new internal error exception * @exception Error Thrown in the event of an internal error. */ void internalError() throws Error { throw new Error("Internal error!"); } /** * Throws a new syntax error exception * @exception RESyntaxException Thrown if the regular expression has invalid syntax. */ void syntaxError(String s) throws RESyntaxException { throw new RESyntaxException(s); } /** * Allocate storage for brackets only as needed */ void allocBrackets() { // Allocate bracket stacks if not already done if (bracketStart == null) { // Allocate storage bracketStart = new int[maxBrackets]; bracketEnd = new int[maxBrackets]; bracketMin = new int[maxBrackets]; bracketOpt = new int[maxBrackets]; // Initialize to invalid values for (int i = 0; i < maxBrackets; i++) { bracketStart[i] = bracketEnd[i] = bracketMin[i] = bracketOpt[i] = -1; } } } /** Enlarge storage for brackets only as needed. */ synchronized void reallocBrackets() { // trick the tricky if (bracketStart == null) { allocBrackets(); } int new_size = maxBrackets * 2; int[] new_bS = new int[new_size]; int[] new_bE = new int[new_size]; int[] new_bM = new int[new_size]; int[] new_bO = new int[new_size]; // Initialize to invalid values for (int i=brackets; i<new_size; i++) { new_bS[i] = new_bE[i] = new_bM[i] = new_bO[i] = -1; } System.arraycopy(bracketStart,0, new_bS,0, brackets); System.arraycopy(bracketEnd,0, new_bE,0, brackets); System.arraycopy(bracketMin,0, new_bM,0, brackets); System.arraycopy(bracketOpt,0, new_bO,0, brackets); bracketStart = new_bS; bracketEnd = new_bE; bracketMin = new_bM; bracketOpt = new_bO; maxBrackets = new_size; } /** * Match bracket {m,n} expression put results in bracket member variables * @exception RESyntaxException Thrown if the regular expression has invalid syntax. */ void bracket() throws RESyntaxException { // Current character must be a '{' if (idx >= len || pattern.charAt(idx++) != '{') { internalError(); } // Next char must be a digit if (idx >= len || !Character.isDigit(pattern.charAt(idx))) { syntaxError("Expected digit"); } // Get min ('m' of {m,n}) number StringBuffer number = new StringBuffer(); while (idx < len && Character.isDigit(pattern.charAt(idx))) { number.append(pattern.charAt(idx++)); } try { bracketMin[brackets] = Integer.parseInt(number.toString()); } catch (NumberFormatException e) { syntaxError("Expected valid number"); } // If out of input, fail if (idx >= len) { syntaxError("Expected comma or right bracket"); } // If end of expr, optional limit is 0 if (pattern.charAt(idx) == '}') { idx++; bracketOpt[brackets] = 0; return; } // Must have at least {m,} and maybe {m,n}. if (idx >= len || pattern.charAt(idx++) != ',') { syntaxError("Expected comma"); } // If out of input, fail if (idx >= len) { syntaxError("Expected comma or right bracket"); } // If {m,} max is unlimited if (pattern.charAt(idx) == '}') { idx++; bracketOpt[brackets] = bracketUnbounded; return; } // Next char must be a digit if (idx >= len || !Character.isDigit(pattern.charAt(idx))) { syntaxError("Expected digit"); } // Get max number number.setLength(0); while (idx < len && Character.isDigit(pattern.charAt(idx))) { number.append(pattern.charAt(idx++)); } try { bracketOpt[brackets] = Integer.parseInt(number.toString()) - bracketMin[brackets]; } catch (NumberFormatException e) { syntaxError("Expected valid number"); } // Optional repetitions must be >= 0 if (bracketOpt[brackets] < 0) { syntaxError("Bad range"); } // Must have close brace if (idx >= len || pattern.charAt(idx++) != '}') { syntaxError("Missing close brace"); } } /**
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -