⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 recompiler.java

📁 jakarta-regexp-1.5 正则表达式的源代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements.  See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License.  You may obtain a copy of the License at * *     http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */package org.apache.regexp;import java.util.Hashtable;/** * A regular expression compiler class.  This class compiles a pattern string into a * regular expression program interpretable by the RE evaluator class.  The 'recompile' * command line tool uses this compiler to pre-compile regular expressions for use * with RE.  For a description of the syntax accepted by RECompiler and what you can * do with regular expressions, see the documentation for the RE matcher class. * * @see RE * @see recompile * * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a> * @author <a href="mailto:gholam@xtra.co.nz">Michael McCallum</a> * @version $Id: RECompiler.java 518156 2007-03-14 14:31:26Z vgritsenko $ */public class RECompiler{    // The compiled program    char[] instruction;                                 // The compiled RE 'program' instruction buffer    int lenInstruction;                                 // The amount of the program buffer currently in use    // Input state for compiling regular expression    String pattern;                                     // Input string    int len;                                            // Length of the pattern string    int idx;                                            // Current input index into ac    int parens;                                         // Total number of paren pairs    // Node flags    static final int NODE_NORMAL   = 0;                 // No flags (nothing special)    static final int NODE_NULLABLE = 1;                 // True if node is potentially null    static final int NODE_TOPLEVEL = 2;                 // True if top level expr    // Special types of 'escapes'    static final int ESC_MASK      = 0xffff0;           // Escape complexity mask    static final int ESC_BACKREF   = 0xfffff;           // Escape is really a backreference    static final int ESC_COMPLEX   = 0xffffe;           // Escape isn't really a true character    static final int ESC_CLASS     = 0xffffd;           // Escape represents a whole class of characters    // {m,n} stacks    static final int bracketUnbounded = -1;             // Unbounded value    int bracketMin;                                     // Minimum number of matches    int bracketOpt;                                     // Additional optional matches    // Lookup table for POSIX character class names    static final Hashtable hashPOSIX = new Hashtable();    static    {        hashPOSIX.put("alnum",     new Character(RE.POSIX_CLASS_ALNUM));        hashPOSIX.put("alpha",     new Character(RE.POSIX_CLASS_ALPHA));        hashPOSIX.put("blank",     new Character(RE.POSIX_CLASS_BLANK));        hashPOSIX.put("cntrl",     new Character(RE.POSIX_CLASS_CNTRL));        hashPOSIX.put("digit",     new Character(RE.POSIX_CLASS_DIGIT));        hashPOSIX.put("graph",     new Character(RE.POSIX_CLASS_GRAPH));        hashPOSIX.put("lower",     new Character(RE.POSIX_CLASS_LOWER));        hashPOSIX.put("print",     new Character(RE.POSIX_CLASS_PRINT));        hashPOSIX.put("punct",     new Character(RE.POSIX_CLASS_PUNCT));        hashPOSIX.put("space",     new Character(RE.POSIX_CLASS_SPACE));        hashPOSIX.put("upper",     new Character(RE.POSIX_CLASS_UPPER));        hashPOSIX.put("xdigit",    new Character(RE.POSIX_CLASS_XDIGIT));        hashPOSIX.put("javastart", new Character(RE.POSIX_CLASS_JSTART));        hashPOSIX.put("javapart",  new Character(RE.POSIX_CLASS_JPART));    }    /**     * Constructor.  Creates (initially empty) storage for a regular expression program.     */    public RECompiler()    {        // Start off with a generous, yet reasonable, initial size        instruction = new char[128];        lenInstruction = 0;    }    /**     * Ensures that n more characters can fit in the program buffer.     * If n more can't fit, then the size is doubled until it can.     * @param n Number of additional characters to ensure will fit.     */    void ensure(int n)    {        // Get current program length        int curlen = instruction.length;        // If the current length + n more is too much        if (lenInstruction + n >= curlen)        {            // Double the size of the program array until n more will fit            while (lenInstruction + n >= curlen)            {                curlen *= 2;            }            // Allocate new program array and move data into it            char[] newInstruction = new char[curlen];            System.arraycopy(instruction, 0, newInstruction, 0, lenInstruction);            instruction = newInstruction;        }    }    /**     * Emit a single character into the program stream.     * @param c Character to add     */    void emit(char c)    {        // Make room for character        ensure(1);        // Add character        instruction[lenInstruction++] = c;    }    /**     * Inserts a node with a given opcode and opdata at insertAt.  The node relative next     * pointer is initialized to 0.     * @param opcode Opcode for new node     * @param opdata Opdata for new node (only the low 16 bits are currently used)     * @param insertAt Index at which to insert the new node in the program     */    void nodeInsert(char opcode, int opdata, int insertAt)    {        // Make room for a new node        ensure(RE.nodeSize);        // Move everything from insertAt to the end down nodeSize elements        System.arraycopy(instruction, insertAt, instruction, insertAt + RE.nodeSize, lenInstruction - insertAt);        instruction[insertAt /* + RE.offsetOpcode */] = opcode;        instruction[insertAt    + RE.offsetOpdata   ] = (char) opdata;        instruction[insertAt    + RE.offsetNext     ] = 0;        lenInstruction += RE.nodeSize;    }    /**     * Appends a node to the end of a node chain     * @param node Start of node chain to traverse     * @param pointTo Node to have the tail of the chain point to     */    void setNextOfEnd(int node, int pointTo)    {        // Traverse the chain until the next offset is 0        int next = instruction[node + RE.offsetNext];        // while the 'node' is not the last in the chain        // and the 'node' is not the last in the program.        while ( next != 0 && node < lenInstruction )        {            // if the node we are supposed to point to is in the chain then            // point to the end of the program instead.            // Michael McCallum <gholam@xtra.co.nz>            // FIXME: This is a _hack_ to stop infinite programs.            // I believe that the implementation of the reluctant matches is wrong but            // have not worked out a better way yet.            if (node == pointTo) {                pointTo = lenInstruction;            }            node += next;            next = instruction[node + RE.offsetNext];        }        // if we have reached the end of the program then dont set the pointTo.        // im not sure if this will break any thing but passes all the tests.        if ( node < lenInstruction ) {            // Some patterns result in very large programs which exceed            // capacity of the short used for specifying signed offset of the            // next instruction. Example: a{1638}            int offset = pointTo - node;            if (offset != (short) offset) {                throw new RESyntaxException("Exceeded short jump range.");            }            // Point the last node in the chain to pointTo.            instruction[node + RE.offsetNext] = (char) (short) offset;        }    }    /**     * Adds a new node     * @param opcode Opcode for node     * @param opdata Opdata for node (only the low 16 bits are currently used)     * @return Index of new node in program     */    int node(char opcode, int opdata)    {        // Make room for a new node        ensure(RE.nodeSize);        // Add new node at end        instruction[lenInstruction /* + RE.offsetOpcode */] = opcode;        instruction[lenInstruction    + RE.offsetOpdata   ] = (char) opdata;        instruction[lenInstruction    + RE.offsetNext     ] = 0;        lenInstruction += RE.nodeSize;        // Return index of new node        return lenInstruction - RE.nodeSize;    }    /**     * Throws a new internal error exception     * @exception Error Thrown in the event of an internal error.     */    void internalError() throws Error    {        throw new Error("Internal error!");    }    /**     * Throws a new syntax error exception     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.     */    void syntaxError(String s) throws RESyntaxException    {        throw new RESyntaxException(s);    }    /**     * Match bracket {m,n} expression put results in bracket member variables     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.     */    void bracket() throws RESyntaxException    {        // Current character must be a '{'        if (idx >= len || pattern.charAt(idx++) != '{')        {            internalError();        }        // Next char must be a digit        if (idx >= len || !Character.isDigit(pattern.charAt(idx)))        {            syntaxError("Expected digit");        }        // Get min ('m' of {m,n}) number        StringBuffer number = new StringBuffer();        while (idx < len && Character.isDigit(pattern.charAt(idx)))        {            number.append(pattern.charAt(idx++));        }        try        {            bracketMin = Integer.parseInt(number.toString());        }        catch (NumberFormatException e)        {            syntaxError("Expected valid number");        }        // If out of input, fail        if (idx >= len)        {            syntaxError("Expected comma or right bracket");        }        // If end of expr, optional limit is 0        if (pattern.charAt(idx) == '}')        {            idx++;            bracketOpt = 0;            return;        }        // Must have at least {m,} and maybe {m,n}.        if (idx >= len || pattern.charAt(idx++) != ',')        {            syntaxError("Expected comma");        }        // If out of input, fail        if (idx >= len)        {            syntaxError("Expected comma or right bracket");        }        // If {m,} max is unlimited        if (pattern.charAt(idx) == '}')        {            idx++;            bracketOpt = bracketUnbounded;            return;        }        // Next char must be a digit        if (idx >= len || !Character.isDigit(pattern.charAt(idx)))        {            syntaxError("Expected digit");        }        // Get max number        number.setLength(0);        while (idx < len && Character.isDigit(pattern.charAt(idx)))        {            number.append(pattern.charAt(idx++));        }        try        {            bracketOpt = Integer.parseInt(number.toString()) - bracketMin;        }        catch (NumberFormatException e)        {            syntaxError("Expected valid number");        }        // Optional repetitions must be >= 0        if (bracketOpt < 0)        {            syntaxError("Bad range");        }        // Must have close brace        if (idx >= len || pattern.charAt(idx++) != '}')        {            syntaxError("Missing close brace");        }    }    /**     * Match an escape sequence.  Handles quoted chars and octal escapes as well     * as normal escape characters.  Always advances the input stream by the     * right amount. This code "understands" the subtle difference between an     * octal escape and a backref.  You can access the type of ESC_CLASS or     * ESC_COMPLEX or ESC_BACKREF by looking at pattern[idx - 1].     * @return ESC_* code or character if simple escape     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.     */    int escape() throws RESyntaxException    {        // "Shouldn't" happen        if (pattern.charAt(idx) != '\\')        {            internalError();        }        // Escape shouldn't occur as last character in string!        if (idx + 1 == len)        {            syntaxError("Escape terminates string");        }        // Switch on character after backslash        idx += 2;        char escapeChar = pattern.charAt(idx - 1);        switch (escapeChar)        {            case RE.E_BOUND:            case RE.E_NBOUND:                return ESC_COMPLEX;            case RE.E_ALNUM:

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -