regexp.java

来自「Grammatica是一个C#和Java的语法分析程序生成器(编译器的编译器)。」· Java 代码 · 共 639 行 · 第 1/2 页
JAVA
639 行
/* * RegExp.java * * This work is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published * by the Free Software Foundation; either version 2 of the License, * or (at your option) any later version. * * This work is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA * * As a special exception, the copyright holders of this library give * you permission to link this library with independent modules to * produce an executable, regardless of the license terms of these * independent modules, and to copy and distribute the resulting * executable under terms of your choice, provided that you also meet, * for each linked independent module, the terms and conditions of the * license of that module. An independent module is a module which is * not derived from or based on this library. If you modify this * library, you may extend this exception to your version of the * library, but you are not obligated to do so. If you do not wish to * do so, delete this exception statement from your version. * * Copyright (c) 2003 Per Cederberg. All rights reserved. */package net.percederberg.grammatica.parser.re;import java.io.PrintWriter;import java.io.StringWriter;import java.util.ArrayList;/** * A regular expression. This class creates and holds an internal  * data structure representing a regular expression. It also allows  * creating matchers. This class is thread-safe. Multiple matchers may  * operate simultanously on the same regular expression. * * @author   Per Cederberg, <per at percederberg dot net> * @version  1.0 */public class RegExp {    /**     * The base regular expression element.     */    private Element element;    /**     * The regular expression pattern.     */    private String pattern;    /**     * The current position in the pattern. This variable is used by     * the parsing methods.     */    private int pos;    /**     * Creates a new regular expression.     *      * @param pattern        the regular expression pattern     *      * @throws RegExpException if the regular expression couldn't be     *             parsed correctly     */    public RegExp(String pattern) throws RegExpException {        this.pattern = pattern;        this.pos = 0;        this.element = parseExpr();        if (pos < pattern.length()) {            throw new RegExpException(                RegExpException.UNEXPECTED_CHARACTER,                pos,                pattern);        }    }    /**     * Creates a new matcher for the specified string.     *      * @param str            the string to work with     *      * @return the regular expresion matcher     */    public Matcher matcher(CharBuffer str) {        return new Matcher((Element) element.clone(), str);    }    /**     * Creates a new matcher for the specified string.     *      * @param str            the string to work with     *      * @return the regular expresion matcher     */    public Matcher matcher(String str) {        return matcher(new CharBuffer(str));    }    /**     * Creates a new matcher for the specified string.     *      * @param str            the string to work with     *      * @return the regular expresion matcher     */    public Matcher matcher(StringBuffer str) {        return matcher(new CharBuffer(str));    }    /**     * Returns a string representation of the regular expression.     *      * @return a string representation of the regular expression     */    public String toString() {        StringWriter  str;                str = new StringWriter();        str.write("  Regexp Pattern: " + pattern + "\n");        str.write("  Regexp Compiled:\n");        element.printTo(new PrintWriter(str), "    ");        return str.toString();    }    /**     * Parses a regular expression. This method handles the Expr     * production in the grammar (see regexp.grammar).     *      * @return the element representing this expression     *      * @throws RegExpException if an error was encountered in the      *             pattern string     */    private Element parseExpr() throws RegExpException {        Element  first;        Element  second;                first = parseTerm();        if (peekChar(0) != '|') {            return first;        } else {            readChar('|');            second = parseExpr();            return new AlternativeElement(first, second);        }    }        /**     * Parses a regular expression term. This method handles the      * Term production in the grammar (see regexp.grammar).     *      * @return the element representing this term     *      * @throws RegExpException if an error was encountered in the      *             pattern string     */    private Element parseTerm() throws RegExpException {        ArrayList  list = new ArrayList();                list.add(parseFact());        while (true) {            switch (peekChar(0)) {            case -1:            case ')':            case ']':            case '{':            case '}':            case '?':            case '+':            case '|':                return combineElements(list);            default:                list.add(parseFact());            }        }    }    /**     * Parses a regular expression factor. This method handles the      * Fact production in the grammar (see regexp.grammar).     *      * @return the element representing this factor     *      * @throws RegExpException if an error was encountered in the      *             pattern string     */    private Element parseFact() throws RegExpException {        Element  elem;        elem = parseAtom();        switch (peekChar(0)) {        case '?':        case '*':        case '+':        case '{':            return parseAtomModifier(elem);        default:            return elem;        }    }    /**     * Parses a regular expression atom. This method handles the      * Atom production in the grammar (see regexp.grammar).     *      * @return the element representing this atom     *      * @throws RegExpException if an error was encountered in the      *             pattern string     */    private Element parseAtom() throws RegExpException {        Element  elem;        switch (peekChar(0)) {        case '.':            readChar('.');            return CharacterSetElement.DOT;        case '(':            readChar('(');            elem = parseExpr();            readChar(')');            return elem;        case '[':            readChar('[');            elem = parseCharSet();            readChar(']');            return elem;        case -1:        case ')':        case ']':        case '{':        case '}':        case '?':        case '*':        case '+':        case '|':            throw new RegExpException(                RegExpException.UNEXPECTED_CHARACTER,                pos,                pattern);        default:            return parseChar();        }    }    /**     * Parses a regular expression atom modifier. This method handles      * the AtomModifier production in the grammar (see regexp.grammar).     *     * @param elem           the element to modify     *       * @return the modified element      *      * @throws RegExpException if an error was encountered in the      *             pattern string     */    private Element parseAtomModifier(Element elem) throws RegExpException {        int  min = 0;        int  max = -1;        int  type = RepeatElement.GREEDY;        int  firstPos;        // Read min and max        switch (readChar()) {        case '?':            min = 0;            max = 1;            break;        case '*':            min = 0;            max = -1;            break;        case '+':            min = 1;            max = -1;            break;        case '{':            firstPos = pos - 1;            min = readNumber();            max = min;            if (peekChar(0) == ',') {                readChar(',');                max = -1;                if (peekChar(0) != '}') {                    max = readNumber();                }            }            readChar('}');            if (max == 0 || (max > 0 && min > max)) {                throw new RegExpException(                    RegExpException.INVALID_REPEAT_COUNT,                    firstPos,                    pattern);            }            break;        default:            throw new RegExpException(                RegExpException.UNEXPECTED_CHARACTER,                pos - 1,                pattern);        }        // Read operator mode        if (peekChar(0) == '?') {            readChar('?');            type = RepeatElement.RELUCTANT;        } else if (peekChar(0) == '+') {            readChar('+');            type = RepeatElement.POSSESSIVE;        }
regexp.java - 源码说明

本页面展示了「Grammatica是一个C#和Java的语法分析程序生成器(编译器的编译器)。它可以用LL(k)语法创建可读的和带有注释的源代码。它也支持创建一个运行时语法分析器」中的 regexp.java 源码文件，采用 Java 编程语言编写，共 639 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Grammatica相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?