📄 regexp.cs

📁 Grammatica is a C# and Java parser generator (compiler compiler). It improves upon similar tools (lik
💻 CS
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* * RegExp.cs * * This work is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published * by the Free Software Foundation; either version 2 of the License, * or (at your option) any later version. * * This work is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA * * As a special exception, the copyright holders of this library give * you permission to link this library with independent modules to * produce an executable, regardless of the license terms of these * independent modules, and to copy and distribute the resulting * executable under terms of your choice, provided that you also meet, * for each linked independent module, the terms and conditions of the * license of that module. An independent module is a module which is * not derived from or based on this library. If you modify this * library, you may extend this exception to your version of the * library, but you are not obligated to do so. If you do not wish to * do so, delete this exception statement from your version. * * Copyright (c) 2003 Per Cederberg. All rights reserved. */using System;using System.Collections;using System.IO;using System.Globalization;using System.Text;namespace PerCederberg.Grammatica.Parser.RE {    /**     * A regular expression. This class creates and holds an internal     * data structure representing a regular expression. It also     * allows creating matchers. This class is thread-safe. Multiple     * matchers may operate simultanously on the same regular     * expression.     
*
     * @author   Per Cederberg, <per at percederberg dot net>
     * @version  1.0
     */
    public class RegExp {

        /**
         * The base regular expression element.
         */
        private Element element;

        /**
         * The regular expression pattern.
         */
        private string pattern;

        /**
         * The current position in the pattern. This variable is used by
         * the parsing methods.
         */
        private int pos;

        /**
         * Creates a new regular expression. The whole pattern is parsed
         * immediately; any character left over after the top-level
         * expression has been parsed is reported as unexpected.
         *
         * @param pattern        the regular expression pattern
         *
         * @throws ArgumentNullException if the pattern is null
         * @throws RegExpException if the regular expression couldn't be
         *             parsed correctly
         */
        public RegExp(string pattern) {
            // Fail fast with a descriptive exception instead of letting
            // a null pattern blow up somewhere inside the parser
            if (pattern == null) {
                throw new ArgumentNullException("pattern");
            }
            this.pattern = pattern;
            this.pos = 0;
            this.element = ParseExpr();

            // ParseExpr() stopped before the end of the pattern, so the
            // character at pos could not be part of the expression
            if (pos < pattern.Length) {
                throw new RegExpException(
                    RegExpException.ErrorType.UNEXPECTED_CHARACTER,
                    pos,
                    pattern);
            }
        }

        /**
         * Creates a new matcher for the specified string. The matcher
         * is handed a clone of the parsed element tree rather than the
         * tree held by this object.
         *
         * @param str            the string to work with
         *
         * @return the regular expression matcher
         */
        public Matcher Matcher(string str) {
            return new Matcher((Element) element.Clone(), str);
        }

        /**
         * Returns a string representation of the regular expression.
         * The result contains the original pattern followed by a
         * printout of the compiled element tree.
         *
         * @return a string representation of the regular expression
         */
        public override string ToString() {
            using (StringWriter str = new StringWriter()) {
                str.WriteLine("Regular Expression");
                str.WriteLine("  Pattern: " + pattern);
                str.WriteLine("  Compiled:");
                element.PrintTo(str, "    ");
                return str.ToString();
            }
        }

        /**
         * Parses a regular expression. 
This method handles the Expr
         * production in the grammar (see regexp.grammar).
         *
         * @return the element representing this expression
         *
         * @throws RegExpException if an error was encountered in the
         *             pattern string
         */
        private Element ParseExpr() {
            Element  head = ParseTerm();

            if (PeekChar(0) == '|') {
                // Right-recursive: everything after the '|' is parsed
                // as the second alternative
                ReadChar('|');
                return new AlternativeElement(head, ParseExpr());
            }
            return head;
        }

        /**
         * Parses a regular expression term, i.e. a sequence of one or
         * more factors. This method handles the Term production in the
         * grammar (see regexp.grammar).
         *
         * @return the element representing this term
         *
         * @throws RegExpException if an error was encountered in the
         *             pattern string
         */
        private Element ParseTerm() {
            ArrayList  factors = new ArrayList();
            bool       finished = false;

            // A term always holds at least one factor
            factors.Add(ParseFact());
            while (!finished) {
                switch (PeekChar(0)) {
                case -1:
                case ')':
                case ']':
                case '{':
                case '}':
                case '?':
                case '+':
                case '|':
                    // None of these can start a factor, so the term
                    // ends here
                    finished = true;
                    break;
                default:
                    factors.Add(ParseFact());
                    break;
                }
            }
            return CombineElements(factors);
        }

        /**
         * Parses a regular expression factor. This method handles the
         * Fact production in the grammar (see regexp.grammar).
*
         * @return the element representing this factor
         *
         * @throws RegExpException if an error was encountered in the
         *             pattern string
         */
        private Element ParseFact() {
            Element  result = ParseAtom();

            // Wrap the atom in a repeat element when a modifier follows
            switch (PeekChar(0)) {
            case '?':
            case '*':
            case '+':
            case '{':
                result = ParseAtomModifier(result);
                break;
            }
            return result;
        }

        /**
         * Parses a regular expression atom: the any-character dot, a
         * parenthesized sub-expression, a bracketed character set or a
         * single character. This method handles the Atom production in
         * the grammar (see regexp.grammar).
         *
         * @return the element representing this atom
         *
         * @throws RegExpException if an error was encountered in the
         *             pattern string
         */
        private Element ParseAtom() {
            Element  atom;

            switch (PeekChar(0)) {
            case '.':
                ReadChar('.');
                atom = CharacterSetElement.DOT;
                break;
            case '(':
                ReadChar('(');
                atom = ParseExpr();
                ReadChar(')');
                break;
            case '[':
                ReadChar('[');
                atom = ParseCharSet();
                ReadChar(']');
                break;
            case -1:
            case ')':
            case ']':
            case '{':
            case '}':
            case '?':
            case '*':
            case '+':
            case '|':
                // These values are rejected as the start of an atom
                throw new RegExpException(
                    RegExpException.ErrorType.UNEXPECTED_CHARACTER,
                    pos,
                    pattern);
            default:
                atom = ParseChar();
                break;
            }
            return atom;
        }

        /**
         * Parses a regular expression atom modifier. This method handles
         * the AtomModifier production in the grammar (see regexp.grammar).
*
         * @param elem           the element to modify
         *
         * @return the modified element
         *
         * @throws RegExpException if an error was encountered in the
         *             pattern string
         */
        private Element ParseAtomModifier(Element elem) {
            RepeatElement.RepeatType  mode = RepeatElement.RepeatType.GREEDY;
            int                       lower = 0;
            int                       upper = -1;
            int                       bracePos;

            // Read the repeat bounds; the defaults above (0 and -1) are
            // exactly the values used for '*'
            switch (ReadChar()) {
            case '?':
                upper = 1;
                break;
            case '*':
                break;
            case '+':
                lower = 1;
                break;
            case '{':
                // Remember the position used when reporting an invalid
                // repeat count
                bracePos = pos - 1;
                lower = ReadNumber();
                if (PeekChar(0) != ',') {
                    // "{n}": the maximum equals the minimum
                    upper = lower;
                } else {
                    // "{n,}" keeps the maximum at -1, "{n,m}" reads it
                    ReadChar(',');
                    if (PeekChar(0) != '}') {
                        upper = ReadNumber();
                    }
                }
                ReadChar('}');
                if (upper == 0 || (upper > 0 && lower > upper)) {
                    throw new RegExpException(
                        RegExpException.ErrorType.INVALID_REPEAT_COUNT,
                        bracePos,
                        pattern);
                }
                break;
            default:
                throw new RegExpException(
                    RegExpException.ErrorType.UNEXPECTED_CHARACTER,
                    pos - 1,
                    pattern);
            }

            // Read the optional operator mode suffix
            if (PeekChar(0) == '?') {
                ReadChar('?');
                mode = RepeatElement.RepeatType.RELUCTANT;
            } else if (PeekChar(0) == '+') {
                ReadChar('+');
                mode = RepeatElement.RepeatType.POSSESSIVE;
            }

            return new RepeatElement(elem, lower, upper, mode);
        }

        /**
         * Parses a regular expression character set. This method handles
         * the contents of the '[...]' construct in a regular expression.
         *
12 下一页
💿 文件大小 619 K
👤 上传用户 laosoler
📂 所属分类编译器/解释器
🏷️ 相关标签

#compiler #Grammatica #generator #improves
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -