⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 charactersetelement.cs

📁 Grammatica is a C# and Java parser generator (compiler compiler). It improves upon similar tools (lik
💻 CS
字号:
/*
 * CharacterSetElement.cs
 *
 * This work is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * This work is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 * As a special exception, the copyright holders of this library give
 * you permission to link this library with independent modules to
 * produce an executable, regardless of the license terms of these
 * independent modules, and to copy and distribute the resulting
 * executable under terms of your choice, provided that you also meet,
 * for each linked independent module, the terms and conditions of the
 * license of that module. An independent module is a module which is
 * not derived from or based on this library. If you modify this
 * library, you may extend this exception to your version of the
 * library, but you are not obligated to do so. If you do not wish to
 * do so, delete this exception statement from your version.
 *
 * Copyright (c) 2003 Per Cederberg. All rights reserved.
 */

using System.Collections;
using System.IO;
using System.Text;

namespace PerCederberg.Grammatica.Parser.RE {

    /**
     * A regular expression character set element. This element
     * matches a single character inside (or outside) a character set.
     * The character set is user defined and may contain ranges of
     * characters. The set may also be inverted, meaning that only
     * characters not inside the set will be considered to match.
     *
     * The predefined sets (DOT, DIGIT, WORD, ...) are shared singleton
     * instances with empty contents; they are recognized by reference
     * identity and matched by dedicated checks instead of by their
     * contents list.
     *
     * @author   Per Cederberg, <per at percederberg dot net>
     * @version  1.0
     */
    internal class CharacterSetElement : Element {

        /**
         * The dot ('.') character set. This element matches a single
         * character that is not equal to a newline character.
         */
        public static readonly CharacterSetElement DOT =
            new CharacterSetElement(false);

        /**
         * The digit character set. This element matches a single
         * numeric character.
         */
        public static readonly CharacterSetElement DIGIT =
            new CharacterSetElement(false);

        /**
         * The non-digit character set. This element matches a single
         * non-numeric character.
         */
        public static readonly CharacterSetElement NON_DIGIT =
            new CharacterSetElement(true);

        /**
         * The whitespace character set. This element matches a single
         * whitespace character.
         */
        public static readonly CharacterSetElement WHITESPACE =
            new CharacterSetElement(false);

        /**
         * The non-whitespace character set. This element matches a
         * single non-whitespace character.
         */
        public static readonly CharacterSetElement NON_WHITESPACE =
            new CharacterSetElement(true);

        /**
         * The word character set. This element matches a single word
         * character.
         */
        public static readonly CharacterSetElement WORD =
            new CharacterSetElement(false);

        /**
         * The non-word character set. This element matches a single
         * non-word character.
         */
        public static readonly CharacterSetElement NON_WORD =
            new CharacterSetElement(true);

        /**
         * The inverted character set flag.
         */
        private readonly bool inverted;

        /**
         * The character set content. This list may contain boxed
         * char values, Range objects or nested CharacterSetElement
         * objects.
         */
        private readonly ArrayList contents = new ArrayList();

        /**
         * Creates a new character set element. If the inverted character
         * set flag is set, only characters NOT in the set will match.
         *
         * @param inverted       the inverted character set flag
         */
        public CharacterSetElement(bool inverted) {
            this.inverted = inverted;
        }

        /**
         * Adds a single character to this character set.
         *
         * @param c              the character to add
         */
        public void AddCharacter(char c) {
            contents.Add(c);
        }

        /**
         * Adds multiple characters to this character set.
         *
         * @param str            the string with characters to add
         */
        public void AddCharacters(string str) {
            foreach (char c in str) {
                AddCharacter(c);
            }
        }

        /**
         * Adds multiple characters to this character set.
         *
         * @param elem           the string element with characters to add
         */
        public void AddCharacters(StringElement elem) {
            AddCharacters(elem.GetString());
        }

        /**
         * Adds a character range to this character set.
         *
         * @param min            the minimum character value
         * @param max            the maximum character value
         */
        public void AddRange(char min, char max) {
            contents.Add(new Range(min, max));
        }

        /**
         * Adds a character subset to this character set.
         *
         * @param elem           the character set to add
         */
        public void AddCharacterSet(CharacterSetElement elem) {
            contents.Add(elem);
        }

        /**
         * Returns this element as the character set shouldn't be
         * modified after creation. This partially breaks the contract
         * of Clone(), but as new characters are not added to the
         * character set after creation, this will work correctly.
         *
         * @return this character set element
         */
        public override object Clone() {
            return this;
        }

        /**
         * Returns the length of a matching string starting at the
         * specified position. The number of matches to skip can also be
         * specified, but numbers higher than zero (0) cause a failed
         * match for any element that doesn't attempt to combine other
         * elements.
         *
         * @param m              the matcher being used
         * @param str            the string to match
         * @param start          the starting position
         * @param skip           the number of matches to skip
         *
         * @return the length of the matching string (always 1), or
         *         -1 if no match was found
         */
        public override int Match(Matcher m,
                                  string str,
                                  int start,
                                  int skip) {

            // A single character offers no alternative matches to skip to
            if (skip != 0) {
                return -1;
            }

            // Report to the matcher that the end of the input was reached
            if (start >= str.Length) {
                m.SetReadEndOfString();
                return -1;
            }

            return InSet(str[start]) ? 1 : -1;
        }

        /**
         * Checks if the specified character matches this character
         * set. This method takes the inverted flag into account.
         *
         * @param c               the character to check
         *
         * @return true if the character matches, or
         *         false otherwise
         */
        private bool InSet(char c) {
            // Predefined sets are identified by reference, not contents
            if (this == DOT) {
                return InDotSet(c);
            } else if (this == DIGIT || this == NON_DIGIT) {
                return InDigitSet(c) != inverted;
            } else if (this == WHITESPACE || this == NON_WHITESPACE) {
                return InWhitespaceSet(c) != inverted;
            } else if (this == WORD || this == NON_WORD) {
                return InWordSet(c) != inverted;
            } else {
                return InUserSet(c) != inverted;
            }
        }

        /**
         * Checks if the specified character is present in the 'dot'
         * set. This method does not consider the inverted flag.
         *
         * @param c               the character to check
         *
         * @return true if the character is present, or
         *         false otherwise
         */
        private bool InDotSet(char c) {
            switch (c) {
            case '\n':
            case '\r':
            case '\u0085':
            case '\u2028':
            case '\u2029':
                return false;
            default:
                return true;
            }
        }

        /**
         * Checks if the specified character is a digit. This method
         * does not consider the inverted flag.
         *
         * @param c               the character to check
         *
         * @return true if the character is a digit, or
         *         false otherwise
         */
        private bool InDigitSet(char c) {
            return '0' <= c && c <= '9';
        }

        /**
         * Checks if the specified character is a whitespace
         * character. This method does not consider the inverted flag.
         *
         * @param c               the character to check
         *
         * @return true if the character is a whitespace character, or
         *         false otherwise
         */
        private bool InWhitespaceSet(char c) {
            switch (c) {
            case ' ':
            case '\t':
            case '\n':
            case '\f':
            case '\r':
            case (char) 11:
                return true;
            default:
                return false;
            }
        }

        /**
         * Checks if the specified character is a word character. This
         * method does not consider the inverted flag.
         *
         * @param c               the character to check
         *
         * @return true if the character is a word character, or
         *         false otherwise
         */
        private bool InWordSet(char c) {
            return ('a' <= c && c <= 'z')
                || ('A' <= c && c <= 'Z')
                || ('0' <= c && c <= '9')
                || c == '_';
        }

        /**
         * Checks if the specified character is present in the user-
         * defined set. This method does not consider the inverted
         * flag of this set, but nested character sets are checked
         * with their own inverted flag applied.
         *
         * @param value           the character to check
         *
         * @return true if the character is present, or
         *         false otherwise
         */
        private bool InUserSet(char value) {
            foreach (object obj in contents) {
                if (obj is char) {
                    if ((char) obj == value) {
                        return true;
                    }
                } else if (obj is Range) {
                    if (((Range) obj).Inside(value)) {
                        return true;
                    }
                } else if (obj is CharacterSetElement) {
                    if (((CharacterSetElement) obj).InSet(value)) {
                        return true;
                    }
                }
            }
            return false;
        }

        /**
         * Prints this element to the specified output stream.
         *
         * @param output         the output stream to use
         * @param indent         the current indentation
         */
        public override void PrintTo(TextWriter output, string indent) {
            output.WriteLine(indent + ToString());
        }

        /**
         * Returns a string description of this character set. The
         * predefined sets are printed as their regular expression
         * shorthand (".", "\d", "\W", ...). User-defined sets are
         * printed as "[...]", or "[^...]" when inverted.
         *
         * @return a string description of this character set
         */
        public override string ToString() {
            StringBuilder  buffer;

            // Handle predefined character sets
            if (this == DOT) {
                return ".";
            } else if (this == DIGIT) {
                return "\\d";
            } else if (this == NON_DIGIT) {
                return "\\D";
            } else if (this == WHITESPACE) {
                return "\\s";
            } else if (this == NON_WHITESPACE) {
                return "\\S";
            } else if (this == WORD) {
                return "\\w";
            } else if (this == NON_WORD) {
                return "\\W";
            }

            // Handle user-defined character sets; the negation marker
            // belongs inside the bracket, as in regular expression syntax
            buffer = new StringBuilder();
            if (inverted) {
                buffer.Append("[^");
            } else {
                buffer.Append("[");
            }
            foreach (object obj in contents) {
                buffer.Append(obj);
            }
            buffer.Append("]");

            return buffer.ToString();
        }


        /**
         * A character range class.
         */
        private class Range {

            /**
             * The minimum character value.
             */
            private readonly char min;

            /**
             * The maximum character value.
             */
            private readonly char max;

            /**
             * Creates a new character range.
             *
             * @param min        the minimum character value
             * @param max        the maximum character value
             */
            public Range(char min, char max) {
                this.min = min;
                this.max = max;
            }

            /**
             * Checks if the specified character is inside the range.
             * Both range bounds are inclusive.
             *
             * @param c          the character to check
             *
             * @return true if the character is in the range, or
             *         false otherwise
             */
            public bool Inside(char c) {
                return min <= c && c <= max;
            }

            /**
             * Returns a string representation of this object, in
             * the form "min-max".
             *
             * @return a string representation of this object
             */
            public override string ToString() {
                return min + "-" + max;
            }
        }
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -