⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 charactersetelement.java

📁 Grammatica is a C# and Java parser generator (compiler compiler). It improves upon similar tools (lik
💻 JAVA
字号:
/*
 * CharacterSetElement.java
 *
 * This work is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * This work is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 * As a special exception, the copyright holders of this library give
 * you permission to link this library with independent modules to
 * produce an executable, regardless of the license terms of these
 * independent modules, and to copy and distribute the resulting
 * executable under terms of your choice, provided that you also meet,
 * for each linked independent module, the terms and conditions of the
 * license of that module. An independent module is a module which is
 * not derived from or based on this library. If you modify this
 * library, you may extend this exception to your version of the
 * library, but you are not obligated to do so. If you do not wish to
 * do so, delete this exception statement from your version.
 *
 * Copyright (c) 2003 Per Cederberg. All rights reserved.
 */

package net.percederberg.grammatica.parser.re;

import java.io.PrintWriter;
import java.util.ArrayList;

/**
 * A regular expression character set element. This element matches a
 * single character inside (or outside) a character set. The character
 * set is user defined and may contain single characters, ranges of
 * characters and nested character sets. The set may also be inverted,
 * meaning that only characters not inside the set will be considered
 * to match.
 *
 * <p>The predefined sets ({@link #DOT}, {@link #DIGIT}, ...) are
 * recognized by object identity. Their membership tests are hard-coded
 * and never consult the user-defined contents list, so anything added
 * to one of those constants has no effect on matching.</p>
 *
 * @author   Per Cederberg, <per at percederberg dot net>
 * @version  1.0
 */
class CharacterSetElement extends Element {

    /**
     * The dot ('.') character set. This element matches a single
     * character that is not one of the line terminator characters
     * listed in {@link #inDotSet(char)}.
     */
    public static final CharacterSetElement DOT =
        new CharacterSetElement(false);

    /**
     * The digit character set. This element matches a single
     * numeric character ('0' to '9').
     */
    public static final CharacterSetElement DIGIT =
        new CharacterSetElement(false);

    /**
     * The non-digit character set. This element matches a single
     * non-numeric character.
     */
    public static final CharacterSetElement NON_DIGIT =
        new CharacterSetElement(true);

    /**
     * The whitespace character set. This element matches a single
     * whitespace character.
     */
    public static final CharacterSetElement WHITESPACE =
        new CharacterSetElement(false);

    /**
     * The non-whitespace character set. This element matches a single
     * non-whitespace character.
     */
    public static final CharacterSetElement NON_WHITESPACE =
        new CharacterSetElement(true);

    /**
     * The word character set. This element matches a single word
     * character (ASCII letter, ASCII digit or underscore).
     */
    public static final CharacterSetElement WORD =
        new CharacterSetElement(false);

    /**
     * The non-word character set. This element matches a single
     * non-word character.
     */
    public static final CharacterSetElement NON_WORD =
        new CharacterSetElement(true);

    /**
     * The inverted character set flag. Fixed at construction time.
     */
    private final boolean inverted;

    /**
     * The character set content. This list may contain Character
     * objects, Range objects and nested CharacterSetElement objects.
     * The list reference never changes; only its content grows.
     */
    private final ArrayList contents = new ArrayList();

    /**
     * Creates a new character set element. If the inverted character
     * set flag is set, only characters NOT in the set will match.
     *
     * @param inverted       the inverted character set flag
     */
    public CharacterSetElement(boolean inverted) {
        this.inverted = inverted;
    }

    /**
     * Adds a single character to this character set.
     *
     * @param c              the character to add
     */
    public void addCharacter(char c) {
        contents.add(new Character(c));
    }

    /**
     * Adds multiple characters to this character set. Each character
     * of the string is added individually.
     *
     * @param str            the string with characters to add
     */
    public void addCharacters(String str) {
        for (int i = 0; i < str.length(); i++) {
            addCharacter(str.charAt(i));
        }
    }

    /**
     * Adds multiple characters to this character set. The characters
     * are taken from the string held by the string element.
     *
     * @param elem           the string element with characters to add
     */
    public void addCharacters(StringElement elem) {
        addCharacters(elem.getString());
    }

    /**
     * Adds a character range to this character set. Both bounds are
     * inclusive. No check is made that min is less than or equal to
     * max; a range with min greater than max matches no character.
     *
     * @param min            the minimum character value
     * @param max            the maximum character value
     */
    public void addRange(char min, char max) {
        contents.add(new Range(min, max));
    }

    /**
     * Adds a character subset to this character set. The subset is
     * stored by reference and is evaluated with its own inverted
     * flag when matching.
     *
     * @param elem           the character set to add
     */
    public void addCharacterSet(CharacterSetElement elem) {
        contents.add(elem);
    }

    /**
     * Returns this element as the character set shouldn't be modified
     * after creation. This partially breaks the contract of clone(),
     * but as new characters are not added to the character set after
     * creation, this will work correctly. Returning the same instance
     * also keeps the identity checks against the predefined sets
     * working for "cloned" elements.
     *
     * @return this character set element
     */
    public Object clone() {
        return this;
    }

    /**
     * Returns the length of a matching string starting at the
     * specified position. The number of matches to skip can also be
     * specified, but numbers higher than zero (0) cause a failed
     * match for any element that doesn't attempt to combine other
     * elements.
     *
     * @param m              the matcher being used
     * @param str            the string to match
     * @param start          the starting position
     * @param skip           the number of matches to skip
     *
     * @return 1 if the character at the start position is in the
     *         set, or
     *         -1 if no match was found
     */
    public int match(Matcher m, CharBuffer str, int start, int skip) {
        // A single-character element has exactly one possible match,
        // so any request to skip matches must fail.
        if (skip != 0) {
            return -1;
        }

        // Reading past the end is reported to the matcher before
        // failing, so that it knows the end of the input was reached.
        if (start >= str.length()) {
            m.setReadEndOfString();
            return -1;
        }

        return inSet(str.charAt(start)) ? 1 : -1;
    }

    /**
     * Checks if the specified character matches this character set.
     * This method takes the inverted flag into account. The
     * predefined sets are detected by identity comparison with the
     * static constants of this class.
     *
     * @param value          the character to check
     *
     * @return true if the character matches, or
     *         false otherwise
     */
    private boolean inSet(char value) {
        if (this == DOT) {
            return inDotSet(value);
        } else if (this == DIGIT || this == NON_DIGIT) {
            return inDigitSet(value) != inverted;
        } else if (this == WHITESPACE || this == NON_WHITESPACE) {
            return inWhitespaceSet(value) != inverted;
        } else if (this == WORD || this == NON_WORD) {
            return inWordSet(value) != inverted;
        } else {
            return inUserSet(value) != inverted;
        }
    }

    /**
     * Checks if the specified character is present in the 'dot' set.
     * This method does not consider the inverted flag. The excluded
     * characters are line feed, carriage return, next line (U+0085),
     * line separator (U+2028) and paragraph separator (U+2029).
     *
     * @param value          the character to check
     *
     * @return true if the character is present, or
     *         false otherwise
     */
    private boolean inDotSet(char value) {
        switch (value) {
        case '\n':
        case '\r':
        case '\u0085':
        case '\u2028':
        case '\u2029':
            return false;
        default:
            return true;
        }
    }

    /**
     * Checks if the specified character is a digit. This method does
     * not consider the inverted flag. Only the ASCII digits '0' to
     * '9' are accepted.
     *
     * @param value          the character to check
     *
     * @return true if the character is a digit, or
     *         false otherwise
     */
    private boolean inDigitSet(char value) {
        return '0' <= value && value <= '9';
    }

    /**
     * Checks if the specified character is a whitespace character.
     * This method does not consider the inverted flag.
     *
     * @param value          the character to check
     *
     * @return true if the character is a whitespace character, or
     *         false otherwise
     */
    private boolean inWhitespaceSet(char value) {
        switch (value) {
        case ' ':
        case '\t':
        case '\n':
        case '\f':
        case '\r':
        case 11:      // vertical tab; Java has no character escape for it
            return true;
        default:
            return false;
        }
    }

    /**
     * Checks if the specified character is a word character. This
     * method does not consider the inverted flag. Word characters
     * are the ASCII letters, the ASCII digits and the underscore.
     *
     * @param value          the character to check
     *
     * @return true if the character is a word character, or
     *         false otherwise
     */
    private boolean inWordSet(char value) {
        return ('a' <= value && value <= 'z')
            || ('A' <= value && value <= 'Z')
            || ('0' <= value && value <= '9')
            || value == '_';
    }

    /**
     * Checks if the specified character is present in the user-
     * defined set. This method does not consider the inverted flag
     * of this set, but nested character sets are evaluated with
     * their own inverted flag.
     *
     * @param value          the character to check
     *
     * @return true if the character is present, or
     *         false otherwise
     */
    private boolean inUserSet(char value) {
        for (int i = 0; i < contents.size(); i++) {
            Object obj = contents.get(i);

            if (obj instanceof Character) {
                if (((Character) obj).charValue() == value) {
                    return true;
                }
            } else if (obj instanceof Range) {
                if (((Range) obj).inside(value)) {
                    return true;
                }
            } else if (obj instanceof CharacterSetElement) {
                if (((CharacterSetElement) obj).inSet(value)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Prints this element to the specified output stream. The output
     * is the indentation followed by the string description of this
     * character set, on a line of its own.
     *
     * @param output         the output stream to use
     * @param indent         the current indentation
     */
    public void printTo(PrintWriter output, String indent) {
        output.println(indent + toString());
    }

    /**
     * Returns a string description of this character set. The
     * predefined sets are shown with their regular expression
     * shorthand. User-defined sets are shown in bracket notation,
     * with the negation marker placed inside the opening bracket
     * for inverted sets (as in "[^abc]").
     *
     * @return a string description of this character set
     */
    public String toString() {
        StringBuffer  buffer;

        // Handle predefined character sets
        if (this == DOT) {
            return ".";
        } else if (this == DIGIT) {
            return "\\d";
        } else if (this == NON_DIGIT) {
            return "\\D";
        } else if (this == WHITESPACE) {
            return "\\s";
        } else if (this == NON_WHITESPACE) {
            return "\\S";
        } else if (this == WORD) {
            return "\\w";
        } else if (this == NON_WORD) {
            return "\\W";
        }

        // Handle user-defined character sets. The caret belongs
        // inside the bracket: "^[...]" would read as a start anchor
        // followed by a non-inverted set.
        buffer = new StringBuffer();
        if (inverted) {
            buffer.append("[^");
        } else {
            buffer.append("[");
        }
        for (int i = 0; i < contents.size(); i++) {
            buffer.append(contents.get(i));
        }
        buffer.append("]");

        return buffer.toString();
    }


    /**
     * A character range class. The range is inclusive at both ends.
     * The class is static as it never uses the enclosing character
     * set instance.
     */
    private static class Range {

        /**
         * The minimum character value.
         */
        private final char min;

        /**
         * The maximum character value.
         */
        private final char max;

        /**
         * Creates a new character range.
         *
         * @param min        the minimum character value
         * @param max        the maximum character value
         */
        public Range(char min, char max) {
            this.min = min;
            this.max = max;
        }

        /**
         * Checks if the specified character is inside the range.
         *
         * @param c          the character to check
         *
         * @return true if the character is in the range, or
         *         false otherwise
         */
        public boolean inside(char c) {
            return c >= min && c <= max;
        }

        /**
         * Returns a string representation of this object, written
         * as the two bounds separated by a hyphen.
         *
         * @return a string representation of this object
         */
        public String toString() {
            return min + "-" + max;
        }
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -