📄 charset.java

📁 java源代码请看看啊提点宝贵的意见
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
12 3 4 下一页
/* * @(#)CharSet.java	1.8 03/01/23 * * Copyright 2003 Sun Microsystems, Inc. All rights reserved. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. *//* * @(#)CharSet.java	1.1 99/02/18 * * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved * (C) Copyright IBM Corp. 1996 - 2002 - All Rights Reserved * * The original version of this source code and documentation * is copyrighted and owned by Taligent, Inc., a wholly-owned * subsidiary of IBM. These materials are provided under terms * of a License Agreement between Taligent and Sun. This technology * is protected by multiple US and International patents. * * This notice and attribution to Taligent may not be removed. * Taligent is a registered trademark of Taligent, Inc. */package java.text;import java.util.Hashtable;/** * An object representing a set of characters.  (This is a "set" in the * mathematical sense: an unduplicated list of characters on which set * operations such as union and intersection can be performed.)  The * set information is stored in compressed, optimized form: The object * contains a String with an even number of characters.  Each pair of * characters represents a range of characters contained in the set * (a pair of the same character represents a single character).  The * characters are sorted in increasing order. */class CharSet implements Cloneable {    /**     * The structure containing the set information.  The characters     * in this string are organized into pairs, each pair representing     * a range of characters contained in the set     */    private String chars;    //==========================================================================    // parseString() and associated routines    //==========================================================================    /**     * A cache which is used to speed up parseString() whenever it is     * used to parse a description that has been parsed before     */    private static Hashtable expressionCache = null;    /**     * Builds a CharSet based on a textual description.  For the syntax of     * the description, see the documentation of RuleBasedBreakIterator.     * @see java.text.RuleBasedBreakIterator     */    public static CharSet parseString(String s) {        CharSet result = null;        // if "s" is in the expression cache, pull the result out        // of the expresison cache        if (expressionCache != null) {            result = (CharSet)expressionCache.get(s);        }        // otherwise, use doParseString() to actually parse the string,        // and then add a corresponding entry to the expression cache        if (result == null) {            result = doParseString(s);            if (expressionCache == null) {                expressionCache = new Hashtable();            }            expressionCache.put(s, result);        }        result = (CharSet)(result.clone());        return result;    }    /**     * This function is used by parseString() to actually parse the string     */    private static CharSet doParseString(String s) {        CharSet result = new CharSet();        int p = 0;        boolean haveDash = false;        boolean haveTilde = false;        boolean wIsReal = false;        char w = '\u0000';        // for each character in the description...        while (p < s.length()) {            char c = s.charAt(p);            // if it's an opening bracket...            if (c == '[') {                // flush the single-character cache                if (wIsReal) {                    result.internalUnion(new CharSet(w));                }		                // locate the matching closing bracket                int bracketLevel = 1;                int q = p + 1;                while (bracketLevel != 0) {		    // if no matching bracket by end of string then...		    if (q >= s.length()) {			throw new IllegalArgumentException("Parse error at position " + p + " in " + s);		    }		    switch (s.charAt(q)) {		    case '\\': // need to step over next character			++q;			break;		    case '[':                        ++bracketLevel;			break;		    case ']':                        --bracketLevel;			break;		    }                    ++q;                }                --q;		                // call parseString() recursively to parse the text inside                // the brackets, then either add or subtract the result from                // our running result depending on whether or not the []                // expresison was preceded by a ^                if (!haveTilde) {                    result.internalUnion(CharSet.parseString(s.substring(p + 1, q)));                }                else {                    result.internalDifference(CharSet.parseString(s.substring(p + 1, q)));                }                haveTilde = false;                haveDash = false;                wIsReal = false;                p = q + 1;            }	                // if the character is a colon...            else if (c == ':') {                // flush the single-character cache                if (wIsReal) {                    result.internalUnion(new CharSet(w));                }                // locate the matching colon (and throw an error if there                // isn't one)                int q = s.indexOf(':', p + 1);                if (q == -1) {                    throw new IllegalArgumentException("Parse error at position " + p + " in " + s);                }                // use charSetForCategory() to parse the text in the colons,                // and either add or substract the result from our running                // result depending on whether the :: expression was                // preceded by a ^                if (!haveTilde) {                    result.internalUnion(charSetForCategory(s.substring(p + 1, q)));                }                else {                    result.internalDifference(charSetForCategory(s.substring(p + 1, q)));                }                // reset everything and advance to the next character                haveTilde = false;                haveDash = false;                wIsReal = false;                p = q + 1;            }            // if the character is a dash, set an appropriate flag            else if (c == '-') {                if (wIsReal) {                    haveDash = true;                }                ++p;            }            // if the character is a caret, flush the single-character            // cache and set an appropriate flag.  If the set is empty            // (i.e., if the expression begins with ^), invert the set            // (i.e., set it to include everything).  The idea here is            // that a set that includes nothing but ^ expressions            // means "everything but these things".            else if (c == '^') {                if (wIsReal) {                    result.internalUnion(new CharSet(w));                    wIsReal = false;                }                haveTilde = true;                ++p;                if (result.empty()) {                    result.internalComplement();                }            }            // throw an exception on an illegal character            else if (c >= ' ' && c < '\u007f' && !Character.isLetter(c)                     && !Character.isDigit(c) && c != '\\') {                throw new IllegalArgumentException("Parse error at position " + p + " in " + s);            }            // otherwise, we end up here...            else {                // on a backslash, advance to the next character                if (c == '\\') {                    ++p;                }                // if the preceding character was a dash, this character                // defines the end of a range.  Add or subtract that range                // from the running result depending on whether or not it                // was preceded by a ^                if (haveDash) {                    if (s.charAt(p) < w) {                        throw new IllegalArgumentException("U+" + Integer.toHexString(s.charAt(p))                            + " is less than U+" + Integer.toHexString(w) + ".  Dash expressions "                            + "can't have their endpoints in reverse order.");                    }                    if (!haveTilde) {                        result.internalUnion(new CharSet(w, s.charAt(p++)));                    }                    else {                        result.internalDifference(new CharSet(w, s.charAt(p++)));                    }                    haveDash = false;                    haveTilde = false;                    wIsReal = false;                }                // if the preceding character was a caret, remove this character                // from the running result                else if (haveTilde) {                    result.internalDifference(new CharSet(s.charAt(p++)));                    haveTilde = false;                    wIsReal = false;                }                // otherwise, flush the single-character cache and then                // put this character into the cache                else if (wIsReal) {                    result.internalUnion(new CharSet(w));                    w = s.charAt(p++);                    wIsReal = true;                }                else {                    w = s.charAt(p++);                    wIsReal = true;                }            }        }        // finally, flush the single-character cache one last time        if (wIsReal) {            result.internalUnion(new CharSet(w));        }        return result;    }    /**     * Creates a CharSet containing all the characters in a particular     * Unicode category.  The text is either a two-character code from     * the Unicode database or a single character that begins one or more
12 3 4 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -