composedchariter.java

来自「This is a resource based on j2me embedde」· Java 代码 · 共 193 行

JAVA
193
字号
/* *  * @(#)ComposedCharIter.java	1.7 06/10/10 *  * Portions Copyright  2000-2008 Sun Microsystems, Inc. All Rights * Reserved.  Use is subject to license terms. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER *  * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version * 2 only, as published by the Free Software Foundation. *  * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License version 2 for more details (a copy is * included at /legal/license.txt). *  * You should have received a copy of the GNU General Public License * version 2 along with this work; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA *  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa * Clara, CA 95054 or visit www.sun.com if you need additional * information or have any questions. *//* * (C) Copyright IBM Corp. 1996-2001 - All Rights Reserved * * The original version of this source code and documentation is * copyrighted and owned by IBM. These materials are provided * under terms of a License Agreement between IBM and Sun. * This technology is protected by multiple US and International * patents. This notice and attribution to IBM may not be removed. */package sun.text;/** * <tt>ComposedCharIter</tt> is an iterator class that returns all * of the precomposed characters defined in the Unicode standard, along * with their decomposed forms.  This is often useful when building * data tables (<i>e.g.</i> collation tables) which need to treat composed * and decomposed characters equivalently. * <p> * For example, imagine that you have built a collation table with ordering * rules for the {@link Normalizer#DECOMP canonically decomposed} forms of all * characters used in a particular language.  When you process input text using * this table, the text must first be decomposed so that it matches the form * used in the table.  This can impose a performance penalty that may be * unacceptable in some situations. * <p> * You can avoid this problem by ensuring that the collation table contains * rules for both the decomposed <i>and</i> composed versions of each character. * To do so, use a <tt>ComposedCharIter</tt> to iterate through all of the * composed characters in Unicode.  If the decomposition for that character * consists solely of characters that are listed in your ruleset, you can * add a new rule for the composed character that makes it equivalent to * its decomposition sequence. * <p> * Note that <tt>ComposedCharIter</tt> iterates over a <em>static</em> table * of the composed characters in Unicode.  If you want to iterate over the * composed characters in a particular string, use {@link Normalizer} instead. * <p> * When constructing a <tt>ComposedCharIter</tt> there is one * optional feature that you can enable or disable: * <ul> *   <li>{@link Normalizer#IGNORE_HANGUL} - Do not iterate over the Hangul *          characters and their corresponding Jamo decompositions. *          This option is off by default (<i>i.e.</i> Hangul processing is enabled) *          since the Unicode standard specifies that Hangul to Jamo *          is a canonical decomposition. * </ul> * <p> * <tt>ComposedCharIter</tt> is currently based on version 2.1.8 of the * <a href="http://www.unicode.org" target="unicode">Unicode Standard</a>. * It will be updated as later versions of Unicode are released. */public final class ComposedCharIter {    /**     * Constant that indicates the iteration has completed.     * {@link #next} returns this value when there are no more composed characters     * over which to iterate.     */    public static final char DONE = Normalizer.DONE;    /**     * Construct a new <tt>ComposedCharIter</tt>.  The iterator will return     * all Unicode characters with canonical decompositions, including Korean     * Hangul characters.     */    public ComposedCharIter() {        minDecomp = DecompData.MAX_COMPAT;        hangul = false;    }    /**     * Constructs a non-default <tt>ComposedCharIter</tt> with optional behavior.     * <p>     * @param compat    <tt>false</tt> for canonical decompositions only;     *                  <tt>true</tt> for both canonical and compatibility     *                  decompositions.     *     * @param options   Optional decomposition features.  Currently, the only     *                  supported option is {@link Normalizer#IGNORE_HANGUL}, which     *                  causes this <tt>ComposedCharIter</tt> not to iterate     *                  over the Hangul characters and their corresponding     *                  Jamo decompositions.     */    public ComposedCharIter(boolean compat, int options) {        // Compatibility explosions have lower indices; skip them if necessary        minDecomp = compat ? 0 : DecompData.MAX_COMPAT;        hangul = (options & Normalizer.IGNORE_HANGUL) == 0;    }    /**     * Determines whether there any precomposed Unicode characters not yet returned     * by {@link #next}.     */    public boolean hasNext() {        if (nextChar == DONE)  {            findNextChar();        }        return nextChar != DONE;    }    /**     * Returns the next precomposed Unicode character.     * Repeated calls to <tt>next</tt> return all of the precomposed characters defined     * by Unicode, in ascending order.  After all precomposed characters have     * been returned, {@link #hasNext} will return <tt>false</tt> and further calls     * to <tt>next</tt> will return {@link #DONE}.     */    public char next() {        if (nextChar == DONE)  {            findNextChar();        }        curChar = nextChar;        nextChar = DONE;        return curChar;    }    /**     * Returns the Unicode decomposition of the current character.     * This method returns the decomposition of the precomposed character most     * recently returned by {@link #next}.  The resulting decomposition is     * affected by the settings of the options passed to the constructor.     */    public String decomposition() {        StringBuffer result = new StringBuffer();        int pos = (char)(DecompData.offsets.elementAt(curChar) & DecompData.DECOMP_MASK);        if (pos > minDecomp) {            DecompData.doAppend(pos, result);        } else if (hangul && curChar >= HANGUL_BASE && curChar < HANGUL_LIMIT) {	    Normalizer.hangulToJamo(curChar, result, minDecomp);        } else {            result.append(curChar);        }        return result.toString();    }    private void findNextChar() {        if (curChar != DONE) {            char ch = curChar;            while (++ch < 0xFFFF) {                int offset = DecompData.offsets.elementAt(ch) & DecompData.DECOMP_MASK;                if (offset > minDecomp                    || (hangul && ch >= HANGUL_BASE && ch < HANGUL_LIMIT) ) {                    nextChar = ch;                    break;                }            }        }    }    private final int minDecomp;    private final boolean hangul;    private char curChar = 0;    private char nextChar = Normalizer.DONE;    private static final char HANGUL_BASE = Normalizer.HANGUL_BASE;    private static final char HANGUL_LIMIT = Normalizer.HANGUL_LIMIT;};

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?