📄 collationelementiterator.java
字号:
/* * @(#)CollationElementIterator.java 1.45 03/01/27 * * Copyright 2003 Sun Microsystems, Inc. All rights reserved. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. *//* * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved * * The original version of this source code and documentation is copyrighted * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These * materials are provided under terms of a License Agreement between Taligent * and Sun. This technology is protected by multiple US and International * patents. This notice and attribution to Taligent may not be removed. * Taligent is a registered trademark of Taligent, Inc. * */package java.text;import java.lang.Character;import java.util.Vector;import sun.text.Normalizer;import sun.text.NormalizerUtilities;/** * The <code>CollationElementIterator</code> class is used as an iterator * to walk through each character of an international string. Use the iterator * to return the ordering priority of the positioned character. The ordering * priority of a character, which we refer to as a key, defines how a character * is collated in the given collation object. * * <p> * For example, consider the following in Spanish: * <blockquote> * <pre> * "ca" -> the first key is key('c') and second key is key('a'). * "cha" -> the first key is key('ch') and second key is key('a'). * </pre> * </blockquote> * And in German, * <blockquote> * <pre> * "\u00e4b"-> the first key is key('a'), the second key is key('e'), and * the third key is key('b'). * </pre> * </blockquote> * The key of a character is an integer composed of primary order(short), * secondary order(byte), and tertiary order(byte). Java strictly defines * the size and signedness of its primitive data types. Therefore, the static * functions <code>primaryOrder</code>, <code>secondaryOrder</code>, and * <code>tertiaryOrder</code> return <code>int</code>, <code>short</code>, * and <code>short</code> respectively to ensure the correctness of the key * value. * * <p> * Example of the iterator usage, * <blockquote> * <pre> * * String testString = "This is a test"; * RuleBasedCollator ruleBasedCollator = (RuleBasedCollator)Collator.getInstance(); * CollationElementIterator collationElementIterator = ruleBasedCollator.getCollationElementIterator(testString); * int primaryOrder = CollationElementIterator.primaryOrder(collationElementIterator.next()); * </pre> * </blockquote> * * <p> * <code>CollationElementIterator.next</code> returns the collation order * of the next character. A collation order consists of primary order, * secondary order and tertiary order. The data type of the collation * order is <strong>int</strong>. The first 16 bits of a collation order * is its primary order; the next 8 bits is the secondary order and the * last 8 bits is the tertiary order. * * @see Collator * @see RuleBasedCollator * @version 1.24 07/27/98 * @author Helena Shih, Laura Werner, Richard Gillam */public final class CollationElementIterator{ /** * Null order which indicates the end of string is reached by the * cursor. */ public final static int NULLORDER = 0xffffffff; /** * CollationElementIterator constructor. This takes the source string and * the collation object. The cursor will walk thru the source string based * on the predefined collation rules. If the source string is empty, * NULLORDER will be returned on the calls to next(). * @param sourceText the source string. * @param order the collation object. */ CollationElementIterator(String sourceText, RuleBasedCollator owner) { this.owner = owner; ordering = owner.getTables(); if ( sourceText.length() != 0 ) { Normalizer.Mode mode = NormalizerUtilities.toNormalizerMode(owner.getDecomposition()); text = new Normalizer(sourceText, mode); } } /** * CollationElementIterator constructor. This takes the source string and * the collation object. The cursor will walk thru the source string based * on the predefined collation rules. If the source string is empty, * NULLORDER will be returned on the calls to next(). * @param sourceText the source string. * @param order the collation object. */ CollationElementIterator(CharacterIterator sourceText, RuleBasedCollator owner) { this.owner = owner; ordering = owner.getTables(); Normalizer.Mode mode = NormalizerUtilities.toNormalizerMode(owner.getDecomposition()); text = new Normalizer(sourceText, mode); } /** * Resets the cursor to the beginning of the string. The next call * to next() will return the first collation element in the string. */ public void reset() { if (text != null) { text.reset(); Normalizer.Mode mode = NormalizerUtilities.toNormalizerMode(owner.getDecomposition()); text.setMode(mode); } buffer = null; expIndex = 0; swapOrder = 0; } /** * Get the next collation element in the string. <p>This iterator iterates * over a sequence of collation elements that were built from the string. * Because there isn't necessarily a one-to-one mapping from characters to * collation elements, this doesn't mean the same thing as "return the * collation element [or ordering priority] of the next character in the * string".</p> * <p>This function returns the collation element that the iterator is currently * pointing to and then updates the internal pointer to point to the next element. * previous() updates the pointer first and then returns the element. This * means that when you change direction while iterating (i.e., call next() and * then call previous(), or call previous() and then call next()), you'll get * back the same element twice.</p> */ public int next() { if (text == null) { return NULLORDER; } Normalizer.Mode textMode = text.getMode(); // convert the owner's mode to something the Normalizer understands Normalizer.Mode ownerMode = NormalizerUtilities.toNormalizerMode(owner.getDecomposition()); if (textMode != ownerMode) { text.setMode(ownerMode); } // if buffer contains any decomposed char values // return their strength orders before continuing in // the the Normalizer's CharacterIterator. if (buffer != null) { if (expIndex < buffer.length) { return strengthOrder(buffer[expIndex++]); } else { buffer = null; expIndex = 0; } } else if (swapOrder != 0) { int order = swapOrder << 16; swapOrder = 0; return order; } char ch = text.next(); // are we at the end of Normalizer's text? if (ch == Normalizer.DONE) { return NULLORDER; } int value = ordering.getUnicodeOrder(ch); if (value == RuleBasedCollator.UNMAPPED) { swapOrder = ch; return UNMAPPEDCHARVALUE; } else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) { value = nextContractChar(ch); } if (value >= RuleBasedCollator.EXPANDCHARINDEX) { buffer = ordering.getExpandValueList(value); expIndex = 0; value = buffer[expIndex++]; } if (ordering.isSEAsianSwapping()) { char consonant; if (isThaiPreVowel(ch)) { consonant = text.next(); if (isThaiBaseConsonant(consonant)) { buffer = makeReorderedBuffer(consonant, value, buffer, true); value = buffer[0]; expIndex = 1; } else { text.previous(); } } if (isLaoPreVowel(ch)) { consonant = text.next(); if (isLaoBaseConsonant(consonant)) { buffer = makeReorderedBuffer(consonant, value, buffer, true); value = buffer[0]; expIndex = 1; } else { text.previous(); } } } return strengthOrder(value); } /** * Get the previous collation element in the string. <p>This iterator iterates * over a sequence of collation elements that were built from the string. * Because there isn't necessarily a one-to-one mapping from characters to * collation elements, this doesn't mean the same thing as "return the * collation element [or ordering priority] of the previous character in the * string".</p> * <p>This function updates the iterator's internal pointer to point to the * collation element preceding the one it's currently pointing to and then * returns that element, while next() returns the current element and then * updates the pointer. This means that when you change direction while * iterating (i.e., call next() and then call previous(), or call previous() * and then call next()), you'll get back the same element twice.</p> * @since 1.2 */ public int previous() { if (text == null) { return NULLORDER; } Normalizer.Mode textMode = text.getMode(); // convert the owner's mode to something the Normalizer understands Normalizer.Mode ownerMode = NormalizerUtilities.toNormalizerMode(owner.getDecomposition()); if (textMode != ownerMode) { text.setMode(ownerMode); } if (buffer != null) { if (expIndex > 0) { return strengthOrder(buffer[--expIndex]); } else { buffer = null; expIndex = 0; } } else if (swapOrder != 0) { int order = swapOrder << 16; swapOrder = 0; return order; } char ch = text.previous(); if (ch == Normalizer.DONE) { return NULLORDER; } int value = ordering.getUnicodeOrder(ch); if (value == RuleBasedCollator.UNMAPPED) { swapOrder = UNMAPPEDCHARVALUE; return ch; } else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) { value = prevContractChar(ch); } if (value >= RuleBasedCollator.EXPANDCHARINDEX) { buffer = ordering.getExpandValueList(value); expIndex = buffer.length; value = buffer[--expIndex]; } if (ordering.isSEAsianSwapping()) { char vowel; if (isThaiBaseConsonant(ch)) { vowel = text.previous(); if (isThaiPreVowel(vowel)) { buffer = makeReorderedBuffer(vowel, value, buffer, false); expIndex = buffer.length - 1; value = buffer[expIndex]; } else { text.next(); } } if (isLaoBaseConsonant(ch)) { vowel = text.previous(); if (isLaoPreVowel(vowel)) { buffer = makeReorderedBuffer(vowel, value, buffer, false); expIndex = buffer.length - 1; value = buffer[expIndex]; } else { text.next(); } } } return strengthOrder(value); } /** * Return the primary component of a collation element. * @param order the collation element * @return the element's primary component */ public final static int primaryOrder(int order) { order &= RBCollationTables.PRIMARYORDERMASK; return (order >>> RBCollationTables.PRIMARYORDERSHIFT); } /** * Return the secondary component of a collation element. * @param order the collation element * @return the element's secondary component */ public final static short secondaryOrder(int order) { order = order & RBCollationTables.SECONDARYORDERMASK; return ((short)(order >> RBCollationTables.SECONDARYORDERSHIFT)); } /** * Return the tertiary component of a collation element. * @param order the collation element * @return the element's tertiary component */ public final static short tertiaryOrder(int order) { return ((short)(order &= RBCollationTables.TERTIARYORDERMASK)); } /** * Get the comparison order in the desired strength. Ignore the other * differences. * @param order The order value */ final int strengthOrder(int order) { int s = owner.getStrength(); if (s == Collator.PRIMARY) { order &= RBCollationTables.PRIMARYDIFFERENCEONLY; } else if (s == Collator.SECONDARY) { order &= RBCollationTables.SECONDARYDIFFERENCEONLY; } return order; } /** * Sets the iterator to point to the collation element corresponding to * the specified character (the parameter is a CHARACTER offset in the
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -