📄 rulebasedcollator.java
字号:
* // get en_US Collator rules * RuleBasedCollator en_USCollator = (RuleBasedCollator)Collator.getInstance(Locale.US); * // add a few Japanese character to sort before English characters * // suppose the last character before the first base letter 'a' in * // the English collation rule is \u2212 * String jaString = "& \u2212 < \u3041, \u3042 < \u3043, \u3044"; * RuleBasedCollator myJapaneseCollator = new * RuleBasedCollator(en_USCollator.getRules() + jaString); * </pre> * </blockquote> * * @see Collator * @see CollationElementIterator * @version 1.25 07/24/98 * @author Helena Shih, Laura Werner, Richard Gillam */public class RuleBasedCollator extends Collator{ // IMPLEMENTATION NOTES: The implementation of the collation algorithm is // divided across three classes: RuleBasedCollator, RBCollationTables, and // CollationElementIterator. RuleBasedCollator contains the collator's // transient state and includes the code that uses the other classes to // implement comparison and sort-key building. RuleBasedCollator also // contains the logic to handle French secondary accent sorting. // A RuleBasedCollator has two CollationElementIterators. State doesn't // need to be preserved in these objects between calls to compare() or // getCollationKey(), but the objects persist anyway to avoid wasting extra // creation time. compare() and getCollationKey() are synchronized to ensure // thread safety with this scheme. The CollationElementIterator is responsible // for generating collation elements from strings and returning one element at // a time (sometimes there's a one-to-many or many-to-one mapping between // characters and collation elements-- this class handles that). // CollationElementIterator depends on RBCollationTables, which contains the // collator's static state. RBCollationTables contains the actual data // tables specifying the collation order of characters for a particular locale // or use. 
// It also contains the base logic that CollationElementIterator
// uses to map from characters to collation elements.  A single RBCollationTables
// object is shared among all RuleBasedCollators for the same locale, and
// thus by all the CollationElementIterators they create.

    /**
     * RuleBasedCollator constructor.  This takes the table rules and builds
     * a collation table out of them.  Please see RuleBasedCollator class
     * description for more details on the collation rule syntax.
     * <p>
     * This overload simply delegates to the two-argument constructor, passing
     * {@code Collator.CANONICAL_DECOMPOSITION} as the decomposition mode.
     * @see java.util.Locale
     * @param rules the collation rules to build the collation table from.
     * @exception ParseException A format exception
     * will be thrown if the build process of the rules fails. For
     * example, build rule "a < ? < d" will cause the constructor to
     * throw the ParseException because the '?' is not quoted.
     */
    public RuleBasedCollator(String rules) throws ParseException {
        this(rules, Collator.CANONICAL_DECOMPOSITION);
    }

    /**
     * RuleBasedCollator constructor.  This takes the table rules and builds
     * a collation table out of them.  Please see RuleBasedCollator class
     * description for more details on the collation rule syntax.
     * <p>
     * Declared without an access modifier.  It sets the strength to
     * {@code Collator.TERTIARY}, sets the decomposition mode to {@code decomp},
     * and then constructs a new {@code RBCollationTables} from the same
     * {@code rules} and {@code decomp}.
     * @see java.util.Locale
     * @param rules the collation rules to build the collation table from.
     * @param decomp the decomposition strength used to build the
     * collation table and to perform comparisons.
     * @exception ParseException A format exception
     * will be thrown if the build process of the rules fails. For
     * example, build rule "a < ? < d" will cause the constructor to
     * throw the ParseException because the '?' is not quoted.
     */
    RuleBasedCollator(String rules, int decomp) throws ParseException {
        setStrength(Collator.TERTIARY);
        setDecomposition(decomp);
        // NOTE(review): presumably this is where the ParseException for bad
        // rules originates -- confirm against RBCollationTables.
        tables = new RBCollationTables(rules, decomp);
    }

    /**
     * "Copy constructor."  Used in clone() for performance.
*/
    private RuleBasedCollator(RuleBasedCollator that) {
        // Carry over the strength and decomposition settings of the original.
        setStrength(that.getStrength());
        setDecomposition(that.getDecomposition());
        // The tables reference is copied, not rebuilt: both collators end up
        // pointing at the same tables object.
        tables = that.tables;
    }

    /**
     * Gets the table-based rules for the collation object.
     * The value is obtained by delegating to {@code tables.getRules()}.
     * @return returns the collation rules that the table collation object
     * was created from.
     */
    public String getRules()
    {
        return tables.getRules();
    }

    /**
     * Return a CollationElementIterator for the given String.
     * A new iterator is constructed on every call from {@code source} and
     * this collator.
     * @param source the string to create the iterator for.
     * @return a newly constructed CollationElementIterator.
     * @see java.text.CollationElementIterator
     */
    public CollationElementIterator getCollationElementIterator(String source) {
        return new CollationElementIterator( source, this );
    }

    /**
     * Return a CollationElementIterator for the given String.
     * This overload takes the text as a CharacterIterator; a new iterator is
     * constructed on every call from {@code source} and this collator.
     * @param source the character iterator to create the iterator for.
     * @return a newly constructed CollationElementIterator.
     * @see java.text.CollationElementIterator
     * @since 1.2
     */
    public CollationElementIterator getCollationElementIterator(
                                                CharacterIterator source) {
        return new CollationElementIterator( source, this );
    }

    /**
     * Compares the character data stored in two different strings based on the
     * collation rules.  Returns information about whether a string is less
     * than, greater than or equal to another string in a language.
     * This can be overridden in a subclass.
     */
    public synchronized int compare(String source, String target)
    {
        // The basic algorithm here is that we use CollationElementIterators
        // to step through both the source and target strings.  We compare each
        // collation element in the source string against the corresponding one
        // in the target, checking for differences.
        //
        // If a difference is found, we set <result> to LESS or GREATER to
        // indicate whether the source string is less or greater than the target.
        //
        // However, it's not that simple.  If we find a tertiary difference
        // (e.g. 'A' vs. 'a') near the beginning of a string, it can be
        // overridden by a primary difference (e.g. "A" vs. "B") later in
        // the string.  For example, "AA" < "aB", even though 'A' > 'a'.
        //
        // To keep track of this, we use strengthResult to keep track of the
        // strength of the most significant difference that has been found
        // so far.
When we find a difference whose strength is greater than // strengthResult, it overrides the last difference (if any) that // was found. int result = Collator.EQUAL; if (sourceCursor == null) { sourceCursor = getCollationElementIterator(source); } else { sourceCursor.setText(source); } if (targetCursor == null) { targetCursor = getCollationElementIterator(target); } else { targetCursor.setText(target); } int sOrder = 0, tOrder = 0; boolean initialCheckSecTer = getStrength() >= Collator.SECONDARY; boolean checkSecTer = initialCheckSecTer; boolean checkTertiary = getStrength() >= Collator.TERTIARY; boolean gets = true, gett = true; while(true) { // Get the next collation element in each of the strings, unless // we've been requested to skip it. if (gets) sOrder = sourceCursor.next(); else gets = true; if (gett) tOrder = targetCursor.next(); else gett = true; // If we've hit the end of one of the strings, jump out of the loop if ((sOrder == CollationElementIterator.NULLORDER)|| (tOrder == CollationElementIterator.NULLORDER)) break; int pSOrder = CollationElementIterator.primaryOrder(sOrder); int pTOrder = CollationElementIterator.primaryOrder(tOrder); // If there's no difference at this position, we can skip it if (sOrder == tOrder) { if (tables.isFrenchSec() && pSOrder != 0) { if (!checkSecTer) { // in french, a secondary difference more to the right is stronger, // so accents have to be checked with each base element checkSecTer = initialCheckSecTer; // but tertiary differences are less important than the first // secondary difference, so checking tertiary remains disabled checkTertiary = false; } } continue; } // Compare primary differences first. if ( pSOrder != pTOrder ) { if (sOrder == 0) { // The entire source element is ignorable. // Skip to the next source element, but don't fetch another target element. 
gett = false; continue; } if (tOrder == 0) { gets = false; continue; } // The source and target elements aren't ignorable, but it's still possible // for the primary component of one of the elements to be ignorable.... if (pSOrder == 0) // primary order in source is ignorable { // The source's primary is ignorable, but the target's isn't. We treat ignorables // as a secondary difference, so remember that we found one. if (checkSecTer) { result = Collator.GREATER; // (strength is SECONDARY) checkSecTer = false; } // Skip to the next source element, but don't fetch another target element. gett = false; } else if (pTOrder == 0) { // record differences - see the comment above. if (checkSecTer) { result = Collator.LESS; // (strength is SECONDARY) checkSecTer = false; } // Skip to the next source element, but don't fetch another target element. gets = false; } else { // Neither of the orders is ignorable, and we already know that the primary // orders are different because of the (pSOrder != pTOrder) test above. // Record the difference and stop the comparison. if (pSOrder < pTOrder) { return Collator.LESS; // (strength is PRIMARY) } else { return Collator.GREATER; // (strength is PRIMARY) } } } else { // else of if ( pSOrder != pTOrder ) // primary order is the same, but complete order is different. So there // are no base elements at this point, only ignorables (Since the strings are // normalized) if (checkSecTer) { // a secondary or tertiary difference may still matter short secSOrder = CollationElementIterator.secondaryOrder(sOrder); short secTOrder = CollationElementIterator.secondaryOrder(tOrder); if (secSOrder != secTOrder) { // there is a secondary difference result = (secSOrder < secTOrder) ? 
Collator.LESS : Collator.GREATER; // (strength is SECONDARY) checkSecTer = false; // (even in french, only the first secondary difference within // a base character matters) } else { if (checkTertiary) { // a tertiary difference may still matter short terSOrder = CollationElementIterator.tertiaryOrder(sOrder); short terTOrder = CollationElementIterator.tertiaryOrder(tOrder); if (terSOrder != terTOrder) { // there is a tertiary difference result = (terSOrder < terTOrder) ? Collator.LESS : Collator.GREATER;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -