📄 rulebasedcollator.java
字号:
// (strength is TERTIARY) checkTertiary = false; } } } } // if (checkSecTer) } // if ( pSOrder != pTOrder ) } // while() if (sOrder != CollationElementIterator.NULLORDER) { // (tOrder must be CollationElementIterator::NULLORDER, // since this point is only reached when sOrder or tOrder is NULLORDER.) // The source string has more elements, but the target string hasn't. do { if (CollationElementIterator.primaryOrder(sOrder) != 0) { // We found an additional non-ignorable base character in the source string. // This is a primary difference, so the source is greater return Collator.GREATER; // (strength is PRIMARY) } else if (CollationElementIterator.secondaryOrder(sOrder) != 0) { // Additional secondary elements mean the source string is greater if (checkSecTer) { result = Collator.GREATER; // (strength is SECONDARY) checkSecTer = false; } } } while ((sOrder = sourceCursor.next()) != CollationElementIterator.NULLORDER); } else if (tOrder != CollationElementIterator.NULLORDER) { // The target string has more elements, but the source string hasn't. do { if (CollationElementIterator.primaryOrder(tOrder) != 0) // We found an additional non-ignorable base character in the target string. // This is a primary difference, so the source is less return Collator.LESS; // (strength is PRIMARY) else if (CollationElementIterator.secondaryOrder(tOrder) != 0) { // Additional secondary elements in the target mean the source string is less if (checkSecTer) { result = Collator.LESS; // (strength is SECONDARY) checkSecTer = false; } } } while ((tOrder = targetCursor.next()) != CollationElementIterator.NULLORDER); } // For IDENTICAL comparisons, we use a bitwise character comparison // as a tiebreaker if all else is equal if (result == 0 && getStrength() == IDENTICAL) { Normalizer.Mode mode = NormalizerUtilities.toNormalizerMode(getDecomposition()); String sourceDecomposition = Normalizer.normalize(source, mode, 0); String targetDecomposition = Normalizer.normalize(target, mode, 0); result = sourceDecomposition.compareTo(targetDecomposition); } return result; } /** * Transforms the string into a series of characters that can be compared * with CollationKey.compareTo. This overrides java.text.Collator.getCollationKey. * It can be overriden in a subclass. */ public synchronized CollationKey getCollationKey(String source) { // // The basic algorithm here is to find all of the collation elements for each // character in the source string, convert them to a char representation, // and put them into the collation key. But it's trickier than that. // Each collation element in a string has three components: primary (A vs B), // secondary (A vs A-acute), and tertiary (A' vs a); and a primary difference // at the end of a string takes precedence over a secondary or tertiary // difference earlier in the string. // // To account for this, we put all of the primary orders at the beginning of the // string, followed by the secondary and tertiary orders, separated by nulls. // // Here's a hypothetical example, with the collation element represented as // a three-digit number, one digit for primary, one for secondary, etc. // // String: A a B \u00e9 <--(e-acute) // Collation Elements: 101 100 201 510 // // Collation Key: 1125<null>0001<null>1010 // // To make things even trickier, secondary differences (accent marks) are compared // starting at the *end* of the string in languages with French secondary ordering. // But when comparing the accent marks on a single base character, they are compared // from the beginning. To handle this, we reverse all of the accents that belong // to each base character, then we reverse the entire string of secondary orderings // at the end. Taking the same example above, a French collator might return // this instead: // // Collation Key: 1125<null>1000<null>1010 // if (source == null) return null; if (primResult == null) { primResult = new StringBuffer(); secResult = new StringBuffer(); terResult = new StringBuffer(); } else { primResult.setLength(0); secResult.setLength(0); terResult.setLength(0); } int order = 0; boolean compareSec = (getStrength() >= Collator.SECONDARY); boolean compareTer = (getStrength() >= Collator.TERTIARY); int secOrder = CollationElementIterator.NULLORDER; int terOrder = CollationElementIterator.NULLORDER; int preSecIgnore = 0; if (sourceCursor == null) { sourceCursor = getCollationElementIterator(source); } else { sourceCursor.setText(source); } // walk through each character while ((order = sourceCursor.next()) != CollationElementIterator.NULLORDER) { secOrder = CollationElementIterator.secondaryOrder(order); terOrder = CollationElementIterator.tertiaryOrder(order); if (!CollationElementIterator.isIgnorable(order)) { primResult.append((char) (CollationElementIterator.primaryOrder(order) + COLLATIONKEYOFFSET)); if (compareSec) { // // accumulate all of the ignorable/secondary characters attached // to a given base character // if (tables.isFrenchSec() && preSecIgnore < secResult.length()) { // // We're doing reversed secondary ordering and we've hit a base // (non-ignorable) character. Reverse any secondary orderings // that applied to the last base character. (see block comment above.) // RBCollationTables.reverse(secResult, preSecIgnore, secResult.length()); } // Remember where we are in the secondary orderings - this is how far // back to go if we need to reverse them later. secResult.append((char)(secOrder+ COLLATIONKEYOFFSET)); preSecIgnore = secResult.length(); } if (compareTer) { terResult.append((char)(terOrder+ COLLATIONKEYOFFSET)); } } else { if (compareSec && secOrder != 0) secResult.append((char) (secOrder + tables.getMaxSecOrder() + COLLATIONKEYOFFSET)); if (compareTer && terOrder != 0) terResult.append((char) (terOrder + tables.getMaxTerOrder() + COLLATIONKEYOFFSET)); } } if (tables.isFrenchSec()) { if (preSecIgnore < secResult.length()) { // If we've accumlated any secondary characters after the last base character, // reverse them. RBCollationTables.reverse(secResult, preSecIgnore, secResult.length()); } // And now reverse the entire secResult to get French secondary ordering. RBCollationTables.reverse(secResult, 0, secResult.length()); } primResult.append((char)0); secResult.append((char)0); secResult.append(terResult.toString()); primResult.append(secResult.toString()); if (getStrength() == IDENTICAL) { primResult.append((char)0); Normalizer.Mode mode = NormalizerUtilities.toNormalizerMode(getDecomposition()); primResult.append(Normalizer.normalize(source, mode, 0)); } return new CollationKey(source, primResult.toString()); } /** * Standard override; no change in semantics. */ public Object clone() { // if we know we're not actually a subclass of RuleBasedCollator // (this class really should have been made final), bypass // Object.clone() and use our "copy constructor". This is faster. if (getClass() == RuleBasedCollator.class) { return new RuleBasedCollator(this); } else { RuleBasedCollator result = (RuleBasedCollator) super.clone(); result.primResult = null; result.secResult = null; result.terResult = null; result.sourceCursor = null; result.targetCursor = null; return result; } } /** * Compares the equality of two collation objects. * @param obj the table-based collation object to be compared with this. * @return true if the current table-based collation object is the same * as the table-based collation object obj; false otherwise. */ public boolean equals(Object obj) { if (obj == null) return false; if (!super.equals(obj)) return false; // super does class check RuleBasedCollator other = (RuleBasedCollator) obj; // all other non-transient information is also contained in rules. return (getRules().equals(other.getRules())); } /** * Generates the hash code for the table-based collation object */ public int hashCode() { return getRules().hashCode(); } /** * Allows CollationElementIterator access to the tables object */ RBCollationTables getTables() { return tables; } // ============================================================== // private // ============================================================== final static int CHARINDEX = 0x70000000; // need look up in .commit() final static int EXPANDCHARINDEX = 0x7E000000; // Expand index follows final static int CONTRACTCHARINDEX = 0x7F000000; // contract indexes follow final static int UNMAPPED = 0xFFFFFFFF; private final static int COLLATIONKEYOFFSET = 1; private RBCollationTables tables = null; // Internal objects that are cached across calls so that they don't have to // be created/destroyed on every call to compare() and getCollationKey() private StringBuffer primResult = null; private StringBuffer secResult = null; private StringBuffer terResult = null; private CollationElementIterator sourceCursor = null; private CollationElementIterator targetCursor = null;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -