rulebasedcollator.java
来自「kaffe Java 解释器语言,源码,Java的子集系统,开放源代码」· Java 代码 · 共 948 行 · 第 1/2 页
JAVA
948 行
throw new ParseException ("operator missing at " + (i+base_offset), i+base_offset); if (!eatingChars && ((c >= 0x21 && c <= 0x2F) || (c >= 0x3A && c <= 0x40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B && c <= 0x7E))) throw new ParseException ("unquoted punctuation character '"+c+"'", i+base_offset); //type = ignoreChars ? CollationSorter.IGNORE : -1; sb.append(c); break; } if (type < 0) continue; if (operator < 0) { operator = type; continue; } if (sb.length() == 0 && !isModifier) throw new ParseException ("text element empty at " + (i+base_offset), i+base_offset); if (operator == CollationSorter.RESET) { /* Reposition in the sorting list at the position * indicated by the text element. */ String subrules = rules.substring(i); Vector sorted_rules = new Vector(); int idx; // Parse the subrules but do not iterate through all // sublist. This is the priviledge of the first call. idx = subParseString(true, sorted_rules, base_offset+i, subrules); // Merge new parsed rules into the list. mergeRules(base_offset+i, sb.toString(), v, sorted_rules); sb.setLength(0); // Reset state to none. operator = -1; type = -1; // We have found a new subrule at 'idx' but it has not been parsed. if (idx >= 0) { i += idx-1; continue main_parse_loop; } else // No more rules. 
break main_parse_loop;
          }

        // Any operator other than RESET: store the pending operator together
        // with the text element accumulated in sb, then make the operator
        // that was just read the pending one.
        CollationSorter sorter = new CollationSorter();
        sorter.comparisonType = operator;
        sorter.textElement = sb.toString();
        sorter.hashText = sorter.textElement.hashCode();
        sorter.offset = base_offset+rules.length();
        sb.setLength(0);
        v.add(sorter);
        operator = type;
      }

    // An operator is still pending once parsing stopped: emit one last
    // sorter for whatever text is left in sb.
    if (operator >= 0)
      {
        CollationSorter sorter = new CollationSorter();
        int pos = rules.length() + base_offset;

        if ((sb.length() != 0 && nextIsModifier)
            || (sb.length() == 0 && !nextIsModifier && !eatingChars))
          throw new ParseException("text element empty at " + pos, pos);

        sorter.comparisonType = operator;
        sorter.textElement = sb.toString();
        sorter.hashText = sorter.textElement.hashCode();
        // NOTE(review): pos already contains base_offset (see above), so
        // base_offset is added twice here, unlike the sorter built earlier
        // which uses base_offset+rules.length() -- confirm whether intended.
        sorter.offset = base_offset+pos;
        v.add(sorter);
      }

    // -1 signals that the whole of 'rules' was consumed; otherwise the index
    // at which parsing stopped is handed back to the caller.
    if (i == rules.length())
      return -1;
    else
      return i;
  }

  /**
   * This method creates a copy of this object by delegating to
   * <code>super.clone()</code>.
   *
   * @return A copy of this object.
   */
  public Object clone()
  {
    return super.clone();
  }

  /**
   * This method completely parses a string 'rules' containing sorting rules.
   * It runs <code>subParseString</code> once over the whole string, starting
   * at offset 0, and collects the result in a fresh <code>Vector</code>.
   *
   * @param rules String containing the rules to be parsed.
   * @return A set of sorting instructions stored in a Vector.
   * @throws ParseException if something went wrong during the parsing. To get
   * details decode the message.
   */
  private Vector parseString(String rules)
    throws ParseException
  {
    Vector v = new Vector();

    // result of the first subParseString is not absolute (may be -1 or a
    // positive integer). But we do not care.
    subParseString(false, v, 0, rules);

    return v;
  }

  /**
   * This method uses the sorting instructions built by {@link #parseString}
   * to build collation elements which can be directly used to sort strings.
   *
   * @param parsedElements Parsed instructions stored in a Vector.
   * @throws ParseException if the order of the instructions is not valid.
*/ private void buildCollationVector(Vector parsedElements) throws ParseException { int primary_seq = 0; short secondary_seq = 0; short tertiary_seq = 0; short equality_seq = 0; boolean inverseComparisons = false; final boolean DECREASING = false; final boolean INCREASING = true; boolean secondaryType = INCREASING; Vector v = new Vector(); // elts is completely sorted.element_loop: for (int i = 0; i < parsedElements.size(); i++) { CollationSorter elt = (CollationSorter) parsedElements.elementAt(i); boolean ignoreChar = false; switch (elt.comparisonType) { case CollationSorter.GREATERP: primary_seq++; if (inverseComparisons) { secondary_seq = Short.MAX_VALUE; secondaryType = DECREASING; } else { secondary_seq = 0; secondaryType = INCREASING; } tertiary_seq = 0; equality_seq = 0; inverseComparisons = false; break; case CollationSorter.GREATERS: if (secondaryType == DECREASING) secondary_seq--; else secondary_seq++; tertiary_seq = 0; equality_seq = 0; break; case CollationSorter.INVERSE_SECONDARY: inverseComparisons = true; continue element_loop; case CollationSorter.GREATERT: tertiary_seq++; equality_seq = 0; break; case CollationSorter.IGNORE: ignoreChar = true; case CollationSorter.EQUAL: equality_seq++; break; case CollationSorter.RESET: throw new ParseException ("Invalid reached state 'RESET'. Internal error", elt.offset); default: throw new ParseException ("Invalid unknown state '" + elt.comparisonType + "'", elt.offset); } CollationElement e; if (!ignoreChar) { e = new CollationElement(elt.textElement, primary_seq, secondary_seq, tertiary_seq, equality_seq, elt.expansionOrdering); } else e = new CollationElement(elt.textElement); v.add(e); } this.inverseAccentComparison = inverseComparisons; ce_table = v.toArray(); last_primary_value = primary_seq+1; } /** * Build a tree where all keys are the texts of collation elements and data is * the collation element itself. The tree is used when extracting all prefix * for a given text. 
*/ private void buildPrefixAccess() { prefix_tree = new HashMap(); for (int i = 0; i < ce_table.length; i++) { CollationElement e = (CollationElement) ce_table[i]; prefix_tree.put(e.key, e); } } /** * This method returns an integer which indicates whether the first * specified <code>String</code> is less than, greater than, or equal to * the second. The value depends not only on the collation rules in * effect, but also the strength and decomposition settings of this object. * * @param source The first <code>String</code> to compare. * @param target A second <code>String</code> to compare to the first. * * @return A negative integer if source < target, a positive integer * if source > target, or 0 if source == target. */ public int compare(String source, String target) { CollationElementIterator cs, ct; cs = getCollationElementIterator(source); ct = getCollationElementIterator(target); for(;;) { CollationElement ord1block = cs.nextBlock(); CollationElement ord2block = ct.nextBlock(); int ord1; int ord2; if (ord1block != null) ord1 = ord1block.getValue(); else { if (ord2block == null) return 0; return -1; } if (ord2block == null) return 1; ord2 = ord2block.getValue(); // We know chars are totally equal, so skip if (ord1 == ord2) { if (getStrength() == IDENTICAL) if (!ord1block.key.equals(ord2block.key)) return ord1block.key.compareTo(ord2block.key); continue; } // Check for primary strength differences int prim1 = cs.primaryOrder(ord1); int prim2 = ct.primaryOrder(ord2); if (prim1 < prim2) return -1; else if (prim1 > prim2) return 1; else if (getStrength() == PRIMARY) continue; // Check for secondary strength differences int sec1 = cs.secondaryOrder(ord1); int sec2 = ct.secondaryOrder(ord2); if (sec1 < sec2) return -1; else if (sec1 > sec2) return 1; else if (getStrength() == SECONDARY) continue; // Check for tertiary differences int tert1 = cs.tertiaryOrder(ord1); int tert2 = ct.tertiaryOrder(ord2); if (tert1 < tert2) return -1; else if (tert1 > tert2) return 1; 
else if (getStrength() == TERTIARY) continue; // Apparently JDK does this (at least for my test case). return ord1block.key.compareTo(ord2block.key); } } /** * This method tests this object for equality against the specified * object. This will be true if and only if the specified object is * another reference to this object. * * @param obj The <code>Object</code> to compare against this object. * * @return <code>true</code> if the specified object is equal to this object, * <code>false</code> otherwise. */ public boolean equals(Object obj) { if (obj == this) return true; else return false; } /** * This method builds a default collation element without invoking * the database created from the rules passed to the constructor. * * @param c Character which needs a collation element. * @return A valid brand new CollationElement instance. */ CollationElement getDefaultElement(char c) { int v; // Preliminary support for generic accent sorting inversion (I don't know if all // characters in the range should be sorted backward). This is the place // to fix this if needed. if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361)) v = 0x0361 - ((int) c - 0x02B9); else v = (short) c; return new CollationElement("" + c, last_primary_value + v, (short) 0, (short) 0, (short) 0, null); } /** * This method returns an instance for <code>CollationElementIterator</code> * for the specified <code>String</code> under the collation rules for this * object. * * @param source The <code>String</code> to return the * <code>CollationElementIterator</code> instance for. * * @return A <code>CollationElementIterator</code> for the specified * <code>String</code>. */ public CollationElementIterator getCollationElementIterator(String source) { return new CollationElementIterator(this, source); } /** * This method returns an instance of <code>CollationElementIterator</code> * for the <code>String</code> represented by the specified * <code>CharacterIterator</code>. 
* * @param source The <code>CharacterIterator</code> with the desired <code>String</code>. * * @return A <code>CollationElementIterator</code> for the specified <code>String</code>. */ public CollationElementIterator getCollationElementIterator(CharacterIterator source) { StringBuffer expand = new StringBuffer(""); // Right now we assume that we will read from the beginning of the string. for (char c = source.first(); c != CharacterIterator.DONE; c = source.next()) decomposeCharacter(c, expand); return getCollationElementIterator(expand.toString()); } /** * This method returns an instance of <code>CollationKey</code> for the * specified <code>String</code>. The object returned will have a * more efficient mechanism for its comparison function that could * provide speed benefits if multiple comparisons are performed, such * as during a sort. * * @param source The <code>String</code> to create a <code>CollationKey</code> for. * * @return A <code>CollationKey</code> for the specified <code>String</code>. */ public CollationKey getCollationKey(String source) { CollationElementIterator cei = getCollationElementIterator(source); Vector vect = new Vector(25); int ord = cei.next(); cei.reset(); //set to start of string while (ord != CollationElementIterator.NULLORDER) { switch (getStrength()) { case PRIMARY: ord = cei.primaryOrder(ord); break; case SECONDARY: ord = cei.secondaryOrder(ord); default: break; } vect.add(new Integer(ord)); ord = cei.next(); //increment to next key } Object[] objarr = vect.toArray(); byte[] key = new byte[objarr.length * 4]; for (int i = 0; i < objarr.length; i++) { int j = ((Integer) objarr[i]).intValue(); key [i * 4] = (byte) ((j & 0xFF000000) >> 24); key [i * 4 + 1] = (byte) ((j & 0x00FF0000) >> 16); key [i * 4 + 2] = (byte) ((j & 0x0000FF00) >> 8); key [i * 4 + 3] = (byte) (j & 0x000000FF); } return new CollationKey(this, source, key); } /** * This method returns a <code>String</code> containing the collation rules * for this object. 
*
   * @return The collation rules for this object, as held in the
   * <code>rules</code> field.
   */
  public String getRules()
  {
    return rules;
  }

  /**
   * This method returns a hash value for this object. The value is the
   * identity hash code, which matches the reference-based
   * <code>equals</code> of this class.
   *
   * @return A hash value for this object.
   */
  public int hashCode()
  {
    return System.identityHashCode(this);
  }
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?