📄 collationelementiterator.java
字号:
* original string, not an offset into its corresponding sequence of * collation elements). The value returned by the next call to next() * will be the collation element corresponding to the specified position * in the text. If that position is in the middle of a contracting * character sequence, the result of the next call to next() is the * collation element for that sequence. This means that getOffset() * is not guaranteed to return the same value as was passed to a preceding * call to setOffset(). * * @param newOffset The new character offset into the original text. * @since 1.2 */ public void setOffset(int newOffset) { if (text != null) { if (newOffset < text.getBeginIndex() || newOffset >= text.getEndIndex()) { text.setIndexOnly(newOffset); } else { char c = text.setIndex(newOffset); // if the desired character isn't used in a contracting character // sequence, bypass all the backing-up logic-- we're sitting on // the right character already if (ordering.usedInContractSeq(c)) { // walk backwards through the string until we see a character // that DOESN'T participate in a contracting character sequence while (ordering.usedInContractSeq(c)) { c = text.previous(); } // now walk forward using this object's next() method until // we pass the starting point and set our current position // to the beginning of the last "character" before or at // our starting position int last = text.getIndex(); while (text.getIndex() <= newOffset) { last = text.getIndex(); next(); } text.setIndexOnly(last); // we don't need this, since last is the last index // that is the starting of the contraction which encompass // newOffset // text.previous(); } } } buffer = null; expIndex = 0; swapOrder = 0; } /** * Returns the character offset in the original text corresponding to the next * collation element. (That is, getOffset() returns the position in the text * corresponding to the collation element that will be returned by the next * call to next().) 
This value will always be the index of the FIRST character * corresponding to the collation element (a contracting character sequence is * when two or more characters all correspond to the same collation element). * This means if you do setOffset(x) followed immediately by getOffset(), getOffset() * won't necessarily return x. * * @return The character offset in the original text corresponding to the collation * element that will be returned by the next call to next(). * @since 1.2 */ public int getOffset() { return (text != null) ? text.getIndex() : 0; } /** * Return the maximum length of any expansion sequences that end * with the specified comparison order. * @param order a collation order returned by previous or next. * @return the maximum length of any expansion sequences ending * with the specified order. * @since 1.2 */ public int getMaxExpansion(int order) { return ordering.getMaxExpansion(order); } /** * Set a new string over which to iterate. * * @param source the new source text * @since 1.2 */ public void setText(String source) { buffer = null; swapOrder = 0; expIndex = 0; Normalizer.Mode mode = NormalizerUtilities.toNormalizerMode(owner.getDecomposition()); if (text == null) { text = new Normalizer(source, mode); } else { text.setMode(mode); text.setText(source); } } /** * Set a new string over which to iterate. * * @param source the new source text. * @since 1.2 */ public void setText(CharacterIterator source) { buffer = null; swapOrder = 0; expIndex = 0; Normalizer.Mode mode = NormalizerUtilities.toNormalizerMode(owner.getDecomposition()); if (text == null) { text = new Normalizer(source, mode); } else { text.setMode(mode); text.setText(source); } } //============================================================ // privates //============================================================ /** * Determine if a character is a Thai vowel (which sorts after * its base consonant). 
*/
    private final static boolean isThaiPreVowel(char ch) {
        return '\u0e40' <= ch && ch <= '\u0e44';
    }

    /**
     * Determine if a character is a Thai base consonant
     * (code points U+0E01 through U+0E2E).
     */
    private final static boolean isThaiBaseConsonant(char ch) {
        return '\u0e01' <= ch && ch <= '\u0e2e';
    }

    /**
     * Determine if a character is a Lao vowel (which sorts after
     * its base consonant); code points U+0EC0 through U+0EC4.
     */
    private final static boolean isLaoPreVowel(char ch) {
        return '\u0ec0' <= ch && ch <= '\u0ec4';
    }

    /**
     * Determine if a character is a Lao base consonant
     * (code points U+0E81 through U+0EAE).
     */
    private final static boolean isLaoBaseConsonant(char ch) {
        return '\u0e81' <= ch && ch <= '\u0eae';
    }

    /**
     * Builds a buffer holding the collation elements of two characters in
     * swapped order.  The elements for colFirst are looked up here
     * (resolving contractions and expansions); the elements of the other
     * character have already been computed by the caller and are passed in
     * either as a single value (lastValue, with lastExpansion null) or as an
     * expansion list (lastExpansion, in which case lastValue is ignored).
     * When iterating forward, colFirst's elements are placed ahead of the
     * other character's; when iterating backward the two groups are swapped.
     *
     * @param colFirst      the character whose elements are looked up here
     * @param lastValue     the other character's single element, used only
     *                      when lastExpansion is null
     * @param lastExpansion the other character's expansion list, or null
     * @param forward       true for forward iteration, false for backward
     * @return a new array containing both groups of elements
     */
    private int[] makeReorderedBuffer(char colFirst,
                                      int lastValue,
                                      int[] lastExpansion,
                                      boolean forward) {
        // Look up colFirst, resolving a contraction in the direction of travel.
        int order = ordering.getUnicodeOrder(colFirst);
        if (order >= RuleBasedCollator.CONTRACTCHARINDEX) {
            if (forward) {
                order = nextContractChar(colFirst);
            } else {
                order = prevContractChar(colFirst);
            }
        }

        int[] expansion = null;
        if (order >= RuleBasedCollator.EXPANDCHARINDEX) {
            expansion = ordering.getExpandValueList(order);
        }

        // Decide which group leads: colFirst's when going forward,
        // the caller-supplied one when going backward.
        final int headValue;
        final int[] headExpansion;
        final int tailValue;
        final int[] tailExpansion;
        if (forward) {
            headValue = order;
            headExpansion = expansion;
            tailValue = lastValue;
            tailExpansion = lastExpansion;
        } else {
            headValue = lastValue;
            headExpansion = lastExpansion;
            tailValue = order;
            tailExpansion = expansion;
        }

        // A group without an expansion list contributes exactly one element.
        final int headLength = (headExpansion != null) ? headExpansion.length : 1;
        final int tailLength = (tailExpansion != null) ? tailExpansion.length : 1;
        final int[] merged = new int[headLength + tailLength];

        if (headExpansion != null) {
            System.arraycopy(headExpansion, 0, merged, 0, headLength);
        } else {
            merged[0] = headValue;
        }

        if (tailExpansion != null) {
            System.arraycopy(tailExpansion, 0, merged, headLength, tailLength);
        } else {
            merged[headLength] = tailValue;
        }

        return merged;
    }

    /**
     *  Check if a comparison order is ignorable, i.e. its primary
     *  component is zero.
     *  @return true if a character is ignorable, false otherwise.
     */
    final static boolean isIgnorable(int order) {
        return primaryOrder(order) == 0;
    }

    /**
     * Resolves a forward contraction that starts with the given character.
     * The longest forward entry in the character's contraction list that
     * matches the upcoming text wins; the text position is advanced past the
     * extra characters of that match.  If nothing longer than one character
     * matches, the single character's own order is returned and the position
     * is left alone.
     *
     * @param ch the starting character of a contracting character token
     * @return the collation order of the matched sequence
     */
    private int nextContractChar(char ch) {
        Vector candidates = ordering.getContractValues(ch);

        // The first entry always describes the character on its own.
        int order = ((EntryPair) candidates.firstElement()).value;

        // The table builder keeps the longest sequence last, so its length
        // bounds how much text has to be examined.
        int remaining = ((EntryPair) candidates.lastElement()).entryName.length();

        // Work on a clone so this look-ahead does not disturb the real
        // iteration position; the Normalizer is used (rather than the raw
        // text) so that offsets agree with the rest of the iterator.
        Normalizer lookahead = (Normalizer) text.clone();
        lookahead.previous();
        key.setLength(0);
        for (char c = lookahead.next();
             remaining > 0 && c != Normalizer.DONE;
             c = lookahead.next()) {
            key.append(c);
            --remaining;
        }
        String fragment = key.toString();

        // Scan the list from the end towards (but excluding) the first entry,
        // keeping the longest forward entry that prefixes the fragment.
        int matchedLength = 1;
        for (int i = candidates.size() - 1; i > 0; i--) {
            EntryPair candidate = (EntryPair) candidates.elementAt(i);
            if (candidate.fwd
                    && fragment.startsWith(candidate.entryName)
                    && candidate.entryName.length() > matchedLength) {
                matchedLength = candidate.entryName.length();
                order = candidate.value;
            }
        }

        // Consume the characters of the match beyond the first one; with no
        // match (length 1) the position is already correct.
        for (int extra = matchedLength - 1; extra > 0; extra--) {
            text.next();
        }
        return order;
    }

    /**
     * Resolves a backward contraction that starts with the given character.
     * This mirrors nextContractChar(): the Normalizer is walked with
     * previous() instead of next(), and only entries whose fwd flag is off
     * are considered.  append() and startsWith() are still used on the
     * fragment because the entries used for reverse iteration already store
     * their names reversed.
     *
     * @param ch the starting character of a contracting character token
     * @return the collation order of the matched sequence, i.e. the previous
     *         contracting character's ordering
     */
    private int prevContractChar(char ch) {
        Vector candidates = ordering.getContractValues(ch);

        // The first entry always describes the character on its own.
        int order = ((EntryPair) candidates.firstElement()).value;

        // Length of the last (longest) entry bounds the look-behind.
        int remaining = ((EntryPair) candidates.lastElement()).entryName.length();

        // Look behind on a clone so the real position is untouched.
        Normalizer lookbehind = (Normalizer) text.clone();
        lookbehind.next();
        key.setLength(0);
        for (char c = lookbehind.previous();
             remaining > 0 && c != Normalizer.DONE;
             c = lookbehind.previous()) {
            key.append(c);
            --remaining;
        }
        String fragment = key.toString();

        // Keep the longest backward entry that prefixes the fragment.
        int matchedLength = 1;
        for (int i = candidates.size() - 1; i > 0; i--) {
            EntryPair candidate = (EntryPair) candidates.elementAt(i);
            if (!candidate.fwd
                    && fragment.startsWith(candidate.entryName)
                    && candidate.entryName.length() > matchedLength) {
                matchedLength = candidate.entryName.length();
                order = candidate.value;
            }
        }

        // Move back over the characters of the match beyond the first one.
        for (int extra = matchedLength - 1; extra > 0; extra--) {
            text.previous();
        }
        return order;
    }

    final static int UNMAPPEDCHARVALUE = 0x7FFF0000;

    // Normalizing view of the source text; created lazily by setText().
    private Normalizer text = null;
    // Pending-element state; cleared by setOffset() and setText().
    // NOTE(review): consumers of these three fields are outside this excerpt.
    private int[] buffer = null;
    private int expIndex = 0;
    // Scratch buffer used by next/prevContractChar to collect a text fragment.
    private StringBuffer key = new StringBuffer(5);
    private int swapOrder = 0;
    // Collation tables consulted for orders, contractions and expansions.
    private RBCollationTables ordering;
    // Collator whose decomposition setting selects the normalization mode.
    private RuleBasedCollator owner;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -