📄 charset.java
字号:
* two-character codes. */ private static CharSet charSetForCategory(String category) { // throw an exception if we have anything other than one or two // characters inside the colons if (category.length() == 0 || category.length() >= 3) { throw new IllegalArgumentException("Invalid character category: " + category); } // if we have two characters, search the category map for that code // and either construct and return a CharSet from the data in the // category map or throw an exception if (category.length() == 2) { for (int i = 0; i < categoryMap.length; i++) { if (categoryMap[i][0].equals(category)) { return new CharSet(categoryMap[i][1]); } } throw new IllegalArgumentException("Invalid character category: " + category); } // if we have one character, search the category map for codes beginning // with that letter, and union together all of the matching sets that // we find (or throw an exception if there are no matches) else if (category.length() == 1) { CharSet result = new CharSet(); for (int i = 0; i < categoryMap.length; i++) { if (categoryMap[i][0].startsWith(category)) { result = result.union(new CharSet(categoryMap[i][1])); } } if (result.empty()) { throw new IllegalArgumentException("Invalid character category: " + category); } else { return result; } } return new CharSet(); // should never get here, but to make the compiler happy... } /** * Returns a copy of CharSet's expression cache and sets CharSet's * expression cache to empty. */ public static Hashtable releaseExpressionCache() { Hashtable result = expressionCache; expressionCache = null; return result; } //========================================================================== // CharSet manipulation //========================================================================== /** * Creates an empty CharSet. */ public CharSet() { chars = ""; } /** * Creates a CharSet containing a single character. * @param c The character to put into the CharSet */ public CharSet(char c) { StringBuffer temp = new StringBuffer(); temp.append(c); temp.append(c); chars = temp.toString(); } /** * Creates a CharSet containing a range of characters. * @param lo The lowest-numbered character to include in the range * @param hi The highest-numbered character to include in the range */ public CharSet(char lo, char hi) { StringBuffer temp = new StringBuffer(); if (lo <= hi) { temp.append(lo); temp.append(hi); } else { temp.append(hi); temp.append(lo); } chars = temp.toString(); } /** * Creates a CharSet, initializing it from the internal storage * of another CharSet (this function performs no error checking * on "chars", so if it's malformed, undefined behavior will result) */ private CharSet(String chars) { this.chars = chars; } /** * Returns a CharSet representing the union of two CharSets. */ public CharSet union(CharSet that) { return new CharSet(doUnion(that.chars).toString()); } /** * Adds the characters in "that" to this CharSet */ private void internalUnion(CharSet that) { chars = doUnion(that.chars).toString(); } /** * The actual implementation of the union functions */ private StringBuffer doUnion(String c2) { StringBuffer result = new StringBuffer(); int i = 0; int j = 0; // consider all the characters in both strings while (i < chars.length() && j < c2.length()) { char ub; // the first character in the result is the lower of the // starting characters of the two strings, and "ub" gets // set to the upper bound of that range if (chars.charAt(i) < c2.charAt(j)) { result.append(chars.charAt(i)); ub = chars.charAt(++i); } else { result.append(c2.charAt(j)); ub = c2.charAt(++j); } // for as long as one of our two pointers is pointing to a range's // end point, or i is pointing to a character that is less than // "ub" plus one (the "plus one" stitches touching ranges together)... while (i % 2 == 1 || j % 2 == 1 || (i < chars.length() && chars.charAt(i) <= ub + 1)) { // advance i to the first character that is greater than // "ub" plus one while (i < chars.length() && chars.charAt(i) <= ub + 1) { ++i; } // if i points to the endpoint of a range, update "ub" // to that character, or if i points to the start of // a range and the endpoint of the preceding range is // greater than "ub", update "up" to _that_ character if (i % 2 == 1) { ub = chars.charAt(i); } else if (i > 0 && chars.charAt(i - 1) > ub) { ub = chars.charAt(i - 1); } // now advance j to the first character that is greater // that "ub" plus one while (j < c2.length() && c2.charAt(j) <= ub + 1) { ++j; } // if j points to the endpoint of a range, update "ub" // to that character, or if j points to the start of // a range and the endpoint of the preceding range is // greater than "ub", update "up" to _that_ character if (j % 2 == 1) { ub = c2.charAt(j); } else if (j > 0 && c2.charAt(j - 1) > ub) { ub = c2.charAt(j - 1); } } // when we finally fall out of this loop, we will have stitched // together a series of ranges that overlap or touch, i and j // will both point to starting points of ranges, and "ub" will // be the endpoint of the range we're working on. Write "ub" // to the result result.append(ub); // loop back around to create the next range in the result } // we fall out to here when we've exhausted all the characters in // one of the operands. We can append all of the remaining characters // in the other operand without doing any extra work. if (i < chars.length()) { result.append(chars.substring(i)); } if (j < c2.length()) { result.append(c2.substring(j)); } return result; } /** * Returns the intersection of two CharSets. */ public CharSet intersection(CharSet that) { return new CharSet(doIntersection(that.chars).toString()); } /** * Removes from this CharSet any characters that aren't also in "that" */ private void internalIntersection(CharSet that) { chars = doIntersection(that.chars).toString(); } /** * The internal implementation of the two intersection functions */ private StringBuffer doIntersection(String c2) { StringBuffer result = new StringBuffer(); int i = 0; int j = 0; int oldI; int oldJ; // iterate until we've exhausted one of the operands while (i < chars.length() && j < c2.length()) { // advance j until it points to a character that is larger than // the one i points to. If this is the beginning of a one- // character range, advance j to point to the end if (i < chars.length() && i % 2 == 0) { while (j < c2.length() && c2.charAt(j) < chars.charAt(i)) { ++j; } if (j < c2.length() && j % 2 == 0 && c2.charAt(j) == chars.charAt(i)) { ++j; } } // if j points to the endpoint of a range, save the current // value of i, then advance i until it reaches a character // which is larger than the character pointed at // by j. All of the characters we've advanced over (except // the one currently pointed to by i) are added to the result oldI = i; while (j % 2 == 1 && i < chars.length() && chars.charAt(i) <= c2.charAt(j)) { ++i; } result.append(chars.substring(oldI, i)); // if i points to the endpoint of a range, save the current // value of j, then advance j until it reaches a character // which is larger than the character pointed at // by i. All of the characters we've advanced over (except // the one currently pointed to by i) are added to the result oldJ = j; while (i % 2 == 1 && j < c2.length() && c2.charAt(j) <= chars.charAt(i)) { ++j; } result.append(c2.substring(oldJ, j)); // advance i until it points to a character larger than j // If it points at the beginning of a one-character range, // advance it to the end of that range if (j < c2.length() && j % 2 == 0) { while (i < chars.length() && chars.charAt(i) < c2.charAt(j)) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -