📄 pattern.java
字号:
* * @param input * The character sequence to be split * * @return The array of strings computed by splitting the input * around matches of this pattern */ public String[] split(CharSequence input) { return split(input, 0); } /** * Recompile the Pattern instance from a stream. The original pattern * string is read in and the object tree is recompiled from it. */ private void readObject(java.io.ObjectInputStream s) throws java.io.IOException, ClassNotFoundException { // Read in all fields s.defaultReadObject(); // Initialize counts groupCount = 1; localCount = 0; // Recompile object tree if (pattern.length() > 0) compile(); else root = new Start(lastAccept); } /** * This private constructor is used to create all Patterns. The pattern * string and match flags are all that is needed to completely describe * a Pattern. An empty pattern string results in an object tree with * only a Start node and a LastNode node. */ private Pattern(String p, int f) { pattern = p; flags = f; // Reset group index count groupCount = 1; localCount = 0; if (pattern.length() > 0) { compile(); } else { root = new Start(lastAccept); matchRoot = lastAccept; } } /** * The pattern is converted to normalizedD form and then a pure group * is constructed to match canonical equivalences of the characters. 
*/ private void normalize() { boolean inCharClass = false; char lastChar = 0xffff; // Convert pattern into normalizedD form normalizedPattern = Normalizer.decompose(pattern, false, 0); patternLength = normalizedPattern.length(); // Modify pattern to match canonical equivalences StringBuffer newPattern = new StringBuffer(patternLength); for(int i=0; i<patternLength; i++) { char c = normalizedPattern.charAt(i); StringBuffer sequenceBuffer; if ((Character.getType(c) == Character.NON_SPACING_MARK) && (lastChar != 0xffff)) { sequenceBuffer = new StringBuffer(); sequenceBuffer.append(lastChar); sequenceBuffer.append(c); while(Character.getType(c) == Character.NON_SPACING_MARK) { i++; if (i >= patternLength) break; c = normalizedPattern.charAt(i); sequenceBuffer.append(c); } String ea = produceEquivalentAlternation( sequenceBuffer.toString()); newPattern.setLength(newPattern.length()-1); newPattern.append("(?:").append(ea).append(")"); } else if (c == '[' && lastChar != '\\') { i = normalizeCharClass(newPattern, i); } else { newPattern.append(c); } lastChar = c; } normalizedPattern = newPattern.toString(); } /** * Complete the character class being parsed and add a set * of alternations to it that will match the canonical equivalences * of the characters within the class. 
*/ private int normalizeCharClass(StringBuffer newPattern, int i) { StringBuffer charClass = new StringBuffer(); StringBuffer eq = null; char lastChar = 0xffff; String result; i++; charClass.append("["); while(true) { char c = normalizedPattern.charAt(i); StringBuffer sequenceBuffer; if (c == ']' && lastChar != '\\') { charClass.append(c); break; } else if (Character.getType(c) == Character.NON_SPACING_MARK) { sequenceBuffer = new StringBuffer(); sequenceBuffer.append(lastChar); while(Character.getType(c) == Character.NON_SPACING_MARK) { sequenceBuffer.append(c); i++; if (i >= normalizedPattern.length()) break; c = normalizedPattern.charAt(i); } String ea = produceEquivalentAlternation( sequenceBuffer.toString()); charClass.setLength(charClass.length()-1); if (eq == null) eq = new StringBuffer(); eq.append('|'); eq.append(ea); } else { charClass.append(c); i++; } if (i == normalizedPattern.length()) error("Unclosed character class"); lastChar = c; } if (eq != null) { result = new String("(?:"+charClass.toString()+ eq.toString()+")"); } else { result = charClass.toString(); } newPattern.append(result); return i; } /** * Given a specific sequence composed of a regular character and * combining marks that follow it, produce the alternation that will * match all canonical equivalences of that sequence. */ private String produceEquivalentAlternation(String source) { if (source.length() == 1) return new String(source); String base = source.substring(0,1); String combiningMarks = source.substring(1); String[] perms = producePermutations(combiningMarks); StringBuffer result = new StringBuffer(source); // Add combined permutations for(int x=0; x<perms.length; x++) { String next = base + perms[x]; if (x>0) result.append("|"+next); next = composeOneStep(next); if (next != null) result.append("|"+produceEquivalentAlternation(next)); } return result.toString(); } /** * Returns an array of strings that have all the possible * permutations of the characters in the input string. 
* This is used to get a list of all possible orderings * of a set of combining marks. Note that some of the permutations * are invalid because of combining class collisions, and these * possibilities must be removed because they are not canonically * equivalent. */ private String[] producePermutations(String input) { if (input.length() == 1) return new String[] {input}; if (input.length() == 2) { if (getClass(input.charAt(1)) == getClass(input.charAt(0))) { return new String[] {input}; } String[] result = new String[2]; result[0] = input; StringBuffer sb = new StringBuffer(2); sb.append(input.charAt(1)); sb.append(input.charAt(0)); result[1] = sb.toString(); return result; } int length = 1; for(int x=1; x<input.length(); x++) length = length * (x+1); String[] temp = new String[length]; int combClass[] = new int[input.length()]; for(int x=0; x<input.length(); x++) combClass[x] = getClass(input.charAt(x)); // For each char, take it out and add the permutations // of the remaining chars int index = 0;loop: for(int x=0; x<input.length(); x++) { boolean skip = false; for(int y=x-1; y>=0; y--) { if (combClass[y] == combClass[x]) { continue loop; } } StringBuffer sb = new StringBuffer(input); String otherChars = sb.delete(x, x+1).toString(); String[] subResult = producePermutations(otherChars); String prefix = input.substring(x, x+1); for(int y=0; y<subResult.length; y++) temp[index++] = prefix + subResult[y]; } String[] result = new String[index]; for (int x=0; x<index; x++) result[x] = temp[x]; return result; } private int getClass(char c) { return Normalizer.getClass(c); } /** * Attempts to compose input by combining the first character * with the first combining mark following it. Returns a String * that is the composition of the leading character with its first * combining mark followed by the remaining combining marks. Returns * null if the first two chars cannot be further composed. 
*/
private String composeOneStep(String input) {
    String leadingPair = input.substring(0, 2);
    String composed = Normalizer.compose(leadingPair, false, 0);

    // An unchanged pair means no further composition is possible.
    if (!composed.equals(leadingPair)) {
        return composed + input.substring(2);
    }
    return null;
}

/**
 * Copies regular expression to a char array and invokes the parsing
 * of the expression which will create the object tree.
 */
private void compile() {
    // Handle canonical equivalences
    if (!has(CANON_EQ)) {
        normalizedPattern = pattern;
    } else {
        normalize();
    }

    // Copy pattern to char array for convenience
    patternLength = normalizedPattern.length();
    temp = new char[patternLength + 2];

    // Use double null characters to terminate pattern
    normalizedPattern.getChars(0, patternLength, temp, 0);
    temp[patternLength] = 0;
    temp[patternLength + 1] = 0;

    // Allocate all temporary objects here.
    buffer = new char[32];
    groupNodes = new GroupHead[10];

    // Start recursive descent parsing
    matchRoot = expr(lastAccept);

    // Check extra pattern characters
    if (cursor != patternLength) {
        error(peek() == ')'
              ? "Unmatched closing ')'"
              : "Unexpected internal error");
    }

    // Peephole optimization
    if (matchRoot instanceof Slice) {
        root = BnM.optimize(matchRoot);
        // optimize() handing back the same node means nothing was
        // substituted, so fall back to a plain Start node.
        if (root == matchRoot) {
            root = new Start(matchRoot);
        }
    } else if (!(matchRoot instanceof Begin)
               && !(matchRoot instanceof First)) {
        root = new Start(matchRoot);
    } else {
        root = matchRoot;
    }

    // Release temporary storage
    temp = null;
    buffer = null;
    groupNodes = null;
    patternLength = 0;
}

/**
 * Used to print out a subtree of the Pattern to help with debugging.
 */
private static void printObjectTree(Node node) {
    while(node != null) {
        if (node instanceof Prolog) {
            System.out.println(node);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -