📄 re.java
字号:
if (addition.size() == 0) addition = null; result.token = new RETokenOneOf(subIndex, options, addition, negative); result.index = index - 1; result.returnAtAndOperator = true; return result; } // The precedence of the operator "&&" is the lowest. // So we postpone adding "&" until other elements // are added. And we insert Boolean.FALSE at the // beginning of the list of tokens following "&&". // So, "&&[a-b][k-m]" will be stored in the Vecter // addition in this order: // Boolean.FALSE, [a-b], "|", [k-m], "|", "&" if (additionAndAppeared) addition.addElement("&"); addition.addElement(Boolean.FALSE); additionAndAppeared = true; // The part on which "&&" operates may be either // (1) explicitly enclosed by [] // or // (2) not enclosed by [] and terminated by the // next "&&" or the end of the character list. // Let the preceding else if block do the case (1). // We must do something in case of (2). if ((index + 1 < pLength) && (pattern[index + 1] != '[')) { ParseCharClassResult result = parseCharClass( subIndex, pattern, index+1, pLength, cflags, syntax, RETURN_AT_AND); addition.addElement(result.token); addition.addElement("|"); // If the method returned at the next "&&", it is OK. // Otherwise we have eaten the mark of the end of this // character list "]". In this case we must give back // the end mark. index = (result.returnAtAndOperator ? result.index: result.index - 1); } } else { if (lastCharIsSet) options.addElement(new RETokenChar(subIndex,lastChar,insens)); lastChar = ch; lastCharIsSet = true; } if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index); } // while in list // Out of list, index is one past ']' if (lastCharIsSet) options.addElement(new RETokenChar(subIndex,lastChar,insens)); ParseCharClassResult result = new ParseCharClassResult(); // Create a new RETokenOneOf options.trimToSize(); if (additionAndAppeared) addition.addElement("&"); if (addition.size() == 0) addition = null; result.token = new RETokenOneOf(subIndex,options, addition, negative); result.index = index; return result; } private static int getCharUnit(char[] input, int index, CharUnit unit, boolean quot) throws REException { unit.ch = input[index++]; unit.bk = (unit.ch == '\\' && (!quot || index >= input.length || input[index] == 'E')); if (unit.bk) if (index < input.length) unit.ch = input[index++]; else throw new REException(getLocalizedMessage("ends.with.backslash"),REException.REG_ESCAPE,index); return index; } private static int parseInt(char[] input, int pos, int len, int radix) { int ret = 0; for (int i = pos; i < pos + len; i++) { ret = ret * radix + Character.digit(input[i], radix); } return ret; } /** * This class represents various expressions for a character. * "a" : 'a' itself. * "\0123" : Octal char 0123 * "\x1b" : Hex char 0x1b * "\u1234" : Unicode char \u1234 */ private static class CharExpression { /** character represented by this expression */ char ch; /** String expression */ String expr; /** length of this expression */ int len; public String toString() { return expr; } } private static CharExpression getCharExpression(char[] input, int pos, int lim, RESyntax syntax) { CharExpression ce = new CharExpression(); char c = input[pos]; if (c == '\\') { if (pos + 1 >= lim) return null; c = input[pos + 1]; switch(c) { case 't': ce.ch = '\t'; ce.len = 2; break; case 'n': ce.ch = '\n'; ce.len = 2; break; case 'r': ce.ch = '\r'; ce.len = 2; break; case 'x': case 'u': if ((c == 'x' && syntax.get(RESyntax.RE_HEX_CHAR)) || (c == 'u' && syntax.get(RESyntax.RE_UNICODE_CHAR))) { int l = 0; int expectedLength = (c == 'x' ? 2 : 4); for (int i = pos + 2; i < pos + 2 + expectedLength; i++) { if (i >= lim) break; if (!((input[i] >= '0' && input[i] <= '9') || (input[i] >= 'A' && input[i] <= 'F') || (input[i] >= 'a' && input[i] <= 'f'))) break; l++; } if (l != expectedLength) return null; ce.ch = (char)(parseInt(input, pos + 2, l, 16)); ce.len = l + 2; } else { ce.ch = c; ce.len = 2; } break; case '0': if (syntax.get(RESyntax.RE_OCTAL_CHAR)) { int l = 0; for (int i = pos + 2; i < pos + 2 + 3; i++) { if (i >= lim) break; if (input[i] < '0' || input[i] > '7') break; l++; } if (l == 3 && input[pos + 2] > '3') l--; if (l <= 0) return null; ce.ch = (char)(parseInt(input, pos + 2, l, 8)); ce.len = l + 2; } else { ce.ch = c; ce.len = 2; } break; default: ce.ch = c; ce.len = 2; break; } } else { ce.ch = input[pos]; ce.len = 1; } ce.expr = new String(input, pos, ce.len); return ce; } /** * This class represents a substring in a pattern string expressing * a named property. * "\pA" : Property named "A" * "\p{prop}" : Property named "prop" * "\PA" : Property named "A" (Negated) * "\P{prop}" : Property named "prop" (Negated) */ private static class NamedProperty { /** Property name */ String name; /** Negated or not */ boolean negate; /** length of this expression */ int len; } private static NamedProperty getNamedProperty(char[] input, int pos, int lim) { NamedProperty np = new NamedProperty(); char c = input[pos]; if (c == '\\') { if (++pos >= lim) return null; c = input[pos++]; switch(c) { case 'p': np.negate = false; break; case 'P': np.negate = true; break; default: return null; } c = input[pos++]; if (c == '{') { int p = -1; for (int i = pos; i < lim; i++) { if (input[i] == '}') { p = i; break; } } if (p < 0) return null; int len = p - pos; np.name = new String(input, pos, len); np.len = len + 4; } else { np.name = new String(input, pos - 1, 1); np.len = 3; } return np; } else return null; } private static RETokenNamedProperty getRETokenNamedProperty( int subIndex, NamedProperty np, boolean insens, int index) throws REException { try { return new RETokenNamedProperty(subIndex, np.name, insens, np.negate); } catch (REException e) { REException ree; ree = new REException(e.getMessage(), REException.REG_ESCAPE, index); ree.initCause(e); throw ree; } } /** * Checks if the regular expression matches the input in its entirety. * * @param input The input text. */ public boolean isMatch(Object input) { return isMatch(input,0,0); } /** * Checks if the input string, starting from index, is an exact match of * this regular expression. * * @param input The input text. * @param index The offset index at which the search should be begin. */ public boolean isMatch(Object input,int index) { return isMatch(input,index,0); } /** * Checks if the input, starting from index and using the specified * execution flags, is an exact match of this regular expression. * * @param input The input text. * @param index The offset index at which the search should be begin. * @param eflags The logical OR of any execution flags above. */ public boolean isMatch(Object input,int index,int eflags) { return isMatchImpl(makeCharIndexed(input,index),index,eflags); } private boolean isMatchImpl(CharIndexed input, int index, int eflags) { if (firstToken == null) // Trivial case return (input.charAt(0) == CharIndexed.OUT_OF_BOUNDS); REMatch m = new REMatch(numSubs, index, eflags); if (firstToken.match(input, m)) { if (m != null) { if (input.charAt(m.index) == CharIndexed.OUT_OF_BOUNDS) { return true; } } } return false; } /** * Returns the maximum number of subexpressions in this regular expression. * If the expression contains branches, the value returned will be the * maximum subexpressions in any of the branches. */ public int getNumSubs() { return numSubs; } // Overrides REToken.setUncle void setUncle(REToken uncle) { if (lastToken != null) { lastToken.setUncle(uncle); } else super.setUncle(uncle); // to deal with empty subexpressions } // Overrides REToken.chain boolean chain(REToken next) { super.chain(next); setUncle(next); return true; } /** * Returns the minimum number of characters that could possibly * constitute a match of this regular expression. */ public int getMinimumLength() { return minimumLength; } public int getMaximumLength() { return maximumLength; } /** * Returns an array of all matches found in the input. * * If the regular expression allows the empty string to match, it will * substitute matches at all positions except the end of the input. * * @param input The input text. * @return a non-null (but possibly zero-length) array of matches */ public REMatch[] getAllMatches(Object input) { return getAllMatches(input,0,0); } /** * Returns an array of all matches found in the input, * beginning at the specified index position. * * If the regular expression allows the empty string to match, it will * substitute matches at all positions except the end of the input. * * @param input The input text. * @param index The offset index at which the search should be begin. * @return a non-null (but possibly zero-length) array of matches */ public REMatch[] getAllMatches(Object input, int index) { return getAllMatches(input,index,0); } /** * Returns an array of all matches found in the input string, * beginning at the specified index position and using the specified * execution flags. * * If the regular expression allows the empty string to match, it will * substitute matches at all positions except the end of the input. * * @param input The input text. * @param index The offset index at which the search should be begin. * @param eflags The logical OR of any execution flags above. * @return a non-null (but possibly zero-length) array of matches */ public REMatch[] getAllMatches(Object input, int index, int eflags) { return getAllMatchesImpl(makeCharIndexed(input,index),index,eflags); } // this has been changed since 1.03 to be non-overlapping matches private REMatch[] getAllMatchesImpl(CharIndexed input, int index, int eflags) { Vector all = new Vector(); REMatch m = null; while ((m = getMatchImpl(input,index,eflags,null)) != null) { all.addElement(m); index = m.getEndIndex();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -