📄 re.java
字号:
} else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary) throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index); else currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index); } // ZERO-OR-ONE REPEAT OPERATOR / STINGY MATCHING OPERATOR // ? | \? depending on RE_BK_PLUS_QM // not available if RE_LIMITED_OPS is set // stingy matching if RE_STINGY_OPS is set and it follows a quantifier else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) { if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index); // Check for stingy matching on RETokenRepeated if (currentToken instanceof RETokenRepeated) { RETokenRepeated tokenRep = (RETokenRepeated)currentToken; if (syntax.get(RESyntax.RE_STINGY_OPS) && !tokenRep.isStingy() && !tokenRep.isPossessive()) tokenRep.makeStingy(); else throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index); } else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary) throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index); else currentToken = setRepeated(currentToken,0,1,index); } // OCTAL CHARACTER // \0377 else if (unit.bk && (unit.ch == '0') && syntax.get(RESyntax.RE_OCTAL_CHAR)) { CharExpression ce = getCharExpression(pattern, index - 2, pLength, syntax); if (ce == null) throw new REException("invalid octal character", REException.REG_ESCAPE, index); index = index - 2 + ce.len; addToken(currentToken); currentToken = new RETokenChar(subIndex,ce.ch,insens); } // BACKREFERENCE OPERATOR // \1 \2 ... \9 and \10 \11 \12 ... // not available if RE_NO_BK_REFS is set // Perl recognizes \10, \11, and so on only if enough number of // parentheses have opened before it, otherwise they are treated // as aliases of \010, \011, ... 
(octal characters). In case of // Sun's JDK, octal character expression must always begin with \0. // We will do as JDK does. But FIXME, take a look at "(a)(b)\29". // JDK treats \2 as a back reference to the 2nd group because // there are only two groups. But in our poor implementation, // we cannot help but treat \29 as a back reference to the 29th group. else if (unit.bk && Character.isDigit(unit.ch) && !syntax.get(RESyntax.RE_NO_BK_REFS)) { addToken(currentToken); int numBegin = index - 1; int numEnd = pLength; for (int i = index; i < pLength; i++) { if (! Character.isDigit(pattern[i])) { numEnd = i; break; } } int num = parseInt(pattern, numBegin, numEnd-numBegin, 10); currentToken = new RETokenBackRef(subIndex,num,insens); index = numEnd; } // START OF STRING OPERATOR // \A if RE_STRING_ANCHORS is set else if (unit.bk && (unit.ch == 'A') && syntax.get(RESyntax.RE_STRING_ANCHORS)) { addToken(currentToken); currentToken = new RETokenStart(subIndex,null); } // WORD BREAK OPERATOR // \b if ???? else if (unit.bk && (unit.ch == 'b') && syntax.get(RESyntax.RE_STRING_ANCHORS)) { addToken(currentToken); currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, false); } // WORD BEGIN OPERATOR // \< if ???? else if (unit.bk && (unit.ch == '<')) { addToken(currentToken); currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN, false); } // WORD END OPERATOR // \> if ???? else if (unit.bk && (unit.ch == '>')) { addToken(currentToken); currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.END, false); } // NON-WORD BREAK OPERATOR // \B if ???? 
else if (unit.bk && (unit.ch == 'B') && syntax.get(RESyntax.RE_STRING_ANCHORS)) { addToken(currentToken); currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, true); } // DIGIT OPERATOR // \d if RE_CHAR_CLASS_ESCAPES is set else if (unit.bk && (unit.ch == 'd') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) { addToken(currentToken); currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,false); } // NON-DIGIT OPERATOR // \D else if (unit.bk && (unit.ch == 'D') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) { addToken(currentToken); currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,true); } // NEWLINE ESCAPE // \n else if (unit.bk && (unit.ch == 'n')) { addToken(currentToken); currentToken = new RETokenChar(subIndex,'\n',false); } // RETURN ESCAPE // \r else if (unit.bk && (unit.ch == 'r')) { addToken(currentToken); currentToken = new RETokenChar(subIndex,'\r',false); } // WHITESPACE OPERATOR // \s if RE_CHAR_CLASS_ESCAPES is set else if (unit.bk && (unit.ch == 's') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) { addToken(currentToken); currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,false); } // NON-WHITESPACE OPERATOR // \S else if (unit.bk && (unit.ch == 'S') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) { addToken(currentToken); currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,true); } // TAB ESCAPE // \t else if (unit.bk && (unit.ch == 't')) { addToken(currentToken); currentToken = new RETokenChar(subIndex,'\t',false); } // ALPHANUMERIC OPERATOR // \w else if (unit.bk && (unit.ch == 'w') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) { addToken(currentToken); currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,false); } // NON-ALPHANUMERIC OPERATOR // \W else if (unit.bk && (unit.ch == 'W') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) { addToken(currentToken); currentToken = new 
RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,true); } // END OF STRING OPERATOR // \Z else if (unit.bk && (unit.ch == 'Z') && syntax.get(RESyntax.RE_STRING_ANCHORS)) { addToken(currentToken); currentToken = new RETokenEnd(subIndex,null); } // HEX CHARACTER, UNICODE CHARACTER // \x1B, \u1234 else if ((unit.bk && (unit.ch == 'x') && syntax.get(RESyntax.RE_HEX_CHAR)) || (unit.bk && (unit.ch == 'u') && syntax.get(RESyntax.RE_UNICODE_CHAR))) { CharExpression ce = getCharExpression(pattern, index - 2, pLength, syntax); if (ce == null) throw new REException("invalid hex character", REException.REG_ESCAPE, index); index = index - 2 + ce.len; addToken(currentToken); currentToken = new RETokenChar(subIndex,ce.ch,insens); } // NAMED PROPERTY // \p{prop}, \P{prop} else if ((unit.bk && (unit.ch == 'p') && syntax.get(RESyntax.RE_NAMED_PROPERTY)) || (unit.bk && (unit.ch == 'P') && syntax.get(RESyntax.RE_NAMED_PROPERTY))) { NamedProperty np = getNamedProperty(pattern, index - 2, pLength); if (np == null) throw new REException("invalid escape sequence", REException.REG_ESCAPE, index); index = index - 2 + np.len; addToken(currentToken); currentToken = getRETokenNamedProperty(subIndex,np,insens,index); } // NON-SPECIAL CHARACTER (or escape to make literal) // c | \* for example else { // not a special character addToken(currentToken); currentToken = new RETokenChar(subIndex,unit.ch,insens); } } // end while // Add final buffered token and an EndSub marker addToken(currentToken); if (branches != null) { branches.addElement(new RE(firstToken,lastToken,numSubs,subIndex,minimumLength, maximumLength)); branches.trimToSize(); // compact the Vector minimumLength = 0; maximumLength = 0; firstToken = lastToken = null; addToken(new RETokenOneOf(subIndex,branches,false)); } else addToken(new RETokenEndSub(subIndex)); } /** Value returned by parseCharClass. {@code token} is the RETokenOneOf produced by the parse; {@code index} is the pattern position the caller continues from (the nested-class branch of parseCharClass assigns it back to its own index). {@code returnAtAndOperator} defaults to false. NOTE(review): presumably set when parsing stopped at an '&&' operator; the code that sets it is not visible in this excerpt, so confirm. */ private static class ParseCharClassResult { RETokenOneOf token; int index; boolean returnAtAndOperator = false; } /** * Parse [...] or [^...] 
and make an RETokenOneOf instance. * @param subIndex subIndex to be given to the created RETokenOneOf instance. * @param pattern Input array of characters to be parsed. * @param index Index pointing to the character next to the beginning '['. * @param pLength Limit of the input array. * @param cflags Compilation flags used to parse the pattern. * @param pflags Flags that affect the behavior of this method. * @param syntax Syntax used to parse the pattern. */ private static ParseCharClassResult parseCharClass(int subIndex, char[] pattern, int index, int pLength, int cflags, RESyntax syntax, int pflags) throws REException { boolean insens = ((cflags & REG_ICASE) > 0); Vector options = new Vector(); Vector addition = new Vector(); boolean additionAndAppeared = false; final int RETURN_AT_AND = 0x01; boolean returnAtAndOperator = ((pflags & RETURN_AT_AND) != 0); boolean negative = false; char ch; char lastChar = 0; boolean lastCharIsSet = false; if (index == pLength) throw new REException(getLocalizedMessage("unmatched.bracket"),REException.REG_EBRACK,index); // Check for initial caret, negation if ((ch = pattern[index]) == '^') { negative = true; if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index); ch = pattern[index]; } // Check for leading right bracket literal if (ch == ']') { lastChar = ch; lastCharIsSet = true; if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index); } while ((ch = pattern[index++]) != ']') { if ((ch == '-') && (lastCharIsSet)) { if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index); if ((ch = pattern[index]) == ']') { options.addElement(new RETokenChar(subIndex,lastChar,insens)); lastChar = '-'; } else { if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) { CharExpression ce = getCharExpression(pattern, index, pLength, syntax); if (ce == null) throw new 
REException("invalid escape sequence", REException.REG_ESCAPE, index); ch = ce.ch; index = index + ce.len - 1; } options.addElement(new RETokenRange(subIndex,lastChar,ch,insens)); lastChar = 0; lastCharIsSet = false; index++; } } else if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) { if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index); int posixID = -1; boolean negate = false; char asciiEsc = 0; boolean asciiEscIsSet = false; NamedProperty np = null; if (("dswDSW".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_CHAR_CLASS_ESC_IN_LISTS)) { switch (pattern[index]) { case 'D': negate = true; case 'd': posixID = RETokenPOSIX.DIGIT; break; case 'S': negate = true; case 's': posixID = RETokenPOSIX.SPACE; break; case 'W': negate = true; case 'w': posixID = RETokenPOSIX.ALNUM; break; } } if (("pP".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_NAMED_PROPERTY)) { np = getNamedProperty(pattern, index - 1, pLength); if (np == null) throw new REException("invalid escape sequence", REException.REG_ESCAPE, index); index = index - 1 + np.len - 1; } else { CharExpression ce = getCharExpression(pattern, index - 1, pLength, syntax); if (ce == null) throw new REException("invalid escape sequence", REException.REG_ESCAPE, index); asciiEsc = ce.ch; asciiEscIsSet = true; index = index - 1 + ce.len - 1; } if (lastCharIsSet) options.addElement(new RETokenChar(subIndex,lastChar,insens)); if (posixID != -1) { options.addElement(new RETokenPOSIX(subIndex,posixID,insens,negate)); } else if (np != null) { options.addElement(getRETokenNamedProperty(subIndex,np,insens,index)); } else if (asciiEscIsSet) { lastChar = asciiEsc; lastCharIsSet = true; } else { lastChar = pattern[index]; lastCharIsSet = true; } ++index; } else if ((ch == '[') && (syntax.get(RESyntax.RE_CHAR_CLASSES)) && (index < pLength) && (pattern[index] == ':')) { StringBuffer posixSet = new StringBuffer(); index = 
getPosixSet(pattern,index+1,posixSet); int posixId = RETokenPOSIX.intValue(posixSet.toString()); if (posixId != -1) options.addElement(new RETokenPOSIX(subIndex,posixId,insens,false)); } else if ((ch == '[') && (syntax.get(RESyntax.RE_NESTED_CHARCLASS))) { ParseCharClassResult result = parseCharClass( subIndex, pattern, index, pLength, cflags, syntax, 0); addition.addElement(result.token); addition.addElement("|"); index = result.index; } else if ((ch == '&') && (syntax.get(RESyntax.RE_NESTED_CHARCLASS)) && (index < pLength) && (pattern[index] == '&')) { if (returnAtAndOperator) { ParseCharClassResult result = new ParseCharClassResult(); options.trimToSize(); if (additionAndAppeared) addition.addElement("&");
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -