📄 re.java
字号:
if (quot) unit.bk = false; // ALTERNATION OPERATOR // \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT) // not available if RE_LIMITED_OPS is set // TODO: the '\n' literal here should be a test against REToken.newline, // which unfortunately may be more than a single character. if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ (unit.bk || quot))) || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !(unit.bk || quot)) ) && !syntax.get(RESyntax.RE_LIMITED_OPS)) { // make everything up to here be a branch. create vector if nec. addToken(currentToken); RE theBranch = new RE(firstToken, lastToken, numSubs, subIndex, minimumLength); minimumLength = 0; if (branches == null) { branches = new Vector(); } branches.addElement(theBranch); firstToken = lastToken = currentToken = null; } // INTERVAL OPERATOR: // {x} | {x,} | {x,y} (RE_INTERVALS && RE_NO_BK_BRACES) // \{x\} | \{x,\} | \{x,y\} (RE_INTERVALS && !RE_NO_BK_BRACES) // // OPEN QUESTION: // what is proper interpretation of '{' at start of string? else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ (unit.bk || quot))) { int newIndex = getMinMax(pattern,index,minMax,syntax); if (newIndex > index) { if (minMax.first > minMax.second) throw new REException(getLocalizedMessage("interval.order"),REException.REG_BADRPT,newIndex); if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,newIndex); if (currentToken instanceof RETokenRepeated) throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,newIndex); if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary) throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,newIndex); if ((currentToken.getMinimumLength() == 0) && (minMax.second == Integer.MAX_VALUE)) throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,newIndex); index = newIndex; currentToken = setRepeated(currentToken,minMax.first,minMax.second,index); } else { addToken(currentToken); currentToken = new RETokenChar(subIndex,unit.ch,insens); } } // LIST OPERATOR: // [...] | [^...] else if ((unit.ch == '[') && !(unit.bk || quot)) { Vector options = new Vector(); boolean negative = false; char lastChar = 0; if (index == pLength) throw new REException(getLocalizedMessage("unmatched.bracket"),REException.REG_EBRACK,index); // Check for initial caret, negation if ((ch = pattern[index]) == '^') { negative = true; if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index); ch = pattern[index]; } // Check for leading right bracket literal if (ch == ']') { lastChar = ch; if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index); } while ((ch = pattern[index++]) != ']') { if ((ch == '-') && (lastChar != 0)) { if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index); if ((ch = pattern[index]) == ']') { options.addElement(new RETokenChar(subIndex,lastChar,insens)); lastChar = '-'; } else { options.addElement(new RETokenRange(subIndex,lastChar,ch,insens)); lastChar = 0; index++; } } else if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) { if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index); int posixID = -1; boolean negate = false; char asciiEsc = 0; if (("dswDSW".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_CHAR_CLASS_ESC_IN_LISTS)) { switch (pattern[index]) { case 'D': negate = true; case 'd': posixID = RETokenPOSIX.DIGIT; break; case 'S': negate = true; case 's': posixID = RETokenPOSIX.SPACE; break; case 'W': negate = true; case 'w': posixID = RETokenPOSIX.ALNUM; break; } } else if ("nrt".indexOf(pattern[index]) != -1) { switch (pattern[index]) { case 'n': asciiEsc = '\n'; break; case 't': asciiEsc = '\t'; break; case 'r': asciiEsc = '\r'; break; } } if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens)); if (posixID != -1) { options.addElement(new RETokenPOSIX(subIndex,posixID,insens,negate)); } else if (asciiEsc != 0) { lastChar = asciiEsc; } else { lastChar = pattern[index]; } ++index; } else if ((ch == '[') && (syntax.get(RESyntax.RE_CHAR_CLASSES)) && (index < pLength) && (pattern[index] == ':')) { StringBuffer posixSet = new StringBuffer(); index = getPosixSet(pattern,index+1,posixSet); int posixId = RETokenPOSIX.intValue(posixSet.toString()); if (posixId != -1) options.addElement(new RETokenPOSIX(subIndex,posixId,insens,false)); } else { if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens)); lastChar = ch; } if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index); } // while in list // Out of list, index is one past ']' if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens)); // Create a new RETokenOneOf addToken(currentToken); options.trimToSize(); currentToken = new RETokenOneOf(subIndex,options,negative); } // SUBEXPRESSIONS // (...) | \(...\) depending on RE_NO_BK_PARENS else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) { boolean pure = false; boolean comment = false; boolean lookAhead = false; boolean negativelh = false; if ((index+1 < pLength) && (pattern[index] == '?')) { switch (pattern[index+1]) { case '!': if (syntax.get(RESyntax.RE_LOOKAHEAD)) { pure = true; negativelh = true; lookAhead = true; index += 2; } break; case '=': if (syntax.get(RESyntax.RE_LOOKAHEAD)) { pure = true; lookAhead = true; index += 2; } break; case ':': if (syntax.get(RESyntax.RE_PURE_GROUPING)) { pure = true; index += 2; } break; case '#': if (syntax.get(RESyntax.RE_COMMENTS)) { comment = true; } break; default: throw new REException(getLocalizedMessage("repeat.no.token"), REException.REG_BADRPT, index); } } if (index >= pLength) { throw new REException(getLocalizedMessage("unmatched.paren"), REException.REG_ESUBREG,index); } // find end of subexpression int endIndex = index; int nextIndex = index; int nested = 0; while ( ((nextIndex = getCharUnit(pattern,endIndex,unit,false)) > 0) && !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) ) if ((endIndex = nextIndex) >= pLength) throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex); else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) nested++; else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) nested--; // endIndex is now position at a ')','\)' // nextIndex is end of string or position after ')' or '\)' if (comment) index = nextIndex; else { // not a comment // create RE subexpression as token. addToken(currentToken); if (!pure) { numSubs++; } int useIndex = (pure || lookAhead) ? 0 : nextSub + numSubs; currentToken = new RE(String.valueOf(pattern,index,endIndex-index).toCharArray(),cflags,syntax,useIndex,nextSub + numSubs); numSubs += ((RE) currentToken).getNumSubs(); if (lookAhead) { currentToken = new RETokenLookAhead(currentToken,negativelh); } index = nextIndex; } // not a comment } // subexpression // UNMATCHED RIGHT PAREN // ) or \) throw exception if // !syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))) { throw new REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index); } // START OF LINE OPERATOR // ^ else if ((unit.ch == '^') && !(unit.bk || quot)) { addToken(currentToken); currentToken = null; addToken(new RETokenStart(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null)); } // END OF LINE OPERATOR // $ else if ((unit.ch == '$') && !(unit.bk || quot)) { addToken(currentToken); currentToken = null; addToken(new RETokenEnd(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null)); } // MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null) // . else if ((unit.ch == '.') && !(unit.bk || quot)) { addToken(currentToken); currentToken = new RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL)); } // ZERO-OR-MORE REPEAT OPERATOR // * else if ((unit.ch == '*') && !(unit.bk || quot)) { if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index); if (currentToken instanceof RETokenRepeated) throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index); if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary) throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index); if (currentToken.getMinimumLength() == 0) throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index); currentToken = setRepeated(currentToken,0,Integer.MAX_VALUE,index); } // ONE-OR-MORE REPEAT OPERATOR // + | \+ depending on RE_BK_PLUS_QM // not available if RE_LIMITED_OPS is set else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) { if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index); if (currentToken instanceof RETokenRepeated) throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index); if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary) throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index); if (currentToken.getMinimumLength() == 0) throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index); currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index); } // ZERO-OR-ONE REPEAT OPERATOR / STINGY MATCHING OPERATOR // ? | \? depending on RE_BK_PLUS_QM // not available if RE_LIMITED_OPS is set // stingy matching if RE_STINGY_OPS is set and it follows a quantifier else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) { if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index); // Check for stingy matching on RETokenRepeated if (currentToken instanceof RETokenRepeated) { if (syntax.get(RESyntax.RE_STINGY_OPS) && !((RETokenRepeated)currentToken).isStingy()) ((RETokenRepeated)currentToken).makeStingy(); else throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index); } else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary) throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index); else currentToken = setRepeated(currentToken,0,1,index); } // BACKREFERENCE OPERATOR // \1 \2 ... \9 // not available if RE_NO_BK_REFS is set else if (unit.bk && Character.isDigit(unit.ch) && !syntax.get(RESyntax.RE_NO_BK_REFS)) { addToken(currentToken); currentToken = new RETokenBackRef(subIndex,Character.digit(unit.ch,10),insens); } // START OF STRING OPERATOR // \A if RE_STRING_ANCHORS is set else if (unit.bk && (unit.ch == 'A') && syntax.get(RESyntax.RE_STRING_ANCHORS)) { addToken(currentToken); currentToken = new RETokenStart(subIndex,null); } // WORD BREAK OPERATOR // \b if ???? else if (unit.bk && (unit.ch == 'b') && syntax.get(RESyntax.RE_STRING_ANCHORS)) { addToken(currentToken); currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, false); } // WORD BEGIN OPERATOR
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -