📄 regexparser.java
字号:
} Token processBacksolidus_z() throws ParseException { this.next(); return Token.token_stringend; } Token processBacksolidus_b() throws ParseException { this.next(); return Token.token_wordedge; } Token processBacksolidus_B() throws ParseException { this.next(); return Token.token_not_wordedge; } Token processBacksolidus_lt() throws ParseException { this.next(); return Token.token_wordbeginning; } Token processBacksolidus_gt() throws ParseException { this.next(); return Token.token_wordend; } Token processStar(Token tok) throws ParseException { this.next(); if (this.read() == T_QUESTION) { this.next(); return Token.createNGClosure(tok); } else return Token.createClosure(tok); } Token processPlus(Token tok) throws ParseException { // X+ -> XX* this.next(); if (this.read() == T_QUESTION) { this.next(); return Token.createConcat(tok, Token.createNGClosure(tok)); } else return Token.createConcat(tok, Token.createClosure(tok)); } Token processQuestion(Token tok) throws ParseException { // X? -> X| this.next(); Token par = Token.createUnion(); if (this.read() == T_QUESTION) { this.next(); par.addChild(Token.createEmpty()); par.addChild(tok); } else { par.addChild(tok); par.addChild(Token.createEmpty()); } return par; } boolean checkQuestion(int off) { return off < this.regexlen && this.regex.charAt(off) == '?'; } Token processParen() throws ParseException { this.next(); int p = this.parennumber++; Token tok = Token.createParen(this.parseRegex(), p); if (this.read() != T_RPAREN) throw ex("parser.factor.1", this.offset-1); this.next(); // Skips ')' return tok; } Token processParen2() throws ParseException { this.next(); Token tok = Token.createParen(this.parseRegex(), 0); if (this.read() != T_RPAREN) throw ex("parser.factor.1", this.offset-1); this.next(); // Skips ')' return tok; } Token processCondition() throws ParseException { // this.offset points the next of '(' if (this.offset+1 >= this.regexlen) throw ex("parser.factor.4", this.offset); // Parses a condition. int refno = -1; Token condition = null; int ch = this.regex.charAt(this.offset); if ('1' <= ch && ch <= '9') { refno = ch-'0'; this.hasBackReferences = true; if (this.references == null) this.references = new Vector(); this.references.addElement(new ReferencePosition(refno, this.offset)); this.offset ++; if (this.regex.charAt(this.offset) != ')') throw ex("parser.factor.1", this.offset); this.offset ++; } else { if (ch == '?') this.offset --; // Points '('. this.next(); condition = this.parseFactor(); switch (condition.type) { case Token.LOOKAHEAD: case Token.NEGATIVELOOKAHEAD: case Token.LOOKBEHIND: case Token.NEGATIVELOOKBEHIND: break; case Token.ANCHOR: if (this.read() != T_RPAREN) throw ex("parser.factor.1", this.offset-1); break; default: throw ex("parser.factor.5", this.offset); } } // Parses yes/no-patterns. this.next(); Token yesPattern = this.parseRegex(); Token noPattern = null; if (yesPattern.type == Token.UNION) { if (yesPattern.size() != 2) throw ex("parser.factor.6", this.offset); noPattern = yesPattern.getChild(1); yesPattern = yesPattern.getChild(0); } if (this.read() != T_RPAREN) throw ex("parser.factor.1", this.offset-1); this.next(); return Token.createCondition(refno, condition, yesPattern, noPattern); } Token processModifiers() throws ParseException { // this.offset points the next of '?'. // modifiers ::= [imsw]* ('-' [imsw]*)? ':' int add = 0, mask = 0, ch = -1; while (this.offset < this.regexlen) { ch = this.regex.charAt(this.offset); int v = REUtil.getOptionValue(ch); if (v == 0) break; // '-' or ':'? add |= v; this.offset ++; } if (this.offset >= this.regexlen) throw ex("parser.factor.2", this.offset-1); if (ch == '-') { this.offset ++; while (this.offset < this.regexlen) { ch = this.regex.charAt(this.offset); int v = REUtil.getOptionValue(ch); if (v == 0) break; // ':'? mask |= v; this.offset ++; } if (this.offset >= this.regexlen) throw ex("parser.factor.2", this.offset-1); } Token tok; if (ch == ':') { this.offset ++; this.next(); tok = Token.createModifierGroup(this.parseRegex(), add, mask); if (this.read() != T_RPAREN) throw ex("parser.factor.1", this.offset-1); this.next(); } else if (ch == ')') { // such as (?-i) this.offset ++; this.next(); tok = Token.createModifierGroup(this.parseRegex(), add, mask); } else throw ex("parser.factor.3", this.offset); return tok; } Token processIndependent() throws ParseException { this.next(); Token tok = Token.createLook(Token.INDEPENDENT, this.parseRegex()); if (this.read() != T_RPAREN) throw ex("parser.factor.1", this.offset-1); this.next(); // Skips ')' return tok; } Token processBacksolidus_c() throws ParseException { int ch2; // Must be in 0x0040-0x005f if (this.offset >= this.regexlen || ((ch2 = this.regex.charAt(this.offset++)) & 0xffe0) != 0x0040) throw ex("parser.atom.1", this.offset-1); this.next(); return Token.createChar(ch2-0x40); } Token processBacksolidus_C() throws ParseException { throw ex("parser.process.1", this.offset); } Token processBacksolidus_i() throws ParseException { Token tok = Token.createChar('i'); this.next(); return tok; } Token processBacksolidus_I() throws ParseException { throw ex("parser.process.1", this.offset); } Token processBacksolidus_g() throws ParseException { this.next(); return Token.getGraphemePattern(); } Token processBacksolidus_X() throws ParseException { this.next(); return Token.getCombiningCharacterSequence(); } Token processBackreference() throws ParseException { int refnum = this.chardata-'0'; Token tok = Token.createBackReference(refnum); this.hasBackReferences = true; if (this.references == null) this.references = new Vector(); this.references.addElement(new ReferencePosition(refnum, this.offset-2)); this.next(); return tok; } // ---------------------------------------------------------------- /** * factor ::= ('^' | '$' | '\A' | '\Z' | '\z' | '\b' | '\B' | '\<' | '\>' * | atom (('*' | '+' | '?' | minmax ) '?'? )?) * | '(?=' regex ')' | '(?!' regex ')' | '(?<=' regex ')' | '(?<!' regex ')' * | '(?#' [^)]* ')' * minmax ::= '{' min (',' max?)? '}' * min ::= [0-9]+ * max ::= [0-9]+ */ Token parseFactor() throws ParseException { int ch = this.read(); Token tok; switch (ch) { case T_CARET: return this.processCaret(); case T_DOLLAR: return this.processDollar(); case T_LOOKAHEAD: return this.processLookahead(); case T_NEGATIVELOOKAHEAD: return this.processNegativelookahead(); case T_LOOKBEHIND: return this.processLookbehind(); case T_NEGATIVELOOKBEHIND: return this.processNegativelookbehind(); case T_COMMENT: this.next(); return Token.createEmpty(); case T_BACKSOLIDUS: switch (this.chardata) { case 'A': return this.processBacksolidus_A(); case 'Z': return this.processBacksolidus_Z(); case 'z': return this.processBacksolidus_z(); case 'b': return this.processBacksolidus_b(); case 'B': return this.processBacksolidus_B(); case '<': return this.processBacksolidus_lt(); case '>': return this.processBacksolidus_gt(); } // through down } tok = this.parseAtom(); ch = this.read(); switch (ch) { case T_STAR: return this.processStar(tok); case T_PLUS: return this.processPlus(tok); case T_QUESTION: return this.processQuestion(tok); case T_CHAR: if (this.chardata == '{' && this.offset < this.regexlen) { int off = this.offset; // this.offset -> next of '{' int min = 0, max = -1; if ((ch = this.regex.charAt(off++)) >= '0' && ch <= '9') { min = ch -'0'; while (off < this.regexlen && (ch = this.regex.charAt(off++)) >= '0' && ch <= '9') { min = min*10 +ch-'0'; if (min < 0) throw ex("parser.quantifier.5", this.offset); } } else { throw ex("parser.quantifier.1", this.offset); } max = min; if (ch == ',') { if (off >= this.regexlen) { throw ex("parser.quantifier.3", this.offset); } else if ((ch = this.regex.charAt(off++)) >= '0' && ch <= '9') { max = ch -'0'; // {min,max} while (off < this.regexlen && (ch = this.regex.charAt(off++)) >= '0' && ch <= '9') { max = max*10 +ch-'0'; if (max < 0) throw ex("parser.quantifier.5", this.offset); } if (min > max) throw ex("parser.quantifier.4", this.offset); } else { // assume {min,} max = -1; } } if (ch != '}') throw ex("parser.quantifier.2", this.offset); if (this.checkQuestion(off)) { // off -> next of '}' tok = Token.createNGClosure(tok); this.offset = off+1; } else { tok = Token.createClosure(tok); this.offset = off; } tok.setMin(min); tok.setMax(max); //System.err.println("CLOSURE: "+min+", "+max); this.next(); } } return tok; } /** * atom ::= char | '.' | char-class | '(' regex ')' | '(?:' regex ')' | '\' [0-9] * | '\w' | '\W' | '\d' | '\D' | '\s' | '\S' | category-block * | '(?>' regex ')' * char ::= '\\' | '\' [efnrt] | bmp-code | character-1 */ Token parseAtom() throws ParseException { int ch = this.read(); Token tok = null; switch (ch) { case T_LPAREN: return this.processParen(); case T_LPAREN2: return this.processParen2(); // '(?:' case T_CONDITION: return this.processCondition(); // '(?(' case T_MODIFIERS: return this.processModifiers(); // (?modifiers ... ) case T_INDEPENDENT: return this.processIndependent(); case T_DOT: this.next(); // Skips '.' tok = Token.token_dot; break; /** * char-class ::= '[' ( '^'? range ','?)+ ']' * range ::= '\d' | '\w' | '\s' | category-block | range-char * | range-char '-' range-char * range-char ::= '\[' | '\]' | '\\' | '\' [,-efnrtv] | bmp-code | character-2 * bmp-char ::= '\' 'u' [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] */ case T_LBRACKET: return this.parseCharacterClass(true); case T_SET_OPERATIONS: return this.parseSetOperations(); case T_BACKSOLIDUS: switch (this.chardata) { case 'd': case 'D': case 'w': case 'W': case 's': case 'S': tok = this.getTokenForShorthand(this.chardata); this.next(); return tok; case 'e': case 'f': case 'n': case 'r': case 't': case 'u': case 'v': case 'x': { int ch2 = this.decodeEscaped(); if (ch2 < 0x10000) { tok = Token.createChar(ch2); } else { tok = Token.createString(REUtil.decomposeToSurrogates(ch2)); } } break; case 'c': return this.processBacksolidus_c(); case 'C': return this.processBacksolidus_C(); case 'i': return this.processBacksolidus_i(); case 'I': return this.processBacksolidus_I();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -