⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regexparser.java

📁 java1.6众多例子参考
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
    }    Token processBacksolidus_z() throws ParseException {        this.next();        return Token.token_stringend;    }    Token processBacksolidus_b() throws ParseException {        this.next();        return Token.token_wordedge;    }    Token processBacksolidus_B() throws ParseException {        this.next();        return Token.token_not_wordedge;    }    Token processBacksolidus_lt() throws ParseException {        this.next();        return Token.token_wordbeginning;    }    Token processBacksolidus_gt() throws ParseException {        this.next();        return Token.token_wordend;    }    Token processStar(Token tok) throws ParseException {        this.next();        if (this.read() == T_QUESTION) {            this.next();            return Token.createNGClosure(tok);        } else            return Token.createClosure(tok);    }    Token processPlus(Token tok) throws ParseException {        // X+ -> XX*        this.next();        if (this.read() == T_QUESTION) {            this.next();            return Token.createConcat(tok, Token.createNGClosure(tok));        } else            return Token.createConcat(tok, Token.createClosure(tok));    }    Token processQuestion(Token tok) throws ParseException {        // X? -> X|        this.next();        Token par = Token.createUnion();        if (this.read() == T_QUESTION) {            this.next();            par.addChild(Token.createEmpty());            par.addChild(tok);        } else {            par.addChild(tok);            par.addChild(Token.createEmpty());        }        return par;    }    boolean checkQuestion(int off) {        return off < this.regexlen && this.regex.charAt(off) == '?';    }    Token processParen() throws ParseException {        this.next();        int p = this.parennumber++;        Token tok = Token.createParen(this.parseRegex(), p);        if (this.read() != T_RPAREN)  throw ex("parser.factor.1", this.offset-1);        this.next();                            // Skips ')'        return tok;    }    Token processParen2() throws ParseException {        this.next();        Token tok = Token.createParen(this.parseRegex(), 0);        if (this.read() != T_RPAREN)  throw ex("parser.factor.1", this.offset-1);        this.next();                            // Skips ')'        return tok;    }    Token processCondition() throws ParseException {                                                // this.offset points the next of '('        if (this.offset+1 >= this.regexlen)  throw ex("parser.factor.4", this.offset);                                                // Parses a condition.        int refno = -1;        Token condition = null;        int ch = this.regex.charAt(this.offset);        if ('1' <= ch && ch <= '9') {            refno = ch-'0';            this.hasBackReferences = true;            if (this.references == null)  this.references = new Vector();            this.references.addElement(new ReferencePosition(refno, this.offset));            this.offset ++;            if (this.regex.charAt(this.offset) != ')')  throw ex("parser.factor.1", this.offset);            this.offset ++;        } else {            if (ch == '?')  this.offset --; // Points '('.            this.next();            condition = this.parseFactor();            switch (condition.type) {              case Token.LOOKAHEAD:              case Token.NEGATIVELOOKAHEAD:              case Token.LOOKBEHIND:              case Token.NEGATIVELOOKBEHIND:                break;              case Token.ANCHOR:                if (this.read() != T_RPAREN)  throw ex("parser.factor.1", this.offset-1);                break;              default:                throw ex("parser.factor.5", this.offset);            }        }                                                // Parses yes/no-patterns.        this.next();        Token yesPattern = this.parseRegex();        Token noPattern = null;        if (yesPattern.type == Token.UNION) {            if (yesPattern.size() != 2)  throw ex("parser.factor.6", this.offset);            noPattern = yesPattern.getChild(1);            yesPattern = yesPattern.getChild(0);        }        if (this.read() != T_RPAREN)  throw ex("parser.factor.1", this.offset-1);        this.next();        return Token.createCondition(refno, condition, yesPattern, noPattern);    }    Token processModifiers() throws ParseException {                                                // this.offset points the next of '?'.                                                // modifiers ::= [imsw]* ('-' [imsw]*)? ':'        int add = 0, mask = 0, ch = -1;        while (this.offset < this.regexlen) {            ch = this.regex.charAt(this.offset);            int v = REUtil.getOptionValue(ch);            if (v == 0)  break;                 // '-' or ':'?            add |= v;            this.offset ++;        }        if (this.offset >= this.regexlen)  throw ex("parser.factor.2", this.offset-1);        if (ch == '-') {            this.offset ++;            while (this.offset < this.regexlen) {                ch = this.regex.charAt(this.offset);                int v = REUtil.getOptionValue(ch);                if (v == 0)  break;             // ':'?                mask |= v;                this.offset ++;            }            if (this.offset >= this.regexlen)  throw ex("parser.factor.2", this.offset-1);        }        Token tok;        if (ch == ':') {            this.offset ++;            this.next();            tok = Token.createModifierGroup(this.parseRegex(), add, mask);            if (this.read() != T_RPAREN)  throw ex("parser.factor.1", this.offset-1);            this.next();        } else if (ch == ')') {                 // such as (?-i)            this.offset ++;            this.next();            tok = Token.createModifierGroup(this.parseRegex(), add, mask);        } else            throw ex("parser.factor.3", this.offset);        return tok;    }    Token processIndependent() throws ParseException {        this.next();        Token tok = Token.createLook(Token.INDEPENDENT, this.parseRegex());        if (this.read() != T_RPAREN)  throw ex("parser.factor.1", this.offset-1);        this.next();                            // Skips ')'        return tok;    }    Token processBacksolidus_c() throws ParseException {        int ch2;                                // Must be in 0x0040-0x005f        if (this.offset >= this.regexlen            || ((ch2 = this.regex.charAt(this.offset++)) & 0xffe0) != 0x0040)            throw ex("parser.atom.1", this.offset-1);        this.next();        return Token.createChar(ch2-0x40);    }    Token processBacksolidus_C() throws ParseException {        throw ex("parser.process.1", this.offset);    }    Token processBacksolidus_i() throws ParseException {        Token tok = Token.createChar('i');        this.next();        return tok;    }    Token processBacksolidus_I() throws ParseException {        throw ex("parser.process.1", this.offset);    }    Token processBacksolidus_g() throws ParseException {        this.next();        return Token.getGraphemePattern();    }    Token processBacksolidus_X() throws ParseException {        this.next();        return Token.getCombiningCharacterSequence();    }    Token processBackreference() throws ParseException {        int refnum = this.chardata-'0';        Token tok = Token.createBackReference(refnum);        this.hasBackReferences = true;        if (this.references == null)  this.references = new Vector();        this.references.addElement(new ReferencePosition(refnum, this.offset-2));        this.next();        return tok;    }    // ----------------------------------------------------------------    /**     * factor ::= ('^' | '$' | '\A' | '\Z' | '\z' | '\b' | '\B' | '\<' | '\>'     *            | atom (('*' | '+' | '?' | minmax ) '?'? )?)     *            | '(?=' regex ')'  | '(?!' regex ')'  | '(?&lt;=' regex ')'  | '(?&lt;!' regex ')'     *            | '(?#' [^)]* ')'     * minmax ::= '{' min (',' max?)? '}'     * min ::= [0-9]+     * max ::= [0-9]+     */    Token parseFactor() throws ParseException {                int ch = this.read();        Token tok;        switch (ch) {          case T_CARET:         return this.processCaret();          case T_DOLLAR:        return this.processDollar();          case T_LOOKAHEAD:     return this.processLookahead();          case T_NEGATIVELOOKAHEAD: return this.processNegativelookahead();          case T_LOOKBEHIND:    return this.processLookbehind();          case T_NEGATIVELOOKBEHIND: return this.processNegativelookbehind();          case T_COMMENT:            this.next();            return Token.createEmpty();          case T_BACKSOLIDUS:            switch (this.chardata) {              case 'A': return this.processBacksolidus_A();              case 'Z': return this.processBacksolidus_Z();              case 'z': return this.processBacksolidus_z();              case 'b': return this.processBacksolidus_b();              case 'B': return this.processBacksolidus_B();              case '<': return this.processBacksolidus_lt();              case '>': return this.processBacksolidus_gt();            }                                                // through down        }        tok = this.parseAtom();        ch = this.read();        switch (ch) {          case T_STAR:  return this.processStar(tok);          case T_PLUS:  return this.processPlus(tok);          case T_QUESTION: return this.processQuestion(tok);          case T_CHAR:            if (this.chardata == '{' && this.offset < this.regexlen) {                int off = this.offset;          // this.offset -> next of '{'                int min = 0, max = -1;                if ((ch = this.regex.charAt(off++)) >= '0' && ch <= '9') {                    min = ch -'0';                    while (off < this.regexlen                           && (ch = this.regex.charAt(off++)) >= '0' && ch <= '9') {                        min = min*10 +ch-'0';                        if (min < 0)                            throw ex("parser.quantifier.5", this.offset);                    }                }                else {                    throw ex("parser.quantifier.1", this.offset);                }                max = min;                if (ch == ',') {                   if (off >= this.regexlen) {                       throw ex("parser.quantifier.3", this.offset);                   }                   else if ((ch = this.regex.charAt(off++)) >= '0' && ch <= '9') {                                               max = ch -'0';       // {min,max}                        while (off < this.regexlen                               && (ch = this.regex.charAt(off++)) >= '0'                               && ch <= '9') {                            max = max*10 +ch-'0';                            if (max < 0)                                throw ex("parser.quantifier.5", this.offset);                        }                        if (min > max)                            throw ex("parser.quantifier.4", this.offset);                   }                   else { // assume {min,}                        max = -1;                               }                }               if (ch != '}')                   throw ex("parser.quantifier.2", this.offset);               if (this.checkQuestion(off)) {  // off -> next of '}'                    tok = Token.createNGClosure(tok);                    this.offset = off+1;                } else {                    tok = Token.createClosure(tok);                    this.offset = off;                }                tok.setMin(min);                tok.setMax(max);                //System.err.println("CLOSURE: "+min+", "+max);                this.next();            }        }        return tok;    }    /**     * atom ::= char | '.' | char-class | '(' regex ')' | '(?:' regex ')' | '\' [0-9]     *          | '\w' | '\W' | '\d' | '\D' | '\s' | '\S' | category-block     *          | '(?>' regex ')'     * char ::= '\\' | '\' [efnrt] | bmp-code | character-1     */    Token parseAtom() throws ParseException {        int ch = this.read();        Token tok = null;        switch (ch) {          case T_LPAREN:        return this.processParen();          case T_LPAREN2:       return this.processParen2(); // '(?:'          case T_CONDITION:     return this.processCondition(); // '(?('          case T_MODIFIERS:     return this.processModifiers(); // (?modifiers ... )          case T_INDEPENDENT:   return this.processIndependent();          case T_DOT:            this.next();                    // Skips '.'            tok = Token.token_dot;            break;            /**             * char-class ::= '[' ( '^'? range ','?)+ ']'             * range ::= '\d' | '\w' | '\s' | category-block | range-char             *           | range-char '-' range-char             * range-char ::= '\[' | '\]' | '\\' | '\' [,-efnrtv] | bmp-code | character-2             * bmp-char ::= '\' 'u' [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]             */          case T_LBRACKET:      return this.parseCharacterClass(true);          case T_SET_OPERATIONS: return this.parseSetOperations();          case T_BACKSOLIDUS:            switch (this.chardata) {              case 'd':  case 'D':              case 'w':  case 'W':              case 's':  case 'S':                tok = this.getTokenForShorthand(this.chardata);                this.next();                return tok;              case 'e':  case 'f':  case 'n':  case 'r':              case 't':  case 'u':  case 'v':  case 'x':                {                    int ch2 = this.decodeEscaped();                    if (ch2 < 0x10000) {                        tok = Token.createChar(ch2);                    } else {                        tok = Token.createString(REUtil.decomposeToSurrogates(ch2));                    }                }                break;              case 'c': return this.processBacksolidus_c();              case 'C': return this.processBacksolidus_C();              case 'i': return this.processBacksolidus_i();              case 'I': return this.processBacksolidus_I();

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -