📄 regexparser.java

📁 java1.6众多例子参考
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/* * Copyright 1999-2004 The Apache Software Foundation. *  * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at *  *      http://www.apache.org/licenses/LICENSE-2.0 *  * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */package com.sun.org.apache.xerces.internal.impl.xpath.regex;import java.util.Locale;import java.util.MissingResourceException;import java.util.ResourceBundle;import java.util.Vector;/** * A Regular Expression Parser. *  * @xerces.internal * * @version $Id: RegexParser.java,v 1.2.6.1 2005/09/06 11:46:34 neerajbj Exp $ */class RegexParser {    static final int T_CHAR = 0;    static final int T_EOF = 1;    static final int T_OR = 2;                  // '|'    static final int T_STAR = 3;                // '*'    static final int T_PLUS = 4;                // '+'    static final int T_QUESTION = 5;            // '?'    static final int T_LPAREN = 6;              // '('    static final int T_RPAREN = 7;              // ')'    static final int T_DOT = 8;                 // '.'    static final int T_LBRACKET = 9;            // '['    static final int T_BACKSOLIDUS = 10;        // '\'    static final int T_CARET = 11;              // '^'    static final int T_DOLLAR = 12;             // '$'    static final int T_LPAREN2 = 13;            // '(?:'    static final int T_LOOKAHEAD = 14;          // '(?='    static final int T_NEGATIVELOOKAHEAD = 15;  // '(?!'    static final int T_LOOKBEHIND = 16;         // '(?<='    static final int T_NEGATIVELOOKBEHIND = 17; // '(?<!'    static final int T_INDEPENDENT = 18;        // '(?>'    static final int T_SET_OPERATIONS = 19;     // '(?['    static final int T_POSIX_CHARCLASS_START = 20; // '[:' in a character class    static final int T_COMMENT = 21;            // '(?#'    static final int T_MODIFIERS = 22;          // '(?' [\-,a-z,A-Z]    static final int T_CONDITION = 23;          // '(?('    static final int T_XMLSCHEMA_CC_SUBTRACTION = 24; // '-[' in a character class    static class ReferencePosition {        int refNumber;        int position;        ReferencePosition(int n, int pos) {            this.refNumber = n;            this.position = pos;        }    }    int offset;    String regex;    int regexlen;    int options;    ResourceBundle resources;    int chardata;    int nexttoken;    static protected final int S_NORMAL = 0;    static protected final int S_INBRACKETS = 1;    static protected final int S_INXBRACKETS = 2;    int context = S_NORMAL;    int parennumber = 1;    boolean hasBackReferences;    Vector references = null;    public RegexParser() {        this.setLocale(Locale.getDefault());    }    public RegexParser(Locale locale) {        this.setLocale(locale);    }    public void setLocale(Locale locale) {        try {            this.resources = ResourceBundle.getBundle("com.sun.org.apache.xerces.internal.impl.xpath.regex.message", locale);        } catch (MissingResourceException mre) {            throw new RuntimeException("Installation Problem???  Couldn't load messages: "                                       +mre.getMessage());        }    }    final ParseException ex(String key, int loc) {        return new ParseException(this.resources.getString(key), loc);    }    private final boolean isSet(int flag) {        return (this.options & flag) == flag;    }    synchronized Token parse(String regex, int options) throws ParseException {        this.options = options;        this.offset = 0;        this.setContext(S_NORMAL);        this.parennumber = 1;        this.hasBackReferences = false;        this.regex = regex;        if (this.isSet(RegularExpression.EXTENDED_COMMENT))            this.regex = REUtil.stripExtendedComment(this.regex);        this.regexlen = this.regex.length();        this.next();        Token ret = this.parseRegex();        if (this.offset != this.regexlen)            throw ex("parser.parse.1", this.offset);        if (this.references != null) {            for (int i = 0;  i < this.references.size();  i ++) {                ReferencePosition position = (ReferencePosition)this.references.elementAt(i);                if (this.parennumber <= position.refNumber)                    throw ex("parser.parse.2", position.position);            }            this.references.removeAllElements();        }        return ret;    }    /*    public RegularExpression createRegex(String regex, int options) throws ParseException {        Token tok = this.parse(regex, options);        return new RegularExpression(regex, tok, this.parennumber, this.hasBackReferences, options);    }    */    protected final void setContext(int con) {        this.context = con;    }    final int read() {        return this.nexttoken;    }    final void next() {        if (this.offset >= this.regexlen) {            this.chardata = -1;            this.nexttoken = T_EOF;            return;        }        int ret;        int ch = this.regex.charAt(this.offset++);        this.chardata = ch;        if (this.context == S_INBRACKETS) {            // In a character class, this.chardata has one character, that is to say,            // a pair of surrogates is composed and stored to this.chardata.            switch (ch) {              case '\\':                ret = T_BACKSOLIDUS;                if (this.offset >= this.regexlen)                    throw ex("parser.next.1", this.offset-1);                this.chardata = this.regex.charAt(this.offset++);                break;              case '-':                if (this.isSet(RegularExpression.XMLSCHEMA_MODE)                    && this.offset < this.regexlen && this.regex.charAt(this.offset) == '[') {                    this.offset++;                    ret = T_XMLSCHEMA_CC_SUBTRACTION;                } else                    ret = T_CHAR;                break;              case '[':                if (!this.isSet(RegularExpression.XMLSCHEMA_MODE)                    && this.offset < this.regexlen && this.regex.charAt(this.offset) == ':') {                    this.offset++;                    ret = T_POSIX_CHARCLASS_START;                    break;                } // Through down              default:                if (REUtil.isHighSurrogate(ch) && this.offset < this.regexlen) {                    int low = this.regex.charAt(this.offset);                    if (REUtil.isLowSurrogate(low)) {                        this.chardata = REUtil.composeFromSurrogates(ch, low);                        this.offset ++;                    }                }                ret = T_CHAR;            }            this.nexttoken = ret;            return;        }        switch (ch) {          case '|': ret = T_OR;             break;          case '*': ret = T_STAR;           break;          case '+': ret = T_PLUS;           break;          case '?': ret = T_QUESTION;       break;          case ')': ret = T_RPAREN;         break;          case '.': ret = T_DOT;            break;          case '[': ret = T_LBRACKET;       break;          case '^': ret = T_CARET;          break;          case '$': ret = T_DOLLAR;         break;          case '(':            ret = T_LPAREN;            if (this.offset >= this.regexlen)                break;            if (this.regex.charAt(this.offset) != '?')                break;            if (++this.offset >= this.regexlen)                throw ex("parser.next.2", this.offset-1);            ch = this.regex.charAt(this.offset++);            switch (ch) {              case ':':  ret = T_LPAREN2;            break;              case '=':  ret = T_LOOKAHEAD;          break;              case '!':  ret = T_NEGATIVELOOKAHEAD;  break;              case '[':  ret = T_SET_OPERATIONS;     break;              case '>':  ret = T_INDEPENDENT;        break;              case '<':                if (this.offset >= this.regexlen)                    throw ex("parser.next.2", this.offset-3);                ch = this.regex.charAt(this.offset++);                if (ch == '=') {                    ret = T_LOOKBEHIND;                } else if (ch == '!') {                    ret = T_NEGATIVELOOKBEHIND;                } else                    throw ex("parser.next.3", this.offset-3);                break;              case '#':                while (this.offset < this.regexlen) {                    ch = this.regex.charAt(this.offset++);                    if (ch == ')')  break;                }                if (ch != ')')                    throw ex("parser.next.4", this.offset-1);                ret = T_COMMENT;                break;              default:                if (ch == '-' || 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z') {// Options                    this.offset --;                    ret = T_MODIFIERS;                    break;                } else if (ch == '(') {         // conditional                    ret = T_CONDITION;          // this.offsets points the next of '('.                    break;                }                throw ex("parser.next.2", this.offset-2);            }            break;                      case '\\':            ret = T_BACKSOLIDUS;            if (this.offset >= this.regexlen)                throw ex("parser.next.1", this.offset-1);            this.chardata = this.regex.charAt(this.offset++);            break;          default:            ret = T_CHAR;        }        this.nexttoken = ret;    }    /**     * regex ::= term (`|` term)*     * term ::= factor+     * factor ::= ('^' | '$' | '\A' | '\Z' | '\z' | '\b' | '\B' | '\<' | '\>'     *            | atom (('*' | '+' | '?' | minmax ) '?'? )?)     *            | '(?=' regex ')'  | '(?!' regex ')'  | '(?&lt;=' regex ')'  | '(?&lt;!' regex ')'     * atom ::= char | '.' | range | '(' regex ')' | '(?:' regex ')' | '\' [0-9]     *          | '\w' | '\W' | '\d' | '\D' | '\s' | '\S' | category-block      */    Token parseRegex() throws ParseException {        Token tok = this.parseTerm();        Token parent = null;        while (this.read() == T_OR) {            this.next();                    // '|'            if (parent == null) {                parent = Token.createUnion();                parent.addChild(tok);                tok = parent;            }            tok.addChild(this.parseTerm());        }        return tok;    }    /**     * term ::= factor+     */    Token parseTerm() throws ParseException {        int ch = this.read();        if (ch == T_OR || ch == T_RPAREN || ch == T_EOF) {            return Token.createEmpty();        } else {            Token tok = this.parseFactor();            Token concat = null;            while ((ch = this.read()) != T_OR && ch != T_RPAREN && ch != T_EOF) {                if (concat == null) {                    concat = Token.createConcat();                    concat.addChild(tok);                    tok = concat;                }                concat.addChild(this.parseFactor());                //tok = Token.createConcat(tok, this.parseFactor());            }            return tok;        }    }    // ----------------------------------------------------------------    Token processCaret() throws ParseException {        this.next();        return Token.token_linebeginning;    }    Token processDollar() throws ParseException {        this.next();        return Token.token_lineend;    }    Token processLookahead() throws ParseException {        this.next();        Token tok = Token.createLook(Token.LOOKAHEAD, this.parseRegex());        if (this.read() != T_RPAREN)  throw ex("parser.factor.1", this.offset-1);        this.next();                            // ')'        return tok;    }    Token processNegativelookahead() throws ParseException {        this.next();        Token tok = Token.createLook(Token.NEGATIVELOOKAHEAD, this.parseRegex());        if (this.read() != T_RPAREN)  throw ex("parser.factor.1", this.offset-1);        this.next();                            // ')'        return tok;    }    Token processLookbehind() throws ParseException {        this.next();        Token tok = Token.createLook(Token.LOOKBEHIND, this.parseRegex());        if (this.read() != T_RPAREN)  throw ex("parser.factor.1", this.offset-1);        this.next();                            // ')'        return tok;    }    Token processNegativelookbehind() throws ParseException {        this.next();        Token tok = Token.createLook(Token.NEGATIVELOOKBEHIND, this.parseRegex());        if (this.read() != T_RPAREN)  throw ex("parser.factor.1", this.offset-1);        this.next();                    // ')'        return tok;    }    Token processBacksolidus_A() throws ParseException {        this.next();        return Token.token_stringbeginning;    }    Token processBacksolidus_Z() throws ParseException {        this.next();        return Token.token_stringend2;
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -