📄 tokenizer.java

📁 Grammatica is a C# and Java parser generator (compiler compiler). It improves upon similar tools (like…
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
         * @return the latest matched token pattern, or
         *         null if no match found
         */
        public abstract TokenPattern getMatchedPattern();

        /**
         * Returns the length of the latest match.
         *
         * @return the length of the latest match, or
         *         zero (0) if no match found
         */
        public abstract int getMatchedLength();

        /**
         * Checks if the end of string was encountered during the last
         * match.
         *
         * @return true if the end of string was reached, or
         *         false otherwise
         */
        public abstract boolean hasReadEndOfString();
    }

    /**
     * A regular expression token pattern matcher. This class is used
     * to match a single regular expression with the tokenizer
     * buffer. This class also maintains the state of the last match.
     */
    private class RegExpTokenMatcher extends TokenMatcher {

        /**
         * The token pattern to match with.
         */
        private TokenPattern pattern;

        /**
         * The regular expression to use.
         */
        private RegExp regExp;

        /**
         * The regular expression matcher to use.
         */
        private Matcher matcher;

        /**
         * Creates a new regular expression token matcher. The
         * matcher is bound to the tokenizer buffer at creation.
         *
         * @param pattern        the pattern to match
         *
         * @throws RegExpException if the regular expression couldn't
         *             be created properly
         */
        public RegExpTokenMatcher(TokenPattern pattern)
            throws RegExpException {

            this.pattern = pattern;
            this.regExp = new RegExp(pattern.getPattern());
            this.matcher = regExp.matcher(buffer);
        }

        /**
         * Returns the token pattern.
         *
         * @return the token pattern
         */
        public TokenPattern getPattern() {
            return pattern;
        }

        /**
         * Returns the start position of the latest match.
         *
         * @return the start position of the last match, or
         *         zero (0) if none found
         */
        public int start() {
            if (matcher.length() <= 0) {
                return 0;
            } else {
                return matcher.start();
            }
        }

        /**
         * Returns the latest matched token pattern.
         *
         * @return the latest matched token pattern, or
         *         null if no match found
         */
        public TokenPattern getMatchedPattern() {
            if (matcher.length() <= 0) {
                return null;
            } else {
                return pattern;
            }
        }

        /**
         * Returns the length of the latest match.
         *
         * @return the length of the latest match, or
         *         zero (0) if no match found
         */
        public int getMatchedLength() {
            return matcher.length();
        }

        /**
         * Checks if the end of string was encountered during the last
         * match.
         *
         * @return true if the end of string was reached, or
         *         false otherwise
         */
        public boolean hasReadEndOfString() {
            return matcher.hasReadEndOfString();
        }

        /**
         * Checks if the token pattern matches the tokenizer buffer
         * from the specified position. This method will also reset
         * all flags in this matcher.
         *
         * @param pos            the starting position
         *
         * @return true if a match was found, or
         *         false otherwise
         */
        public boolean matchFrom(int pos) {
            return matcher.matchFrom(pos);
        }

        /**
         * Returns a string representation of this token matcher.
         *
         * @return a detailed string representation of this matcher
         */
        public String toString() {
            return pattern.toString() + "\n" +
                   regExp.toString() + "\n";
        }
    }

    /**
     * A string token pattern matcher. This class is used to match a
     * set of strings with the tokenizer buffer. This class
     * internally uses a DFA for maximum performance. It also
     * maintains the state of the last match.
     */
    private class StringTokenMatcher extends TokenMatcher {

        /**
         * The list of string token patterns.
         */
        private ArrayList patterns = new ArrayList();

        /**
         * The finite automaton to use for matching.
         */
        private Automaton start = new Automaton();

        /**
         * The last token pattern match found.
         */
        private TokenPattern match = null;

        /**
         * The end of string read flag.
         */
        private boolean endOfString = false;

        /**
         * Creates a new string token matcher.
         */
        public StringTokenMatcher() {
        }

        /**
         * Resets the matcher state. This will clear the results of
         * the last match.
         */
        public void reset() {
            match = null;
            endOfString = false;
        }

        /**
         * Returns the latest matched token pattern.
         *
         * @return the latest matched token pattern, or
         *         null if no match found
         */
        public TokenPattern getMatchedPattern() {
            return match;
        }

        /**
         * Returns the length of the latest match. For a string
         * pattern this is the length of the pattern string itself.
         *
         * @return the length of the latest match, or
         *         zero (0) if no match found
         */
        public int getMatchedLength() {
            if (match == null) {
                return 0;
            } else {
                return match.getPattern().length();
            }
        }

        /**
         * Checks if the end of string was encountered during the last
         * match.
         *
         * @return true if the end of string was reached, or
         *         false otherwise
         */
        public boolean hasReadEndOfString() {
            return endOfString;
        }

        /**
         * Sets the end of string encountered flag.
         */
        public void setReadEndOfString() {
            endOfString = true;
        }

        /**
         * Returns the token pattern with the specified id. Only
         * token patterns handled by this matcher can be returned.
         *
         * @param id         the token pattern id
         *
         * @return the token pattern found, or
         *         null if not found
         */
        public TokenPattern getPattern(int id) {
            TokenPattern  pattern;

            for (int i = 0; i < patterns.size(); i++) {
                pattern = (TokenPattern) patterns.get(i);
                if (pattern.getId() == id) {
                    return pattern;
                }
            }
            return null;
        }

        /**
         * Adds a string token pattern to this matcher. The pattern
         * is stored in the pattern list and registered in the
         * automaton under its pattern string.
         *
         * @param pattern        the pattern to add
         */
        public void addPattern(TokenPattern pattern) {
            patterns.add(pattern);
            start.addMatch(pattern.getPattern(), pattern);
        }

        /**
         * Checks if the token pattern matches the tokenizer buffer
         * from the specified position. This method will also reset
         * all flags in this matcher.
         *
         * @param pos            the starting position
         *
         * @return true if a match was found, or
         *         false otherwise
         */
        public boolean matchFrom(int pos) {
            reset();
            match = (TokenPattern) start.matchFrom(this, pos);
            return match != null;
        }

        /**
         * Returns a string representation of this matcher. This will
         * contain all the token patterns.
         *
         * @return a detailed string representation of this matcher
         */
        public String toString() {
            // Named "text" rather than "buffer" so that it does not
            // shadow the tokenizer buffer used by the other matchers.
            StringBuffer  text = new StringBuffer();

            for (int i = 0; i < patterns.size(); i++) {
                text.append(patterns.get(i));
                text.append("\n\n");
            }
            return text.toString();
        }
    }

    /**
     * A deterministic finite automaton. This is a simple automaton
     * for character sequences. It cannot handle character set state
     * transitions, but only supports single character transitions.
     */
    private class Automaton {

        /**
         * The state value. This is the match value registered for
         * the string ending in this state, or null if none.
         */
        private Object value = null;

        /**
         * The automaton state transition tree. Each transition from
         * this state to another state is added to this tree with the
         * corresponding character.
         */
        private AutomatonTree tree = new AutomatonTree();

        /**
         * Creates a new empty automaton.
         */
        public Automaton() {
        }

        /**
         * Adds a string match to this automaton. New states and
         * transitions will be added to extend this automaton to
         * support the specified string.
         *
         * @param str            the string to match
         * @param value          the match value
         */
        public void addMatch(String str, Object value) {
            Automaton  state;

            if (str.equals("")) {
                // The whole string has been consumed; this state
                // becomes the accepting state for the value.
                this.value = value;
            } else {
                state = tree.find(str.charAt(0));
                if (state == null) {
                    state = new Automaton();
                    tree.add(str.charAt(0), state);
                }
                state.addMatch(str.substring(1), value);
            }
        }

        /**
         * Checks if the automaton matches the tokenizer buffer from
         * the specified position. This method will set the end of
         * buffer flag in the specified token matcher if the end of
         * buffer is reached.
         *
         * @param m              the string token matcher
         * @param pos            the starting position
         *
         * @return the match value, or
         *         null if no match is found
         */
        public Object matchFrom(StringTokenMatcher m, int pos) {
            Object     result = null;
            Automaton  state;

            if (pos >= buffer.length()) {
                m.setReadEndOfString();
            } else if (tree != null) {
                state = tree.find(buffer.charAt(pos));
                if (state != null) {
                    result = state.matchFrom(m, pos + 1);
                }
            }

            // A value found further along the buffer takes precedence
            // over the value stored in this state.
            return (result == null) ? value : result;
        }
    }

    /**
     * An automaton state transition tree. This class contains a
     * binary search tree for the automaton transitions from one
     * state to another. All transitions are linked to a single
     * character. A node without a transition state is an empty
     * tree (a leaf placeholder).
     */
    private class AutomatonTree {

        /**
         * The transition character. This value is only meaningful
         * when the transition state is set. Emptiness is determined
         * by the state, not by this character, so that the zero
         * ('\0') character can be used as an ordinary transition.
         */
        private char value = '\0';

        /**
         * The transition state. If this value is null, this tree is
         * empty.
         */
        private Automaton state = null;

        /**
         * The left subtree. Holds transition characters smaller than
         * the one in this node. Created when this node is filled.
         */
        private AutomatonTree left = null;

        /**
         * The right subtree. Holds the remaining transition
         * characters. Created when this node is filled.
         */
        private AutomatonTree right = null;

        /**
         * Creates a new empty automaton transition tree.
         */
        public AutomatonTree() {
        }

        /**
         * Finds an automaton state from the specified transition
         * character. This method searches this transition tree for
         * a matching transition.
         *
         * @param c              the character to search for
         *
         * @return the automaton state found, or
         *         null if no transition exists
         */
        public Automaton find(char c) {
            if (state == null) {
                // Empty node: the subtrees have not been created, so
                // the search must stop here.
                return null;
            } else if (value == c) {
                return state;
            } else if (value > c) {
                return left.find(c);
            } else {
                return right.find(c);
            }
        }

        /**
         * Adds a transition to this tree. The first transition added
         * to an empty node is stored in the node itself, and two new
         * empty subtrees are created below it.
         *
         * @param c              the character to transition for
         * @param state          the state to transition to (should
         *                       not be null, as a null state marks
         *                       an empty node)
         */
        public void add(char c, Automaton state) {
            if (this.state == null) {
                this.value = c;
                this.state = state;
                this.left = new AutomatonTree();
                this.right = new AutomatonTree();
            } else if (value > c) {
                left.add(c, state);
            } else {
                right.add(c, state);
            }
        }
    }
}
上一页 12
💿 文件大小 619 K
👤 上传用户 laosoler
📂 所属分类编译器/解释器
🏷️ 相关标签

#compiler #Grammatica #generator #improves
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -