📄 tokenizer.java
字号:
* @return the latest matched token pattern, or * null if no match found */ public abstract TokenPattern getMatchedPattern(); /** * Returns the length of the latest match. * * @return the length of the latest match, or * zero (0) if no match found */ public abstract int getMatchedLength(); /** * Checks if the end of string was encountered during the last * match. * * @return true if the end of string was reached, or * false otherwise */ public abstract boolean hasReadEndOfString(); } /** * A regular expression token pattern matcher. This class is used * to match a single regular expression with the tokenizer * buffer. This class also maintains the state of the last match. */ private class RegExpTokenMatcher extends TokenMatcher { /** * The token pattern to match with. */ private TokenPattern pattern; /** * The regular expression to use. */ private RegExp regExp; /** * The regular expression matcher to use. */ private Matcher matcher; /** * Creates a new regular expression token matcher. * * @param pattern the pattern to match * * @throws RegExpException if the regular expression couldn't * be created properly */ public RegExpTokenMatcher(TokenPattern pattern) throws RegExpException { this.pattern = pattern; this.regExp = new RegExp(pattern.getPattern()); this.matcher = regExp.matcher(buffer); } /** * Returns the token pattern. * * @return the token pattern */ public TokenPattern getPattern() { return pattern; } /** * Returns the start position of the latest match. * * @return the start position of the last match, or * zero (0) if none found */ public int start() { if (matcher.length() <= 0) { return 0; } else { return matcher.start(); } } /** * Returns the latest matched token pattern. * * @return the latest matched token pattern, or * null if no match found */ public TokenPattern getMatchedPattern() { if (matcher.length() <= 0) { return null; } else { return pattern; } } /** * Returns the length of the latest match. 
* * @return the length of the latest match, or * zero (0) if no match found */ public int getMatchedLength() { return matcher.length(); } /** * Checks if the end of string was encountered during the last * match. * * @return true if the end of string was reached, or * false otherwise */ public boolean hasReadEndOfString() { return matcher.hasReadEndOfString(); } /** * Checks if the token pattern matches the tokenizer buffer * from the specified position. This method will also reset * all flags in this matcher. * * @param pos the starting position * * @return true if a match was found, or * false otherwise */ public boolean matchFrom(int pos) { return matcher.matchFrom(pos); } /** * Returns a string representation of this token matcher. * * @return a detailed string representation of this matcher */ public String toString() { return pattern.toString() + "\n" + regExp.toString() + "\n"; } } /** * A string token pattern matcher. This class is used to match a * set of strings with the tokenizer buffer. This class * internally uses a DFA for maximum performance. It also * maintains the state of the last match. */ private class StringTokenMatcher extends TokenMatcher { /** * The list of string token patterns. */ private ArrayList patterns = new ArrayList(); /** * The finite automaton to use for matching. */ private Automaton start = new Automaton(); /** * The last token pattern match found. */ private TokenPattern match = null; /** * The end of string read flag. */ private boolean endOfString = false; /** * Creates a new string token matcher. */ public StringTokenMatcher() { } /** * Resets the matcher state. This will clear the results of * the last match. */ public void reset() { match = null; endOfString = false; } /** * Returns the latest matched token pattern. * * @return the latest matched token pattern, or * null if no match found */ public TokenPattern getMatchedPattern() { return match; } /** * Returns the length of the latest match. 
* * @return the length of the latest match, or * zero (0) if no match found */ public int getMatchedLength() { if (match == null) { return 0; } else { return match.getPattern().length(); } } /** * Checks if the end of string was encountered during the last * match. * * @return true if the end of string was reached, or * false otherwise */ public boolean hasReadEndOfString() { return endOfString; } /** * Sets the end of string encountered flag. */ public void setReadEndOfString() { endOfString = true; } /** * Returns the token pattern with the specified id. Only * token patterns handled by this matcher can be returned. * * @param id the token pattern id * * @return the token pattern found, or * null if not found */ public TokenPattern getPattern(int id) { TokenPattern pattern; for (int i = 0; i < patterns.size(); i++) { pattern = (TokenPattern) patterns.get(i); if (pattern.getId() == id) { return pattern; } } return null; } /** * Adds a string token pattern to this matcher. * * @param pattern the pattern to add */ public void addPattern(TokenPattern pattern) { patterns.add(pattern); start.addMatch(pattern.getPattern(), pattern); } /** * Checks if the token pattern matches the tokenizer buffer * from the specified position. This method will also reset * all flags in this matcher. * * @param pos the starting position * * @return true if a match was found, or * false otherwise */ public boolean matchFrom(int pos) { reset(); match = (TokenPattern) start.matchFrom(this, pos); return match != null; } /** * Returns a string representation of this matcher. This will * contain all the token patterns. * * @return a detailed string representation of this matcher */ public String toString() { StringBuffer buffer = new StringBuffer(); for (int i = 0; i < patterns.size(); i++) { buffer.append(patterns.get(i)); buffer.append("\n\n"); } return buffer.toString(); } } /** * A deterministic finite automaton. This is a simple automaton * for character sequences. 
It cannot handle character set state
     * transitions, but only supports single character transitions.
     */
    private class Automaton {

        /**
         * The state value, i.e. the match value returned when a
         * match ends in this state. It is null for states that do
         * not terminate any added string.
         */
        private Object value = null;

        /**
         * The automaton state transition tree. Each transition from
         * this state to another state is added to this tree with the
         * corresponding character.
         */
        private AutomatonTree tree = new AutomatonTree();

        /**
         * Creates a new empty automaton.
         */
        public Automaton() {
        }

        /**
         * Adds a string match to this automaton. New states and
         * transitions will be added to extend this automaton to
         * support the specified string. Existing transitions are
         * reused, and the value of the final state is overwritten.
         *
         * @param str            the string to match
         * @param value          the match value
         */
        public void addMatch(String str, Object value) {
            Automaton state = this;

            // Walk (and extend) the automaton one character at a
            // time instead of recursing on successive substrings.
            for (int i = 0; i < str.length(); i++) {
                char c = str.charAt(i);
                Automaton next = state.tree.find(c);

                if (next == null) {
                    next = new Automaton();
                    state.tree.add(c, next);
                }
                state = next;
            }
            state.value = value;
        }

        /**
         * Checks if the automaton matches the tokenizer buffer from
         * the specified position. The value of the deepest reachable
         * state that has a non-null value is returned, so the longest
         * added string wins. This method will set the end of string
         * flag in the specified token matcher if the end of the
         * buffer is reached while matching.
         *
         * @param m              the string token matcher
         * @param pos            the starting position
         *
         * @return the match value, or
         *         null if no match is found
         */
        public Object matchFrom(StringTokenMatcher m, int pos) {
            Object result = null;
            Automaton state;

            if (pos >= buffer.length()) {
                m.setReadEndOfString();
            } else if (tree != null) {
                state = tree.find(buffer.charAt(pos));
                if (state != null) {
                    result = state.matchFrom(m, pos + 1);
                }
            }

            // Fall back to this state's own value when no longer
            // match was found further down.
            return (result == null) ? value : result;
        }
    }

    /**
     * An automaton state transition tree. This class contains a
     * binary search tree for the automaton transitions from one
     * state to another. All transitions are linked to a single
     * character.
     */
    private class AutomatonTree {

        /**
         * The transition character. This value is only meaningful
         * when the node is occupied, i.e. when the subtrees have
         * been created. Any character, including the zero ('\0')
         * character, is a valid transition character.
         */
        private char value = '\0';

        /**
         * The transition state.
         */
        private Automaton state = null;

        /**
         * The left subtree. This is null exactly when this node is
         * empty, and is created together with the right subtree when
         * a transition is stored in this node.
         */
        private AutomatonTree left = null;

        /**
         * The right subtree.
         */
        private AutomatonTree right = null;

        /**
         * Creates a new empty automaton transition tree.
         */
        public AutomatonTree() {
        }

        /**
         * Finds an automaton state from the specified transition
         * character. This method searches this transition tree for
         * a matching transition.
         *
         * @param c              the character to search for
         *
         * @return the automaton state found, or
         *         null if no transition exists
         */
        public Automaton find(char c) {
            AutomatonTree node = this;

            // An empty node has no subtrees. Testing the subtree
            // instead of comparing the character with '\0' keeps a
            // transition on the zero character distinct from an
            // empty node.
            while (node.left != null) {
                if (node.value == c) {
                    return node.state;
                }
                node = (node.value > c) ? node.left : node.right;
            }
            return null;
        }

        /**
         * Adds a transition to this tree. The transition is stored
         * in the first empty node found along the search path for
         * the character; characters equal to an existing one are
         * routed to the right subtree.
         *
         * @param c              the character to transition for
         * @param state          the state to transition to
         */
        public void add(char c, Automaton state) {
            AutomatonTree node = this;

            while (node.left != null) {
                node = (node.value > c) ? node.left : node.right;
            }
            node.value = c;
            node.state = state;
            node.left = new AutomatonTree();
            node.right = new AutomatonTree();
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -