tokenizer.java

来自「Practical AI in Java 人工智能编程」· Java 代码 · 共 67 行

JAVA
67
字号
import java.io.Reader;
import java.io.StreamTokenizer;
import java.io.StringReader;
import java.util.Locale;
import java.util.Vector;

/**
 * Splits free text into a list of lower-cased word tokens.
 *
 * <p>Only word tokens produced by a {@link StreamTokenizer} are considered;
 * numbers, end-of-line markers and single punctuation characters are ignored.
 * A word is kept when, after lower-casing and removing at most one trailing
 * {@code .}, {@code ,}, {@code ;} or {@code :}, it
 * <ul>
 *   <li>is at least two characters long, and</li>
 *   <li>contains no hyphen, no period and no ASCII digit.</li>
 * </ul>
 */
public class Tokenizer {

    /** Punctuation marks of which a single trailing occurrence is stripped. */
    private static final String TRAILING_PUNCTUATION = ".,;:";

    /** Words shorter than this are dropped. */
    private static final int MIN_WORD_LENGTH = 2;

    /**
     * Tokenizes the given string.
     *
     * @param ss the text to tokenize
     * @return a {@code Vector} of {@code String} tokens in input order
     * @throws Exception if reading the text fails
     */
    public static Vector getTokens(String ss) throws Exception {
        return getTokens(new StringReader(ss));
    }

    /**
     * Tokenizes all text available from the given reader. The reader is read
     * to end-of-stream and is not closed by this method.
     *
     * @param reader the source of the text to tokenize
     * @return a {@code Vector} of {@code String} tokens in input order
     * @throws Exception if reading from {@code reader} fails
     */
    public static Vector getTokens(Reader reader) throws Exception {
        Vector<String> words = new Vector<String>();
        StreamTokenizer tokenizer = new StreamTokenizer(reader);

        // By default StreamTokenizer treats ' and " as string delimiters and
        // / as a line-comment character. In natural-language text that makes
        // an apostrophe or slash swallow the words that follow it, so these
        // characters are demoted to ordinary single-character tokens.
        tokenizer.ordinaryChar('\'');
        tokenizer.ordinaryChar('"');
        tokenizer.ordinaryChar('/');

        while (tokenizer.nextToken() != StreamTokenizer.TT_EOF) {
            // Numbers, line ends and single punctuation characters can never
            // satisfy the minimum-length filter, so only words are examined.
            if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
                continue;
            }
            // Locale.ROOT keeps the result independent of the platform locale
            // (e.g. a Turkish default locale would map 'I' to a dotless i).
            String word = stripTrailingPunctuation(
                    tokenizer.sval.toLowerCase(Locale.ROOT));
            if (isAcceptable(word)) {
                words.add(word);
            }
        }
        return words;
    }

    /** Removes one trailing punctuation mark (. , ; :) from the word, if present. */
    private static String stripTrailingPunctuation(String word) {
        int last = word.length() - 1;
        if (last >= 0 && TRAILING_PUNCTUATION.indexOf(word.charAt(last)) >= 0) {
            return word.substring(0, last);
        }
        return word;
    }

    /** Tells whether the word is long enough and free of hyphens, periods and ASCII digits. */
    private static boolean isAcceptable(String word) {
        if (word.length() < MIN_WORD_LENGTH) {
            return false;
        }
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            if (c == '-' || c == '.' || (c >= '0' && c <= '9')) {
                return false;
            }
        }
        return true;
    }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?