streamtokenizer.java
来自「This is a resource based on j2me embedde」· Java 代码 · 共 840 行 · 第 1/2 页
JAVA
840 行
/* * @(#)StreamTokenizer.java 1.46 06/10/10 * * Copyright 1990-2008 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version * 2 only, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License version 2 for more details (a copy is * included at /legal/license.txt). * * You should have received a copy of the GNU General Public License * version 2 along with this work; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA * * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa * Clara, CA 95054 or visit www.sun.com if you need additional * information or have any questions. * */package java.io;import sun.misc.CVM;/** * The <code>StreamTokenizer</code> class takes an input stream and * parses it into "tokens", allowing the tokens to be * read one at a time. The parsing process is controlled by a table * and a number of flags that can be set to various states. The * stream tokenizer can recognize identifiers, numbers, quoted * strings, and various comment styles. * <p> * Each byte read from the input stream is regarded as a character * in the range <code>'\u0000'</code> through <code>'\u00FF'</code>. * The character value is used to look up five possible attributes of * the character: <i>white space</i>, <i>alphabetic</i>, * <i>numeric</i>, <i>string quote</i>, and <i>comment character</i>. * Each character can have zero or more of these attributes. * <p> * In addition, an instance has four flags. 
These flags indicate:
 * <ul>
 * <li>Whether line terminators are to be returned as tokens or treated
 *     as white space that merely separates tokens.
 * <li>Whether C-style comments are to be recognized and skipped.
 * <li>Whether C++-style comments are to be recognized and skipped.
 * <li>Whether the characters of identifiers are converted to lowercase.
 * </ul>
 * <p>
 * A typical application first constructs an instance of this class,
 * sets up the syntax tables, and then repeatedly loops calling the
 * <code>nextToken</code> method in each iteration of the loop until
 * it returns the value <code>TT_EOF</code>.
 *
 * @author  James Gosling
 * @version 1.37, 05/03/00
 * @see     java.io.StreamTokenizer#nextToken()
 * @see     java.io.StreamTokenizer#TT_EOF
 * @since   JDK1.0
 */
public class StreamTokenizer {

    /*
     * Input sources. Only one of these will be non-null.
     * NOTE(review): the InputStream constructor is commented out in this
     * file, so in the code visible here only `reader` is ever assigned.
     */
    private Reader reader = null;
    private InputStream input = null;

    /*
     * Character buffer, created with room for 20 chars.
     * NOTE(review): presumably accumulates the characters of the token
     * being scanned; its use lies outside the visible part of the file,
     * so confirm against nextToken().
     */
    private char buf[] = new char[20];

    /**
     * The next character to be considered by the nextToken method.  May also
     * be NEED_CHAR to indicate that a new character should be read, or SKIP_LF
     * to indicate that a new character should be read and, if it is a '\n'
     * character, it should be discarded and a second new character should be
     * read.
*/
    private int peekc = NEED_CHAR;

    /* Sentinel values for peekc; both lie far outside any character value. */
    private static final int NEED_CHAR = Integer.MAX_VALUE;
    private static final int SKIP_LF = Integer.MAX_VALUE - 1;

    /*
     * NOTE(review): presumably set when the current token has been pushed
     * back for re-reading; the code using it is outside this view.
     */
    private boolean pushedBack;

    /*
     * NOTE(review): presumably the "convert identifiers to lowercase" flag
     * described in the class comment; the code using it is outside this view.
     */
    private boolean forceLower;

    /** The line number of the last token read */
    private int LINENO = 1;

    /*
     * Parsing flags, all off by default.
     * NOTE(review): judging by their names these back the end-of-line and
     * comment-recognition switches described in the class comment; their
     * setters are outside this view.
     */
    private boolean eolIsSignificantP = false;
    private boolean slashSlashCommentsP = false;
    private boolean slashStarCommentsP = false;

    /*
     * Syntax table: one attribute byte per character value 0..255.
     * The syntax-table methods (wordChars, whitespaceChars, ordinaryChars,
     * commentChar, quoteChar, parseNumbers, ...) index it by character
     * value and store combinations of the CT_* bits below.
     */
    private byte ctype[] = new byte[256];

    /* Attribute bits stored in ctype; each is a distinct power of two. */
    private static final byte CT_WHITESPACE = 1;
    private static final byte CT_DIGIT = 2;
    private static final byte CT_ALPHA = 4;
    private static final byte CT_QUOTE = 8;
    private static final byte CT_COMMENT = 16;

    /**
     * After a call to the <code>nextToken</code> method, this field
     * contains the type of the token just read. For a single character
     * token, its value is the single character, converted to an integer.
     * For a quoted string token (see <code>quoteChar</code>), its value is
     * the quote character.
     * Otherwise, its value is one of the following:
     * <ul>
     * <li><code>TT_WORD</code> indicates that the token is a word.
     * <li><code>TT_NUMBER</code> indicates that the token is a number.
     * <li><code>TT_EOL</code> indicates that the end of line has been read.
     *     The field can only have this value if the
     *     <code>eolIsSignificant</code> method has been called with the
     *     argument <code>true</code>.
     * <li><code>TT_EOF</code> indicates that the end of the input stream
     *     has been reached.
     * </ul>
     * <p>
     * The initial value of this field is -4.
     *
     * @see     java.io.StreamTokenizer#eolIsSignificant(boolean)
     * @see     java.io.StreamTokenizer#nextToken()
     * @see     java.io.StreamTokenizer#quoteChar(int)
     * @see     java.io.StreamTokenizer#TT_EOF
     * @see     java.io.StreamTokenizer#TT_EOL
     * @see     java.io.StreamTokenizer#TT_NUMBER
     * @see     java.io.StreamTokenizer#TT_WORD
     */
    public int ttype = TT_NOTHING;

    /**
     * A constant indicating that the end of the stream has been read.
     */
    public static final int TT_EOF = -1;

    /**
     * A constant indicating that the end of the line has been read.
*/
    public static final int TT_EOL = '\n';

    /**
     * A constant indicating that a number token has been read.
     */
    public static final int TT_NUMBER = -2;

    /**
     * A constant indicating that a word token has been read.
     */
    public static final int TT_WORD = -3;

    /* A constant indicating that no token has been read, used for
     * initializing ttype.  (Perhaps this could be made public and
     * made available as part of the API in a future release?)
     */
    private static final int TT_NOTHING = -4;

    /**
     * If the current token is a word token, this field contains a
     * string giving the characters of the word token. When the current
     * token is a quoted string token, this field contains the body of
     * the string.
     * <p>
     * The current token is a word when the value of the
     * <code>ttype</code> field is <code>TT_WORD</code>. The current token is
     * a quoted string token when the value of the <code>ttype</code> field is
     * a quote character.
     * <p>
     * The initial value of this field is null.
     *
     * @see     java.io.StreamTokenizer#quoteChar(int)
     * @see     java.io.StreamTokenizer#TT_WORD
     * @see     java.io.StreamTokenizer#ttype
     */
    public String sval;

    /**
     * If the current token is a number, this field contains the value
     * of that number. The current token is a number when the value of
     * the <code>ttype</code> field is <code>TT_NUMBER</code>.
     * <p>
     * The initial value of this field is 0.0.
     *
     * @see     java.io.StreamTokenizer#TT_NUMBER
     * @see     java.io.StreamTokenizer#ttype
     */
    public double nval;

    /**
     * Private constructor that initializes everything except the streams.
     * <p>
     * It installs the default syntax table by calling the public
     * syntax-table methods. <code>wordChars</code> and
     * <code>parseNumbers</code> OR their attribute into the table, whereas
     * <code>whitespaceChars</code>, <code>commentChar</code> and
     * <code>quoteChar</code> overwrite the entry, so the order of these
     * calls should be kept as it is.
     */
    private StreamTokenizer() {
        // Letters and the upper Latin-1 range (160..255) are word characters.
        wordChars('a', 'z');
        wordChars('A', 'Z');
        wordChars(128 + 32, 255);
        // Character values 0 through space (32) separate tokens.
        whitespaceChars(0, ' ');
        // '/' starts a single-line comment.
        commentChar('/');
        // Both double and single quotes delimit string constants.
        quoteChar('"');
        quoteChar('\'');
        // Digits, '.' and '-' receive the numeric attribute.
        parseNumbers();
    }

    /**
     * Creates a stream tokenizer that parses the specified input
     * stream.
The stream tokenizer is initialized to the following * default state: * <ul> * <li>All byte values <code>'A'</code> through <code>'Z'</code>, * <code>'a'</code> through <code>'z'</code>, and * <code>'\u00A0'</code> through <code>'\u00FF'</code> are * considered to be alphabetic. * <li>All byte values <code>'\u0000'</code> through * <code>'\u0020'</code> are considered to be white space. * <li><code>'/'</code> is a comment character. * <li>Single quote <code>'\''</code> and double quote <code>'"'</code> * are string quote characters. * <li>Numbers are parsed. * <li>Ends of lines are treated as white space, not as separate tokens. * <li>C-style and C++-style comments are not recognized. * </ul> * * deprecated As of JDK version 1.1, the preferred way to tokenize an * input stream is to convert it into a character stream, for example: * <blockquote><pre> * Reader r = new BufferedReader(new InputStreamReader(is)); * StreamTokenizer st = new StreamTokenizer(r); * </pre></blockquote> * * @param is an input stream. * @see java.io.BufferedReader * @see java.io.InputStreamReader * @see java.io.StreamTokenizer#StreamTokenizer(java.io.Reader) public StreamTokenizer(InputStream is) { this(); if (is == null) { throw new NullPointerException(); } input = is; } */ /** * Create a tokenizer that parses the given character stream. * * @param r a Reader object providing the input stream. * @since JDK1.1 */ public StreamTokenizer(Reader r) { this(); if (r == null) { throw new NullPointerException(); } reader = r; } /** * Resets this tokenizer's syntax table so that all characters are * "ordinary." See the <code>ordinaryChar</code> method * for more information on a character being ordinary. * * @see java.io.StreamTokenizer#ordinaryChar(int) */ public void resetSyntax() { for (int i = ctype.length; --i >= 0;) ctype[i] = 0; } /** * Specifies that all characters <i>c</i> in the range * <code>low <= <i>c</i> <= high</code> * are word constituents. 
A word token consists of a word constituent * followed by zero or more word constituents or number constituents. * * @param low the low end of the range. * @param hi the high end of the range. */ public void wordChars(int low, int hi) { if (low < 0) low = 0; if (hi >= ctype.length) hi = ctype.length - 1; while (low <= hi) ctype[low++] |= CT_ALPHA; } /** * Specifies that all characters <i>c</i> in the range * <code>low <= <i>c</i> <= high</code> * are white space characters. White space characters serve only to * separate tokens in the input stream. * * <p>Any other attribute settings for the characters in the specified * range are cleared. * * @param low the low end of the range. * @param hi the high end of the range. */ public void whitespaceChars(int low, int hi) { if (low < 0) low = 0; if (hi >= ctype.length) hi = ctype.length - 1; while (low <= hi) ctype[low++] = CT_WHITESPACE; } /** * Specifies that all characters <i>c</i> in the range * <code>low <= <i>c</i> <= high</code> * are "ordinary" in this tokenizer. See the * <code>ordinaryChar</code> method for more information on a * character being ordinary. * * @param low the low end of the range. * @param hi the high end of the range. * @see java.io.StreamTokenizer#ordinaryChar(int) */ public void ordinaryChars(int low, int hi) { if (low < 0) low = 0; if (hi >= ctype.length) hi = ctype.length - 1; while (low <= hi) ctype[low++] = 0; } /** * Specifies that the character argument is "ordinary" * in this tokenizer. It removes any special significance the * character has as a comment character, word component, string * delimiter, white space, or number character. When such a character * is encountered by the parser, the parser treates it as a * single-character token and sets <code>ttype</code> field to the * character value. * * @param ch the character. 
* @see java.io.StreamTokenizer#ttype */ public void ordinaryChar(int ch) { if (ch >= 0 && ch < ctype.length) ctype[ch] = 0; } /** * Specified that the character argument starts a single-line * comment. All characters from the comment character to the end of * the line are ignored by this stream tokenizer. * * <p>Any other attribute settings for the specified character are cleared. * * @param ch the character. */ public void commentChar(int ch) { if (ch >= 0 && ch < ctype.length) ctype[ch] = CT_COMMENT; } /** * Specifies that matching pairs of this character delimit string * constants in this tokenizer. * <p> * When the <code>nextToken</code> method encounters a string * constant, the <code>ttype</code> field is set to the string * delimiter and the <code>sval</code> field is set to the body of * the string. * <p> * If a string quote character is encountered, then a string is * recognized, consisting of all characters after (but not including) * the string quote character, up to (but not including) the next * occurrence of that same string quote character, or a line * terminator, or end of file. The usual escape sequences such as * <code>"\n"</code> and <code>"\t"</code> are recognized and * converted to single characters as the string is parsed. * * <p>Any other attribute settings for the specified character are cleared. * * @param ch the character. * @see java.io.StreamTokenizer#nextToken() * @see java.io.StreamTokenizer#sval * @see java.io.StreamTokenizer#ttype */ public void quoteChar(int ch) { if (ch >= 0 && ch < ctype.length) ctype[ch] = CT_QUOTE; } /** * Specifies that numbers should be parsed by this tokenizer. The * syntax table of this tokenizer is modified so that each of the twelve * characters: * <blockquote><pre> * 0 1 2 3 4 5 6 7 8 9 . - * </pre></blockquote> * <p> * has the "numeric" attribute. 
* <p> * When the parser encounters a word token that has the format of a * double precision floating-point number, it treats the token as a * number rather than a word, by setting the the <code>ttype</code> * field to the value <code>TT_NUMBER</code> and putting the numeric * value of the token into the <code>nval</code> field. * * @see java.io.StreamTokenizer#nval * @see java.io.StreamTokenizer#TT_NUMBER * @see java.io.StreamTokenizer#ttype */ public void parseNumbers() { for (int i = '0'; i <= '9'; i++) ctype[i] |= CT_DIGIT; ctype['.'] |= CT_DIGIT; ctype['-'] |= CT_DIGIT; } /** * Determines whether or not ends of line are treated as tokens. * If the flag argument is true, this tokenizer treats end of lines * as tokens; the <code>nextToken</code> method returns * <code>TT_EOL</code> and also sets the <code>ttype</code> field to * this value when an end of line is read. * <p> * A line is a sequence of characters ending with either a
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?