streamtokenizer.java

来自「This is a resource based on j2me embedde」· Java 代码 · 共 840 行 · 第 1/2 页

JAVA
840
字号
/* * @(#)StreamTokenizer.java	1.46 06/10/10 * * Copyright  1990-2008 Sun Microsystems, Inc. All Rights Reserved.   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER   *    * This program is free software; you can redistribute it and/or   * modify it under the terms of the GNU General Public License version   * 2 only, as published by the Free Software Foundation.    *    * This program is distributed in the hope that it will be useful, but   * WITHOUT ANY WARRANTY; without even the implied warranty of   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU   * General Public License version 2 for more details (a copy is   * included at /legal/license.txt).    *    * You should have received a copy of the GNU General Public License   * version 2 along with this work; if not, write to the Free Software   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA   * 02110-1301 USA    *    * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa   * Clara, CA 95054 or visit www.sun.com if you need additional   * information or have any questions.  * */package java.io;import sun.misc.CVM;/** * The <code>StreamTokenizer</code> class takes an input stream and * parses it into "tokens", allowing the tokens to be * read one at a time. The parsing process is controlled by a table * and a number of flags that can be set to various states. The * stream tokenizer can recognize identifiers, numbers, quoted * strings, and various comment styles. * <p> * Each byte read from the input stream is regarded as a character * in the range <code>'&#92;u0000'</code> through <code>'&#92;u00FF'</code>. * The character value is used to look up five possible attributes of * the character: <i>white space</i>, <i>alphabetic</i>, * <i>numeric</i>, <i>string quote</i>, and <i>comment character</i>. * Each character can have zero or more of these attributes. * <p> * In addition, an instance has four flags. 
These flags indicate:
 * <ul>
 * <li>Whether line terminators are to be returned as tokens or treated
 *     as white space that merely separates tokens.
 * <li>Whether C-style comments are to be recognized and skipped.
 * <li>Whether C++-style comments are to be recognized and skipped.
 * <li>Whether the characters of identifiers are converted to lowercase.
 * </ul>
 * <p>
 * A typical application first constructs an instance of this class,
 * sets up the syntax tables, and then repeatedly loops calling the
 * <code>nextToken</code> method in each iteration of the loop until
 * it returns the value <code>TT_EOF</code>.
 *
 * @author  James Gosling
 * @version 1.37, 05/03/00
 * @see     java.io.StreamTokenizer#nextToken()
 * @see     java.io.StreamTokenizer#TT_EOF
 * @since   JDK1.0
 */
public class StreamTokenizer {

    /* Input sources. Only one of these will be non-null. */
    private Reader reader = null;
    private InputStream input = null;

    /*
     * Character buffer, initially 20 characters long.
     * NOTE(review): presumably the scratch buffer used while assembling
     * a token; its use is not visible in this part of the file - confirm.
     */
    private char buf[] = new char[20];

    /**
     * The next character to be considered by the nextToken method.  May also
     * be NEED_CHAR to indicate that a new character should be read, or SKIP_LF
     * to indicate that a new character should be read and, if it is a '\n'
     * character, it should be discarded and a second new character should be
     * read.
*/
    private int peekc = NEED_CHAR;

    /* Sentinel values for peekc; both lie far outside the 0..255 range covered by ctype. */
    private static final int NEED_CHAR = Integer.MAX_VALUE;
    private static final int SKIP_LF = Integer.MAX_VALUE - 1;

    /*
     * NOTE(review): the code that reads and writes the two flags below is
     * not visible in this part of the file; their meaning is inferred from
     * their names only - confirm before relying on it.
     */
    private boolean pushedBack;
    private boolean forceLower;

    /** The line number of the last token read */
    private int LINENO = 1;

    /*
     * Option flags, all initially off.
     * NOTE(review): presumably these back the end-of-line and comment
     * options described in the class comment - confirm against the setters.
     */
    private boolean eolIsSignificantP = false;
    private boolean slashSlashCommentsP = false;
    private boolean slashStarCommentsP = false;

    /*
     * Syntax table: one entry per character value 0..255. Each entry is
     * either 0 ("ordinary") or a bitwise OR of the CT_* attribute flags below.
     */
    private byte ctype[] = new byte[256];

    /* Attribute flags stored in ctype; distinct bits so they can be combined. */
    private static final byte CT_WHITESPACE = 1;
    private static final byte CT_DIGIT = 2;
    private static final byte CT_ALPHA = 4;
    private static final byte CT_QUOTE = 8;
    private static final byte CT_COMMENT = 16;

    /**
     * After a call to the <code>nextToken</code> method, this field
     * contains the type of the token just read. For a single character
     * token, its value is the single character, converted to an integer.
     * For a quoted string token (see <code>quoteChar</code>), its value
     * is the quote character.
     * Otherwise, its value is one of the following:
     * <ul>
     * <li><code>TT_WORD</code> indicates that the token is a word.
     * <li><code>TT_NUMBER</code> indicates that the token is a number.
     * <li><code>TT_EOL</code> indicates that the end of line has been read.
     *     The field can only have this value if the
     *     <code>eolIsSignificant</code> method has been called with the
     *     argument <code>true</code>.
     * <li><code>TT_EOF</code> indicates that the end of the input stream
     *     has been reached.
     * </ul>
     * <p>
     * The initial value of this field is -4.
*
     * @see     java.io.StreamTokenizer#eolIsSignificant(boolean)
     * @see     java.io.StreamTokenizer#nextToken()
     * @see     java.io.StreamTokenizer#quoteChar(int)
     * @see     java.io.StreamTokenizer#TT_EOF
     * @see     java.io.StreamTokenizer#TT_EOL
     * @see     java.io.StreamTokenizer#TT_NUMBER
     * @see     java.io.StreamTokenizer#TT_WORD
     */
    public int ttype = TT_NOTHING;

    /**
     * A constant indicating that the end of the stream has been read.
     */
    public static final int TT_EOF = -1;

    /**
     * A constant indicating that the end of the line has been read.
     * Its value is the line-feed character.
     */
    public static final int TT_EOL = '\n';

    /**
     * A constant indicating that a number token has been read.
     */
    public static final int TT_NUMBER = -2;

    /**
     * A constant indicating that a word token has been read.
     */
    public static final int TT_WORD = -3;

    /* A constant indicating that no token has been read, used for
     * initializing ttype.  (Perhaps this could be made public and
     * made available as part of the API in a future release?)
     */
    private static final int TT_NOTHING = -4;

    /**
     * If the current token is a word token, this field contains a
     * string giving the characters of the word token. When the current
     * token is a quoted string token, this field contains the body of
     * the string.
     * <p>
     * The current token is a word when the value of the
     * <code>ttype</code> field is <code>TT_WORD</code>. The current token is
     * a quoted string token when the value of the <code>ttype</code> field is
     * a quote character.
     * <p>
     * The initial value of this field is null.
     *
     * @see     java.io.StreamTokenizer#quoteChar(int)
     * @see     java.io.StreamTokenizer#TT_WORD
     * @see     java.io.StreamTokenizer#ttype
     */
    public String sval;

    /**
     * If the current token is a number, this field contains the value
     * of that number.
The current token is a number when the value of
     * the <code>ttype</code> field is <code>TT_NUMBER</code>.
     * <p>
     * The initial value of this field is 0.0.
     *
     * @see     java.io.StreamTokenizer#TT_NUMBER
     * @see     java.io.StreamTokenizer#ttype
     */
    public double nval;

    /**
     * Private constructor that initializes everything except the streams.
     * <p>
     * It installs the default syntax table: letters and the characters
     * 160 through 255 are word characters, 0 through space are white
     * space, <code>'/'</code> is a comment character, double and single
     * quote are string quote characters, and number parsing is enabled.
     * <p>
     * The setup is done through this class's public, overridable methods,
     * so the order of the calls below is visible to subclasses that
     * override them.
     */
    private StreamTokenizer() {
        wordChars('a', 'z');
        wordChars('A', 'Z');
        wordChars(128 + 32, 255);   // 160..255
        whitespaceChars(0, ' ');
        commentChar('/');
        quoteChar('"');
        quoteChar('\'');
        parseNumbers();
    }

    /**
     * Creates a stream tokenizer that parses the specified input
     * stream. The stream tokenizer is initialized to the following
     * default state:
     * <ul>
     * <li>All byte values <code>'A'</code> through <code>'Z'</code>,
     *     <code>'a'</code> through <code>'z'</code>, and
     *     <code>'&#92;u00A0'</code> through <code>'&#92;u00FF'</code> are
     *     considered to be alphabetic.
     * <li>All byte values <code>'&#92;u0000'</code> through
     *     <code>'&#92;u0020'</code> are considered to be white space.
     * <li><code>'/'</code> is a comment character.
     * <li>Single quote <code>'&#92;''</code> and double quote <code>'"'</code>
     *     are string quote characters.
     * <li>Numbers are parsed.
     * <li>Ends of lines are treated as white space, not as separate tokens.
     * <li>C-style and C++-style comments are not recognized.
     * </ul>
     *
     * deprecated As of JDK version 1.1, the preferred way to tokenize an
     * input stream is to convert it into a character stream, for example:
     * <blockquote><pre>
     *   Reader r = new BufferedReader(new InputStreamReader(is));
     *   StreamTokenizer st = new StreamTokenizer(r);
     * </pre></blockquote>
     *
     * @param      is        an input stream.
* @see        java.io.BufferedReader     * @see        java.io.InputStreamReader     * @see        java.io.StreamTokenizer#StreamTokenizer(java.io.Reader)     public StreamTokenizer(InputStream is) {     this();     if (is == null) {            throw new NullPointerException();        }	input = is;        }     */    /**     * Create a tokenizer that parses the given character stream.     *     * @param r  a Reader object providing the input stream.     * @since   JDK1.1     */    public StreamTokenizer(Reader r) {	this();        if (r == null) {            throw new NullPointerException();        }	reader = r;    }    /**     * Resets this tokenizer's syntax table so that all characters are     * "ordinary." See the <code>ordinaryChar</code> method     * for more information on a character being ordinary.     *     * @see     java.io.StreamTokenizer#ordinaryChar(int)     */    public void resetSyntax() {	for (int i = ctype.length; --i >= 0;)	    ctype[i] = 0;    }    /**     * Specifies that all characters <i>c</i> in the range     * <code>low&nbsp;&lt;=&nbsp;<i>c</i>&nbsp;&lt;=&nbsp;high</code>     * are word constituents. A word token consists of a word constituent     * followed by zero or more word constituents or number constituents.     *     * @param   low   the low end of the range.     * @param   hi    the high end of the range.     */    public void wordChars(int low, int hi) {	if (low < 0)	    low = 0;	if (hi >= ctype.length)	    hi = ctype.length - 1;	while (low <= hi)	    ctype[low++] |= CT_ALPHA;    }    /**     * Specifies that all characters <i>c</i> in the range     * <code>low&nbsp;&lt;=&nbsp;<i>c</i>&nbsp;&lt;=&nbsp;high</code>     * are white space characters. White space characters serve only to     * separate tokens in the input stream.     *     * <p>Any other attribute settings for the characters in the specified     * range are cleared.     *     * @param   low   the low end of the range.     * @param   hi    the high end of the range.     
*/    public void whitespaceChars(int low, int hi) {	if (low < 0)	    low = 0;	if (hi >= ctype.length)	    hi = ctype.length - 1;	while (low <= hi)	    ctype[low++] = CT_WHITESPACE;    }    /**     * Specifies that all characters <i>c</i> in the range     * <code>low&nbsp;&lt;=&nbsp;<i>c</i>&nbsp;&lt;=&nbsp;high</code>     * are "ordinary" in this tokenizer. See the     * <code>ordinaryChar</code> method for more information on a     * character being ordinary.     *     * @param   low   the low end of the range.     * @param   hi    the high end of the range.     * @see     java.io.StreamTokenizer#ordinaryChar(int)     */    public void ordinaryChars(int low, int hi) {	if (low < 0)	    low = 0;	if (hi >= ctype.length)	    hi = ctype.length - 1;	while (low <= hi)	    ctype[low++] = 0;    }    /**     * Specifies that the character argument is "ordinary"     * in this tokenizer. It removes any special significance the     * character has as a comment character, word component, string     * delimiter, white space, or number character. When such a character     * is encountered by the parser, the parser treates it as a     * single-character token and sets <code>ttype</code> field to the     * character value.     *     * @param   ch   the character.     * @see     java.io.StreamTokenizer#ttype     */    public void ordinaryChar(int ch) {        if (ch >= 0 && ch < ctype.length)  	    ctype[ch] = 0;    }    /**     * Specified that the character argument starts a single-line     * comment. All characters from the comment character to the end of     * the line are ignored by this stream tokenizer.     *     * <p>Any other attribute settings for the specified character are cleared.     *     * @param   ch   the character.     */    public void commentChar(int ch) {        if (ch >= 0 && ch < ctype.length)	    ctype[ch] = CT_COMMENT;    }    /**     * Specifies that matching pairs of this character delimit string     * constants in this tokenizer.     
* <p>     * When the <code>nextToken</code> method encounters a string     * constant, the <code>ttype</code> field is set to the string     * delimiter and the <code>sval</code> field is set to the body of     * the string.     * <p>     * If a string quote character is encountered, then a string is     * recognized, consisting of all characters after (but not including)     * the string quote character, up to (but not including) the next     * occurrence of that same string quote character, or a line     * terminator, or end of file. The usual escape sequences such as     * <code>"&#92;n"</code> and <code>"&#92;t"</code> are recognized and     * converted to single characters as the string is parsed.     *     * <p>Any other attribute settings for the specified character are cleared.     *     * @param   ch   the character.     * @see     java.io.StreamTokenizer#nextToken()     * @see     java.io.StreamTokenizer#sval     * @see     java.io.StreamTokenizer#ttype     */    public void quoteChar(int ch) {        if (ch >= 0 && ch < ctype.length) 	    ctype[ch] = CT_QUOTE;    }    /**     * Specifies that numbers should be parsed by this tokenizer. The     * syntax table of this tokenizer is modified so that each of the twelve     * characters:     * <blockquote><pre>     *      0 1 2 3 4 5 6 7 8 9 . -     * </pre></blockquote>     * <p>     * has the "numeric" attribute.     * <p>     * When the parser encounters a word token that has the format of a     * double precision floating-point number, it treats the token as a     * number rather than a word, by setting the the <code>ttype</code>     * field to the value <code>TT_NUMBER</code> and putting the numeric     * value of the token into the <code>nval</code> field.     
*
     * @see     java.io.StreamTokenizer#nval
     * @see     java.io.StreamTokenizer#TT_NUMBER
     * @see     java.io.StreamTokenizer#ttype
     */
    public void parseNumbers() {
        // The sign and the decimal point are numeric constituents too.
        ctype['-'] |= CT_DIGIT;
        ctype['.'] |= CT_DIGIT;
        for (char digit = '0'; digit <= '9'; digit++) {
            // OR the flag in so existing attributes are kept.
            ctype[digit] |= CT_DIGIT;
        }
    }

    /**
     * Determines whether or not ends of line are treated as tokens.
     * If the flag argument is true, this tokenizer treats end of lines
     * as tokens; the <code>nextToken</code> method returns
     * <code>TT_EOL</code> and also sets the <code>ttype</code> field to
     * this value when an end of line is read.
     * <p>
     * A line is a sequence of characters ending with either a

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?