streamtokenizer.java

来自「《移动Agent技术》一书的所有章节源代码。」· Java 代码 · 共 757 行 · 第 1/2 页

JAVA
757
字号
/*
 * @(#)StreamTokenizer.java	1.20 97/02/26
 * 
 * Copyright (c) 1995, 1996 Sun Microsystems, Inc. All Rights Reserved.
 * 
 * This software is the confidential and proprietary information of Sun
 * Microsystems, Inc. ("Confidential Information").  You shall not
 * disclose such Confidential Information and shall use it only in
 * accordance with the terms of the license agreement you entered into
 * with Sun.
 * 
 * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE
 * SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR ANY DAMAGES
 * SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING
 * THIS SOFTWARE OR ITS DERIVATIVES.
 * 
 * CopyrightVersion 1.1_beta
 * 
 */

package java.io;


/**
 * The <code>StreamTokenizer</code> class takes an input stream and 
 * parses it into "tokens", allowing the tokens to be 
 * read one at a time. The parsing process is controlled by a table 
 * and a number of flags that can be set to various states. The 
 * stream tokenizer can recognize identifiers, numbers, quoted 
 * strings, and various comment styles. 
 * <p>
 * Each byte read from the input stream is regarded as a character 
 * in the range <code>'&#92;u0000'</code> through <code>'&#92;u00FF'</code>. 
 * The character value is used to look up five possible attributes of 
 * the character: <i>white space</i>, <i>alphabetic</i>, 
 * <i>numeric</i>, <i>string quote</i>, and <i>comment character</i>. 
 * Each character can have zero or more of these attributes. 
 * <p>
 * In addition, an instance has four flags. These flags indicate: 
 * <ul>
 * <li>Whether line terminators are to be returned as tokens or treated 
 *     as white space that merely separates tokens. 
 * <li>Whether C-style comments are to be recognized and skipped. 
 * <li>Whether C++-style comments are to be recognized and skipped. 
 * <li>Whether the characters of identifiers are converted to lowercase. 
 * </ul>
 * <p>
 * A typical application first constructs an instance of this class, 
 * sets up the syntax tables, and then repeatedly loops calling the 
 * <code>nextToken</code> method in each iteration of the loop until 
 * it returns the value <code>TT_EOF</code>. 
 *
 * @author  James Gosling
 * @version 1.20, 02/26/97
 * @see     java.io.StreamTokenizer#nextToken()
 * @see     java.io.StreamTokenizer#TT_EOF
 * @since   JDK1.0
 */

public class StreamTokenizer {

    /*
     * Input source. Only one of these will be non-null: the Reader
     * constructor assigns <code>reader</code>, the InputStream constructor
     * assigns <code>input</code>.
     */
    private Reader reader = null;
    private InputStream input = null;

    /*
     * Scanner working state. The code that uses these fields is outside
     * this part of the file; presumably buf accumulates token text, peekc is
     * a one-character look-ahead, pushedBack marks a pushed-back token and
     * forceLower requests lower-casing of words -- TODO confirm against
     * nextToken().
     */
    private char buf[] = new char[20];
    private int peekc;
    private boolean pushedBack;
    private boolean forceLower;
    /** The line number of the last token read (counting starts at 1). */
    private int LINENO = 1;

    /*
     * Option flags, all off by default. NOTE(review): the names suggest
     * they correspond to significant end-of-line handling, "//" comments
     * and slash-star comments respectively; the methods that set them are
     * not visible in this part of the file.
     */
    private boolean eolIsSignificantP = false;
    private boolean slashSlashCommentsP = false;
    private boolean slashStarCommentsP = false;

    /*
     * Syntax table: one entry per character value 0..255, indexed by the
     * character. Each entry holds the CT_* attribute bits below; the
     * constants are distinct single bits so that several attributes can be
     * OR-ed together into one entry.
     */
    private byte ctype[] = new byte[256];
    private static final byte CT_WHITESPACE = 1;
    private static final byte CT_DIGIT = 2;
    private static final byte CT_ALPHA = 4;
    private static final byte CT_QUOTE = 8;
    private static final byte CT_COMMENT = 16;

    /** 
     * After a call to the <code>nextToken</code> method, this field 
     * contains the type of the token just read. For a single character 
     * token, its value is the single character, converted to an integer. 
     * For a quoted string token (see the <code>quoteChar</code> method), its value is the quote character. 
     * Otherwise, its value is one of the following: 
     * <ul>
     * <li><code>TT_WORD</code> indicates that the token is a word.
     * <li><code>TT_NUMBER</code> indicates that the token is a number.
     * <li><code>TT_EOL</code> indicates that the end of line has been read. 
     *     The field can only have this value if the 
     *     <code>eolIsSignificant</code> method has been called with the 
     *     argument <code>true</code>. 
     * <li><code>TT_EOF</code> indicates that the end of the input stream 
     *     has been reached. 
     * </ul>
     *
     * @see     java.io.StreamTokenizer#eolIsSignificant(boolean)
     * @see     java.io.StreamTokenizer#nextToken()
     * @see     java.io.StreamTokenizer#quoteChar(int)
     * @see     java.io.StreamTokenizer#TT_EOF
     * @see     java.io.StreamTokenizer#TT_EOL
     * @see     java.io.StreamTokenizer#TT_NUMBER
     * @see     java.io.StreamTokenizer#TT_WORD
     */
    public int ttype = TT_NOTHING;

    /** 
     * A constant indicating that the end of the stream has been read. 
     */
    public static final int TT_EOF = -1;

    /** 
     * A constant indicating that the end of the line has been read. 
     */
    public static final int TT_EOL = '\n';

    /** 
     * A constant indicating that a number token has been read. 
     */
    public static final int TT_NUMBER = -2;

    /** 
     * A constant indicating that a word token has been read. 
     */
    public static final int TT_WORD = -3;

    /* A constant indicating that no token has been read, used for
     * initializing ttype.  FIXME This could be made public and
     * made available as the part of the API in a future release.
     */
    private static final int TT_NOTHING = -4;
    
    /**
     * If the current token is a word token, this field contains a 
     * string giving the characters of the word token. When the current 
     * token is a quoted string token, this field contains the body of 
     * the string. 
     * <p>
     * The current token is a word when the value of the 
     * <code>ttype</code> field is <code>TT_WORD</code>. The current token is
     * a quoted string token when the value of the <code>ttype</code> field is
     * a quote character.
     *
     * @see     java.io.StreamTokenizer#quoteChar(int)
     * @see     java.io.StreamTokenizer#TT_WORD
     * @see     java.io.StreamTokenizer#ttype
     * @since JDK1.0
     */
    public String sval;

    /**
     * If the current token is a number, this field contains the value 
     * of that number. The current token is a number when the value of 
     * the <code>ttype</code> field is <code>TT_NUMBER</code>. 
     *
     * @see     java.io.StreamTokenizer#TT_NUMBER
     * @see     java.io.StreamTokenizer#ttype
     */
    public double nval;

    /** Private constructor that initializes everything except the streams. */
    private StreamTokenizer() {
        // Alphabetic characters: the ASCII letters plus the range 0xA0..0xFF.
        wordChars('a', 'z');
        wordChars('A', 'Z');
        wordChars(0xA0, 0xFF);

        // Everything from NUL up to and including the space character
        // is white space.
        whitespaceChars(0x00, 0x20);

        // '/' is the comment character; both quote marks delimit strings.
        commentChar('/');
        quoteChar('"');
        quoteChar('\'');

        // Give the digits, '.' and '-' the numeric attribute.
        parseNumbers();
    }

    /**
     * Creates a stream tokenizer that parses the specified input 
     * stream. The stream tokenizer is initialized to the following 
     * default state: 
     * <ul>
     * <li>All byte values <code>'A'</code> through <code>'Z'</code>, 
     *     <code>'a'</code> through <code>'z'</code>, and 
     *     <code>'&#92;u00A0'</code> through <code>'&#92;u00FF'</code> are
     *     considered to be alphabetic. 
     * <li>All byte values <code>'&#92;u0000'</code> through 
     *     <code>'&#92;u0020'</code> are considered to be white space. 
     * <li><code>'/'</code> is a comment character. 
     * <li>Single quote <code>'&#92;''</code> and double quote <code>'"'</code> 
     *     are string quote characters. 
     * <li>Numbers are parsed. 
     * <li>Ends of lines are treated as white space, not as separate tokens. 
     * <li>C-style and C++-style comments are not recognized. 
     * </ul>
     *
     * @deprecated As of JDK version 1.1, the preferred way to tokenize an
     * input stream is to convert it into a character stream, for example:
     * <p>
     * <pre>
     *   Reader r = new BufferedReader(new InputStreamReader(is));
     *   StreamTokenizer st = new StreamTokenizer(r);
     * </pre>
     *
     * @param      is        an input stream.
     * @see        java.io.BufferedReader
     * @see        java.io.InputStreamReader
     * @see        java.io.StreamTokenizer#StreamTokenizer(java.io.Reader)
     */
    public StreamTokenizer(InputStream is) {
        this();
        // Fail fast on a null stream: accepting it would leave this
        // tokenizer with neither a byte stream nor a character stream to
        // read from, so the mistake is reported here, at the faulty call.
        if (is == null) {
            throw new NullPointerException("is");
        }
        input = is;
    }

    /**
     * Creates a tokenizer that parses the given character stream.
     *
     * @param   r   a Reader object providing the input stream.
     * @since   JDK1.1
     */
    public StreamTokenizer(Reader r) {
        this();
        // Fail fast on a null reader: accepting it would leave this
        // tokenizer with neither a character stream nor a byte stream to
        // read from, so the mistake is reported here, at the faulty call.
        if (r == null) {
            throw new NullPointerException("r");
        }
        reader = r;
    }

    /** 
     * Resets this tokenizer's syntax table so that all characters are 
     * "ordinary." See the <code>ordinaryChar</code> method 
     * for more information on a character being ordinary. 
     *
     * @see     java.io.StreamTokenizer#ordinaryChar(int)
     */
    public void resetSyntax() {
        // Clear the attribute bits of every table entry.
        int idx = 0;
        while (idx < ctype.length) {
            ctype[idx] = 0;
            idx++;
        }
    }

    /** 
     * Specifies that all characters <i>c</i> in the range 
     * <code>low&nbsp;&lt;=&nbsp;<i>c</i>&nbsp;&lt;=&nbsp;hi</code> 
     * are word constituents. A word token consists of a word constituent 
     * followed by zero or more word constituents or number constituents. 
     *
     * @param   low   the low end of the range.
     * @param   hi    the high end of the range.
     */
    public void wordChars(int low, int hi) {
        // Clamp the requested range to the bounds of the syntax table.
        final int first = (low < 0) ? 0 : low;
        final int last = (hi >= ctype.length) ? ctype.length - 1 : hi;

        // OR the bit in so that attributes already present are kept.
        for (int c = first; c <= last; c++) {
            ctype[c] |= CT_ALPHA;
        }
    }

    /** 
     * Specifies that all characters <i>c</i> in the range 
     * <code>low&nbsp;&lt;=&nbsp;<i>c</i>&nbsp;&lt;=&nbsp;hi</code> 
     * are white space characters. White space characters serve only to 
     * separate tokens in the input stream. 
     *
     * @param   low   the low end of the range.
     * @param   hi    the high end of the range.
     */
    public void whitespaceChars(int low, int hi) {
        // Keep both ends of the range inside the syntax table.
        int from = Math.max(low, 0);
        int to = Math.min(hi, ctype.length - 1);

        // Plain assignment, not OR: any attribute previously set on these
        // characters is replaced by the white space attribute.
        while (from <= to) {
            ctype[from] = CT_WHITESPACE;
            from++;
        }
    }

    /** 
     * Specifies that all characters <i>c</i> in the range 
     * <code>low&nbsp;&lt;=&nbsp;<i>c</i>&nbsp;&lt;=&nbsp;hi</code> 
     * are "ordinary" in this tokenizer. See the 
     * <code>ordinaryChar</code> method for more information on a 
     * character being ordinary. 
     *
     * @param   low   the low end of the range.
     * @param   hi    the high end of the range.
     * @see     java.io.StreamTokenizer#ordinaryChar(int)
     */
    public void ordinaryChars(int low, int hi) {
        // Restrict the range to valid table indices.
        int start = low;
        if (start < 0) {
            start = 0;
        }
        int end = hi;
        if (end > ctype.length - 1) {
            end = ctype.length - 1;
        }

        // An entry with no attribute bits set marks an ordinary character.
        for (int c = start; c <= end; c++) {
            ctype[c] = 0;
        }
    }

    /** 
     * Specifies that the character argument is "ordinary" 
     * in this tokenizer. It removes any special significance the 
     * character has as a comment character, word component, string 
     * delimiter, white space, or number character. When such a character 
     * is encountered by the parser, the parser treats it as a
     * single-character token and sets the <code>ttype</code> field to the
     * character value. 
     *
     * @param   ch   the character.
     * @see     java.io.StreamTokenizer#ttype
     */
    public void ordinaryChar(int ch) {
        // Characters outside the syntax table are silently ignored.
        if (ch < 0 || ch >= ctype.length) {
            return;
        }
        // Dropping every attribute bit makes the character ordinary.
        ctype[ch] = 0;
    }

    /** 
     * Specifies that the character argument starts a single-line 
     * comment. All characters from the comment character to the end of 
     * the line are ignored by this stream tokenizer. 
     *
     * @param   ch   the character.
     */
    public void commentChar(int ch) {
        // Characters outside the syntax table are silently ignored.
        final boolean inTable = (ch >= 0) && (ch < ctype.length);
        if (!inTable) {
            return;
        }
        // Plain assignment: the comment attribute replaces whatever
        // attributes the character had before.
        ctype[ch] = CT_COMMENT;
    }

    /** 
     * Specifies that matching pairs of this character delimit string 
     * constants in this tokenizer. 
     * <p>
     * When the <code>nextToken</code> method encounters a string 
     * constant, the <code>ttype</code> field is set to the string 
     * delimiter and the <code>sval</code> field is set to the body of 
     * the string. 
     * <p>
     * If a string quote character is encountered, then a string is 
     * recognized, consisting of all characters after (but not including) 
     * the string quote character, up to (but not including) the next 
     * occurrence of that same string quote character, or a line 
     * terminator, or end of file. The usual escape sequences such as 
     * <code>"&#92;n"</code> and <code>"&#92;t"</code> are recognized and 
     * converted to single characters as the string is parsed. 
     *
     * @param   ch   the character.
     * @see     java.io.StreamTokenizer#nextToken()
     * @see     java.io.StreamTokenizer#sval
     * @see     java.io.StreamTokenizer#ttype
     */
    public void quoteChar(int ch) {
        // Characters outside the syntax table are silently ignored.
        if (ch < 0) {
            return;
        }
        if (ch >= ctype.length) {
            return;
        }
        // Plain assignment: the quote attribute replaces whatever
        // attributes the character had before.
        ctype[ch] = CT_QUOTE;
    }

    /** 
     * Specifies that numbers should be parsed by this tokenizer. The 
     * syntax table of this tokenizer is modified so that each of the twelve
     * characters:
     * <ul><code>
     *      0 1 2 3 4 5 6 7 8 9 . -
     * </code></ul>
     * <p>
     * has the "numeric" attribute. 
     * <p>
     * When the parser encounters a word token that has the format of a 
     * double precision floating-point number, it treats the token as a 
     * number rather than a word, by setting the <code>ttype</code> 
     * field to the value <code>TT_NUMBER</code> and putting the numeric 
     * value of the token into the <code>nval</code> field. 
     *
     * @see     java.io.StreamTokenizer#nval
     * @see     java.io.StreamTokenizer#TT_NUMBER
     * @see     java.io.StreamTokenizer#ttype
     */
    public void parseNumbers() {
        // The minus sign and the decimal point get the numeric attribute
        // alongside the ten digits. The bit is OR-ed in so that attributes
        // already present on a character are kept.
        ctype['-'] |= CT_DIGIT;
        ctype['.'] |= CT_DIGIT;

        char digit = '0';
        while (digit <= '9') {
            ctype[digit] |= CT_DIGIT;
            digit++;
        }
    }

    /**
     * Determines whether or not ends of line are treated as tokens.
     * If the flag argument is true, this tokenizer treats end of lines 
     * as tokens; the <code>nextToken</code> method returns 

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?