streamtokenizer.java
来自「《移动Agent技术》一书的所有章节源代码。」· Java 代码 · 共 757 行 · 第 1/2 页
JAVA
757 行
/*
* @(#)StreamTokenizer.java 1.20 97/02/26
*
* Copyright (c) 1995, 1996 Sun Microsystems, Inc. All Rights Reserved.
*
* This software is the confidential and proprietary information of Sun
* Microsystems, Inc. ("Confidential Information"). You shall not
* disclose such Confidential Information and shall use it only in
* accordance with the terms of the license agreement you entered into
* with Sun.
*
* SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE
* SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
* IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
* PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR ANY DAMAGES
* SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING
* THIS SOFTWARE OR ITS DERIVATIVES.
*
* CopyrightVersion 1.1_beta
*
*/
package java.io;
/**
* The <code>StreamTokenizer</code> class takes an input stream and
* parses it into "tokens", allowing the tokens to be
* read one at a time. The parsing process is controlled by a table
* and a number of flags that can be set to various states. The
* stream tokenizer can recognize identifiers, numbers, quoted
* strings, and various comment styles.
* <p>
* Each byte read from the input stream is regarded as a character
* in the range <code>'\u0000'</code> through <code>'\u00FF'</code>.
* The character value is used to look up five possible attributes of
* the character: <i>white space</i>, <i>alphabetic</i>,
* <i>numeric</i>, <i>string quote</i>, and <i>comment character</i>.
* Each character can have zero or more of these attributes.
* <p>
* In addition, an instance has four flags. These flags indicate:
* <ul>
* <li>Whether line terminators are to be returned as tokens or treated
* as white space that merely separates tokens.
* <li>Whether C-style comments are to be recognized and skipped.
* <li>Whether C++-style comments are to be recognized and skipped.
* <li>Whether the characters of identifiers are converted to lowercase.
* </ul>
* <p>
* A typical application first constructs an instance of this class,
* sets up the syntax tables, and then repeatedly loops calling the
* <code>nextToken</code> method in each iteration of the loop until
* it returns the value <code>TT_EOF</code>.
*
* @author James Gosling
* @version 1.20, 02/26/97
* @see java.io.StreamTokenizer#nextToken()
* @see java.io.StreamTokenizer#TT_EOF
* @since JDK1.0
*/
public class StreamTokenizer {
/*
 * Input sources. At most one of these is non-null: each public
 * constructor assigns exactly one of them and leaves the other untouched.
 */
private Reader reader = null;
private InputStream input = null;
/* Character buffer, initially 20 chars. NOTE(review): presumably holds the text of the token being scanned; its use is outside this excerpt. */
private char buf[] = new char[20];
/* NOTE(review): presumably a one-character lookahead; confirm against nextToken(). */
private int peekc;
/* NOTE(review): presumably records that the last token was pushed back; confirm against the rest of the class. */
private boolean pushedBack;
/* NOTE(review): presumably the "convert identifiers to lowercase" flag described in the class comment; confirm. */
private boolean forceLower;
/** The line number of the last token read; counting starts at 1. */
private int LINENO = 1;
/* NOTE(review): the three flags below presumably back the end-of-line and C/C++ comment options described in the class comment; all start out false. */
private boolean eolIsSignificantP = false;
private boolean slashSlashCommentsP = false;
private boolean slashStarCommentsP = false;
/*
 * Syntax table: one attribute byte for each character value 0..255.
 * An entry is a bitwise OR of the CT_* flags below; 0 means the
 * character is "ordinary".
 */
private byte ctype[] = new byte[256];
/* Attribute flags stored in ctype. Each is a distinct bit so that they can be combined. */
private static final byte CT_WHITESPACE = 1;
private static final byte CT_DIGIT = 2;
private static final byte CT_ALPHA = 4;
private static final byte CT_QUOTE = 8;
private static final byte CT_COMMENT = 16;
/**
 * After a call to the <code>nextToken</code> method, this field
 * contains the type of the token just read. For a single character
 * token, its value is the single character, converted to an integer.
 * For a quoted string token (see <code>quoteChar</code>), its value
 * is the quote character.
 * Otherwise, its value is one of the following:
 * <ul>
 * <li><code>TT_WORD</code> indicates that the token is a word.
 * <li><code>TT_NUMBER</code> indicates that the token is a number.
 * <li><code>TT_EOL</code> indicates that the end of line has been read.
 * The field can only have this value if the
 * <code>eolIsSignificant</code> method has been called with the
 * argument <code>true</code>.
 * <li><code>TT_EOF</code> indicates that the end of the input stream
 * has been reached.
 * </ul>
 * <p>
 * The field is initialized to the private marker value -4
 * (<code>TT_NOTHING</code>), meaning that no token has been read.
 *
 * @see java.io.StreamTokenizer#eolIsSignificant(boolean)
 * @see java.io.StreamTokenizer#nextToken()
 * @see java.io.StreamTokenizer#quoteChar(int)
 * @see java.io.StreamTokenizer#TT_EOF
 * @see java.io.StreamTokenizer#TT_EOL
 * @see java.io.StreamTokenizer#TT_NUMBER
 * @see java.io.StreamTokenizer#TT_WORD
 */
public int ttype = TT_NOTHING;
/**
 * A constant indicating that the end of the stream has been read.
 * This is one of the values the <code>ttype</code> field can take.
 */
public static final int TT_EOF = -1;
/**
 * A constant indicating that the end of the line has been read.
 * Its value is the newline character <code>'\n'</code>.
 */
public static final int TT_EOL = '\n';
/**
 * A constant indicating that a number token has been read.
 */
public static final int TT_NUMBER = -2;
/**
 * A constant indicating that a word token has been read.
 */
public static final int TT_WORD = -3;
/*
 * A constant indicating that no token has been read; it is used only
 * for initializing ttype.
 * FIXME This could be made public and made available as part of the
 * API in a future release.
 */
private static final int TT_NOTHING = -4;
/**
 * If the current token is a word token, this field contains a
 * string giving the characters of the word token. When the current
 * token is a quoted string token, this field contains the body of
 * the string.
 * <p>
 * The current token is a word when the value of the
 * <code>ttype</code> field is <code>TT_WORD</code>. The current token is
 * a quoted string token when the value of the <code>ttype</code> field is
 * a quote character.
 * <p>
 * The field has no initializer, so it is <code>null</code> when the
 * tokenizer is constructed.
 *
 * @see java.io.StreamTokenizer#quoteChar(int)
 * @see java.io.StreamTokenizer#TT_WORD
 * @see java.io.StreamTokenizer#ttype
 * @since JDK1.0
 */
public String sval;
/**
 * If the current token is a number, this field contains the value
 * of that number. The current token is a number when the value of
 * the <code>ttype</code> field is <code>TT_NUMBER</code>.
 * <p>
 * The field has no initializer, so it is 0.0 when the tokenizer is
 * constructed.
 *
 * @see java.io.StreamTokenizer#TT_NUMBER
 * @see java.io.StreamTokenizer#ttype
 */
public double nval;
/**
 * Private constructor that installs the default syntax table and leaves
 * both input sources unset; the public constructors call it first and
 * then attach their own stream.
 */
private StreamTokenizer() {
    // Word constituents: ASCII letters plus the character values 0xA0..0xFF.
    wordChars('a', 'z');
    wordChars('A', 'Z');
    wordChars(0xA0, 0xFF);
    // Every character value up to and including the space (0x20) is white space.
    whitespaceChars(0x00, 0x20);
    // A slash starts a single-line comment.
    commentChar('/');
    // Double and single quotes both delimit string constants.
    quoteChar('"');
    quoteChar('\'');
    // Digits, '.' and '-' additionally receive the numeric attribute.
    parseNumbers();
}
/**
 * Creates a stream tokenizer that parses the specified input
 * stream. The stream tokenizer is initialized to the following
 * default state:
 * <ul>
 * <li>All byte values <code>'A'</code> through <code>'Z'</code>,
 * <code>'a'</code> through <code>'z'</code>, and
 * <code>'\u00A0'</code> through <code>'\u00FF'</code> are
 * considered to be alphabetic.
 * <li>All byte values <code>'\u0000'</code> through
 * <code>'\u0020'</code> are considered to be white space.
 * <li><code>'/'</code> is a comment character.
 * <li>Single quote <code>'\''</code> and double quote <code>'"'</code>
 * are string quote characters.
 * <li>Numbers are parsed.
 * <li>Ends of lines are treated as white space, not as separate tokens.
 * <li>C-style and C++-style comments are not recognized.
 * </ul>
 *
 * @deprecated As of JDK version 1.1, the preferred way to tokenize an
 * input stream is to convert it into a character stream, for example:
 * <p>
 * <pre>
 * Reader r = new BufferedReader(new InputStreamReader(is));
 * StreamTokenizer st = new StreamTokenizer(r);
 * </pre>
 *
 * @param is an input stream; must not be <code>null</code>.
 * @exception NullPointerException if <code>is</code> is <code>null</code>.
 * @see java.io.BufferedReader
 * @see java.io.InputStreamReader
 * @see java.io.StreamTokenizer#StreamTokenizer(java.io.Reader)
 */
public StreamTokenizer(InputStream is) {
    this();
    // Fail fast: accepting null would leave the tokenizer with no input
    // source at all, since the reader field is not set by this constructor.
    if (is == null) {
        throw new NullPointerException("is");
    }
    input = is;
}
/**
 * Creates a tokenizer that parses the given character stream. The
 * syntax table is set up by the private no-argument constructor before
 * the reader is attached.
 *
 * @param r the character stream to tokenize; must not be <code>null</code>.
 * @exception NullPointerException if <code>r</code> is <code>null</code>.
 * @since JDK1.1
 */
public StreamTokenizer(Reader r) {
    this();
    // Fail fast: accepting null would leave the tokenizer with no input
    // source at all, since the input field is not set by this constructor.
    if (r == null) {
        throw new NullPointerException("r");
    }
    reader = r;
}
/**
 * Clears this tokenizer's syntax table so that every character becomes
 * "ordinary", i.e. carries no attribute at all. See the
 * <code>ordinaryChar</code> method for what being ordinary means.
 *
 * @see java.io.StreamTokenizer#ordinaryChar(int)
 */
public void resetSyntax() {
    final int size = ctype.length;
    // Every table entry is overwritten, so the traversal order is irrelevant.
    for (int ch = 0; ch < size; ch++) {
        ctype[ch] = 0;
    }
}
/**
 * Marks every character <i>c</i> in the range
 * <code>low &lt;= <i>c</i> &lt;= hi</code> as a word constituent.
 * The word attribute is added to whatever attributes the characters
 * already have. A word token consists of a word constituent
 * followed by zero or more word constituents or number constituents.
 * <p>
 * The range is clipped to the bounds of the syntax table; an empty
 * range changes nothing.
 *
 * @param low the low end of the range.
 * @param hi the high end of the range.
 */
public void wordChars(int low, int hi) {
    final int from = Math.max(low, 0);
    final int to = Math.min(hi, ctype.length - 1);
    for (int ch = from; ch <= to; ch++) {
        // OR the flag in so that existing attributes survive.
        ctype[ch] |= CT_ALPHA;
    }
}
/**
 * Marks every character <i>c</i> in the range
 * <code>low &lt;= <i>c</i> &lt;= hi</code> as white space. White space
 * characters serve only to separate tokens in the input stream.
 * <p>
 * Any other attribute the characters in the range had is cleared.
 * The range is clipped to the bounds of the syntax table; an empty
 * range changes nothing.
 *
 * @param low the low end of the range.
 * @param hi the high end of the range.
 */
public void whitespaceChars(int low, int hi) {
    final int first = (low < 0) ? 0 : low;
    final int last = (hi >= ctype.length) ? ctype.length - 1 : hi;
    for (int ch = first; ch <= last; ch++) {
        // Plain assignment: white space replaces all previous attributes.
        ctype[ch] = CT_WHITESPACE;
    }
}
/**
 * Makes every character <i>c</i> in the range
 * <code>low &lt;= <i>c</i> &lt;= hi</code> "ordinary" in this
 * tokenizer by clearing all of its attributes. See the
 * <code>ordinaryChar</code> method for more information on a
 * character being ordinary.
 * <p>
 * The range is clipped to the bounds of the syntax table; an empty
 * range changes nothing.
 *
 * @param low the low end of the range.
 * @param hi the high end of the range.
 * @see java.io.StreamTokenizer#ordinaryChar(int)
 */
public void ordinaryChars(int low, int hi) {
    final int top = Math.min(hi, ctype.length - 1);
    int ch = Math.max(low, 0);
    while (ch <= top) {
        ctype[ch] = 0;
        ch++;
    }
}
/**
 * Specifies that the character argument is "ordinary"
 * in this tokenizer. It removes any special significance the
 * character has as a comment character, word component, string
 * delimiter, white space, or number character. When such a character
 * is encountered by the parser, the parser treats it as a
 * single-character token and sets the <code>ttype</code> field to the
 * character value.
 * <p>
 * A value outside the syntax table (negative, or not less than the
 * table length) is ignored.
 *
 * @param ch the character.
 * @see java.io.StreamTokenizer#ttype
 */
public void ordinaryChar(int ch) {
    final boolean outsideTable = ch < 0 || ch >= ctype.length;
    if (outsideTable) {
        return;
    }
    ctype[ch] = 0;
}
/**
 * Specifies that the character argument starts a single-line
 * comment. All characters from the comment character to the end of
 * the line are ignored by this stream tokenizer.
 * <p>
 * Any other attribute the character had is cleared. A value outside
 * the syntax table is ignored.
 *
 * @param ch the character.
 */
public void commentChar(int ch) {
    if (ch < 0 || ch >= ctype.length) {
        return;
    }
    // Plain assignment: the comment attribute replaces all others.
    ctype[ch] = CT_COMMENT;
}
/**
 * Specifies that matching pairs of this character delimit string
 * constants in this tokenizer. Any other attribute the character had
 * is cleared; a value outside the syntax table is ignored.
 * <p>
 * When the <code>nextToken</code> method encounters a string
 * constant, the <code>ttype</code> field is set to the string
 * delimiter and the <code>sval</code> field is set to the body of
 * the string.
 * <p>
 * If a string quote character is encountered, then a string is
 * recognized, consisting of all characters after (but not including)
 * the string quote character, up to (but not including) the next
 * occurrence of that same string quote character, or a line
 * terminator, or end of file. The usual escape sequences such as
 * <code>"\n"</code> and <code>"\t"</code> are recognized and
 * converted to single characters as the string is parsed.
 *
 * @param ch the character.
 * @see java.io.StreamTokenizer#nextToken()
 * @see java.io.StreamTokenizer#sval
 * @see java.io.StreamTokenizer#ttype
 */
public void quoteChar(int ch) {
    final boolean insideTable = 0 <= ch && ch < ctype.length;
    if (insideTable) {
        // Plain assignment: the quote attribute replaces all others.
        ctype[ch] = CT_QUOTE;
    }
}
/**
 * Specifies that numbers should be parsed by this tokenizer. The
 * syntax table of this tokenizer is modified so that each of the twelve
 * characters:
 * <pre>
 *      0 1 2 3 4 5 6 7 8 9 . -
 * </pre>
 * has the "numeric" attribute, in addition to any attributes it
 * already has.
 * <p>
 * When the parser encounters a word token that has the format of a
 * double precision floating-point number, it treats the token as a
 * number rather than a word, by setting the <code>ttype</code>
 * field to the value <code>TT_NUMBER</code> and putting the numeric
 * value of the token into the <code>nval</code> field.
 *
 * @see java.io.StreamTokenizer#nval
 * @see java.io.StreamTokenizer#TT_NUMBER
 * @see java.io.StreamTokenizer#ttype
 */
public void parseNumbers() {
    // Sign and decimal point first; the entries are independent of each other.
    ctype['-'] |= CT_DIGIT;
    ctype['.'] |= CT_DIGIT;
    for (char digit = '0'; digit <= '9'; digit++) {
        ctype[digit] |= CT_DIGIT;
    }
}
/**
* Determines whether or not ends of line are treated as tokens.
* If the flag argument is true, this tokenizer treats end of lines
* as tokens; the <code>nextToken</code> method returns
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?