streamtokenizer.java

来自「《移动Agent技术》一书的所有章节源代码。」· Java 代码 · 共 757 行 · 第 1/2 页

JAVA
757
字号
     * <code>TT_EOL</code> and also sets the <code>ttype</code> field to 
     * this value when an end of line is read. 
     * <p>
     * A line is a sequence of characters ending with either a 
     * carriage-return character (<code>'&#92;r'</code>) or a newline 
     * character (<code>'&#92;n'</code>). In addition, a carriage-return 
     * character followed immediately by a newline character is treated 
     * as a single end-of-line token. 
     * <p>
     * If the <code>flag</code> is false, end-of-line characters are 
     * treated as white space and serve only to separate tokens. 
     *
     * @param   flag   <code>true</code> indicates that end-of-line characters
     *                 are separate tokens; <code>false</code> indicates that
     *                 end-of-line characters are white space.
     * @see     java.io.StreamTokenizer#nextToken()
     * @see     java.io.StreamTokenizer#ttype
     * @see     java.io.StreamTokenizer#TT_EOL
     */
    public void eolIsSignificant(boolean flag) {
        // Stored for nextToken(), which checks it whenever it meets '\r' or '\n'
        // to decide between returning TT_EOL and skipping the line end.
        this.eolIsSignificantP = flag;
    }

    /** 
     * Determines whether or not the tokenizer recognizes C-style comments.
     * If the flag argument is <code>true</code>, this stream tokenizer 
     * recognizes C-style comments. All text between successive 
     * occurrences of <code>/*</code> and <code>*&#47;</code> is discarded. 
     * <p>
     * If the flag argument is <code>false</code>, then C-style comments 
     * are not treated specially. 
     *
     * @param   flag   <code>true</code> indicates to recognize and ignore
     *                 C-style comments.
     */
    public void slashStarComments(boolean flag) {
        // Stored for nextToken(), which checks it when a '/' is followed by '*'.
        this.slashStarCommentsP = flag;
    }

    /** 
     * Determines whether or not the tokenizer recognizes C++-style comments.
     * If the flag argument is <code>true</code>, this stream tokenizer 
     * recognizes C++-style comments. Any occurrence of two consecutive 
     * slash characters (<code>'/'</code>) is treated as the beginning of 
     * a comment that extends to the end of the line. 
     * <p>
     * If the flag argument is <code>false</code>, then C++-style 
     * comments are not treated specially. 
     *
     * @param   flag   <code>true</code> indicates to recognize and ignore
     *                 C++-style comments.
     */
    public void slashSlashComments(boolean flag) {
        // Stored for nextToken(), which checks it when a '/' is followed by another '/'.
        this.slashSlashCommentsP = flag;
    }

    /**
     * Determines whether or not word tokens are automatically lowercased.
     * If the flag argument is <code>true</code>, then the value in the 
     * <code>sval</code> field is lowercased whenever a word token is 
     * returned (the <code>ttype</code> field has the 
     * value <code>TT_WORD</code>) by the <code>nextToken</code> method 
     * of this tokenizer. 
     * <p>
     * If the flag argument is <code>false</code>, then the 
     * <code>sval</code> field is not modified. 
     *
     * @param   fl   <code>true</code> indicates that all word tokens should
     *               be lowercased.
     * @see     java.io.StreamTokenizer#nextToken()
     * @see     java.io.StreamTokenizer#ttype
     * @see     java.io.StreamTokenizer#TT_WORD
     */
    public void lowerCaseMode(boolean fl) {
        // Stored for nextToken(), which lower-cases sval of word tokens when this is set.
        this.forceLower = fl;
    }

    /** Reads the next character from <code>reader</code> if it is set, otherwise from <code>input</code>. */
    private int read() throws IOException {
        // Use `reader` when it is set ...
        if (reader != null) {
            return reader.read();
        }
        // ... otherwise fall back to `input`.
        if (input != null) {
            return input.read();
        }
        // Neither source is available, so there is nothing to read from.
        throw new IllegalStateException();
    }

    /** 
     * Parses the next token from the input stream of this tokenizer. 
     * The type of the next token is returned in the <code>ttype</code> 
     * field. Additional information about the token may be in the 
     * <code>nval</code> field or the <code>sval</code> field of this 
     * tokenizer. 
     * <p>
     * Typical clients of this
     * class first set up the syntax tables and then sit in a loop
     * calling nextToken to parse successive tokens until TT_EOF
     * is returned. 
     *
     * @return     the value of the <code>ttype</code> field.
     * @exception  IOException  if an I/O error occurs.
     * @see        java.io.StreamTokenizer#nval
     * @see        java.io.StreamTokenizer#sval
     * @see        java.io.StreamTokenizer#ttype
     */
    public int nextToken() throws IOException {
        // A pushed-back token is handed out again; ttype, nval and sval stay as they were.
        if (pushedBack) {
            pushedBack = false;
            return ttype;
        }
        byte ct[] = ctype;   // the character-class table field (the local 'ctype' below shadows it)
        int c;
        sval = null;

        if (ttype == TT_NOTHING) {
            // No token has been produced yet, so peekc holds no lookahead: read directly.
            c = read();
            if (c >= 0)    // ttype is surely overwritten below to its correct value.
                ttype = c; // for now we just make sure it isn't TT_NOTHING
        } else {
            // Resume from the lookahead character left behind by the previous call.
            c = peekc;
        }

        if (c < 0)
            return ttype = TT_EOF;
        // Class flags of the current character; characters outside the table
        // (>= 256) are treated as alphabetic.
        int ctype = c < 256 ? ct[c] : CT_ALPHA;

        // Skip white space, counting lines. "\r\n" counts as one line end.
        while ((ctype & CT_WHITESPACE) != 0) {
            if (c == '\r') {
                LINENO++;
                c = read();
                if (c == '\n')
                    c = read();
                if (eolIsSignificantP) {
                    peekc = c;
                    return ttype = TT_EOL;
                }
            } else {
                if (c == '\n') {
                    LINENO++;
                    if (eolIsSignificantP) {
                        peekc = read();
                        return ttype = TT_EOL;
                    }
                }
                c = read();
            }
            if (c < 0)
                return ttype = TT_EOF;
            ctype = c < 256 ? ct[c] : CT_ALPHA;
        }

        // Number: optional leading '-', digits, at most one '.'.
        if ((ctype & CT_DIGIT) != 0) {
            boolean neg = false;
            if (c == '-') {
                c = read();
                // A '-' not followed by a digit or '.' is an ordinary '-' token.
                if (c != '.' && (c < '0' || c > '9')) {
                    peekc = c;
                    return ttype = '-';
                }
                neg = true;
            }
            double v = 0;
            int decexp = 0;   // number of digits seen after the decimal point
            int seendot = 0;  // 0 before the '.', 1 after it
            while (true) {
                if (c == '.' && seendot == 0)
                    seendot = 1;
                else if ('0' <= c && c <= '9') {
                    v = v * 10 + (c - '0');
                    decexp += seendot;
                } else
                    break;
                c = read();
            }
            peekc = c;
            if (decexp != 0) {
                // Build 10^decexp and divide once.
                double denom = 10;
                decexp--;
                while (decexp > 0) {
                    denom *= 10;
                    decexp--;
                }
                /* do one division of a likely-to-be-more-accurate number */
                v = v / denom;
            }
            nval = neg ? -v : v;
            return ttype = TT_NUMBER;
        }

        // Word: a run of alphabetic/digit-class characters collected into buf.
        if ((ctype & CT_ALPHA) != 0) {
            int i = 0;
            do {
                if (i >= buf.length) {
                    // Double the buffer when it is full.
                    char nb[] = new char[buf.length * 2];
                    System.arraycopy(buf, 0, nb, 0, buf.length);
                    buf = nb;
                }
                buf[i++] = (char) c;
                c = read();
                // EOF is classified as white space so that it ends the word.
                ctype = c < 0 ? CT_WHITESPACE : c < 256 ? ct[c] : CT_ALPHA;
            } while ((ctype & (CT_ALPHA | CT_DIGIT)) != 0);
            peekc = c;
            sval = String.copyValueOf(buf, 0, i);
            if (forceLower)
                sval = sval.toLowerCase();
            return ttype = TT_WORD;
        }

        // Single-character comment starter: discard up to (not including) the line end.
        if ((ctype & CT_COMMENT) != 0) {
            while ((c = read()) != '\n' && c != '\r' && c >= 0);
            peekc = c;
            return nextToken();
        }

        // Quoted string: ttype is the quote character, sval receives the body.
        if ((ctype & CT_QUOTE) != 0) {
            ttype = c;
            int i = 0;
            // invariants (because \Octal needs a lookahead):
            //      (i)  c contains char value
            //      (ii) peekc contains the lookahead
            peekc = read();
            // The string ends at the matching quote, at a line end, or at EOF.
            while (peekc >= 0 && peekc != ttype && peekc != '\n' && peekc != '\r') {
                if (peekc == '\\') {
                    c = read();
                    int first = c;   // to allow \377, but not \477
                    if (c >= '0' && c <= '7') {
                        // Octal escape of one to three digits.
                        c = c - '0';
                        int c2 = read();
                        if ('0' <= c2 && c2 <= '7') {
                            c = (c << 3) + (c2 - '0');
                            c2 = read();
                            if ('0' <= c2 && c2 <= '7' && first <= '3') {
                                c = (c << 3) + (c2 - '0');
                                peekc = read();
                            } else
                                peekc = c2;
                        } else
                            peekc = c2;
                    } else {
                        // Named escapes; any other escaped character stands for itself.
                        switch (c) {
                        case 'a':
                            c = 0x7;
                            break;
                        case 'b':
                            c = '\b';
                            break;
                        case 'f':
                            c = 0xC;
                            break;
                        case 'n':
                            c = '\n';
                            break;
                        case 'r':
                            c = '\r';
                            break;
                        case 't':
                            c = '\t';
                            break;
                        case 'v':
                            c = 0xB;
                            break;
                        }
                        peekc = read();
                    }
                } else {
                    c = peekc;
                    peekc = read();
                }

                if (i >= buf.length) {
                    char nb[] = new char[buf.length * 2];
                    System.arraycopy(buf, 0, nb, 0, buf.length);
                    buf = nb;
                }
                buf[i++] = (char) c;
            }
            if (peekc == ttype)  // keep \n or \r intact in peekc
                peekc = read();
            sval = String.copyValueOf(buf, 0, i);
            return ttype;
        }

        // '/' may start a C or C++ style comment when the matching flag is enabled.
        if (c == '/' && (slashSlashCommentsP || slashStarCommentsP)) {
            c = read();
            if (c == '*' && slashStarCommentsP) {
                // Skip until "*/". Every character, including the one that follows
                // a line terminator, goes through the same checks, so consecutive
                // line ends (blank lines) inside the comment are each counted.
                int prevc = 0;
                c = read();
                while (c != '/' || prevc != '*') {
                    if (c < 0) {
                        // Unterminated comment. Record EOF as the lookahead so that a
                        // later call does not re-scan the stale '/' left in peekc.
                        peekc = c;
                        return ttype = TT_EOF;
                    }
                    prevc = c;
                    if (c == '\r') {
                        LINENO++;
                        c = read();
                        if (c == '\n')   // "\r\n" is a single line end
                            c = read();
                    } else {
                        if (c == '\n')
                            LINENO++;
                        c = read();
                    }
                }
                peekc = read();
                return nextToken();
            } else if (c == '/' && slashSlashCommentsP) {
                // Discard up to (not including) the line end.
                while ((c = read()) != '\n' && c != '\r' && c >= 0);
                peekc = c;
                return nextToken();
            } else {
                // Just a '/' character; what followed it becomes the lookahead.
                peekc = c;
                return ttype = '/';
            }
        }

        // Ordinary character: the token type is the character itself.
        peekc = read();
        return ttype = c;
    }

    /**
     * Causes the next call to the <code>nextToken</code> method of this 
     * tokenizer to return the current value in the <code>ttype</code> 
     * field, and not to modify the value in the <code>nval</code> or 
     * <code>sval</code> field. 
     *
     * @see     java.io.StreamTokenizer#nextToken()
     * @see     java.io.StreamTokenizer#nval
     * @see     java.io.StreamTokenizer#sval
     * @see     java.io.StreamTokenizer#ttype
     */
    public void pushBack() {
        // Nothing to push back while ttype is still TT_NOTHING
        // (nextToken() has not produced a token yet).
        if (ttype == TT_NOTHING) {
            return;
        }
        pushedBack = true;
    }

    /**
     * Returns the current line number.
     *
     * @return  the current line number of this stream tokenizer.
     */
    public int lineno() {
        // LINENO is the counter nextToken() increments on each line end it consumes.
        final int currentLine = LINENO;
        return currentLine;
    }

    /**
     * Returns the string representation of the current stream token. 
     *
     * @return  a string representation of the token specified by the
     *          <code>ttype</code>, <code>nval</code>, and <code>sval</code>
     *          fields.
     * @see     java.io.StreamTokenizer#nval
     * @see     java.io.StreamTokenizer#sval
     * @see     java.io.StreamTokenizer#ttype
     */
    public String toString() {
        final String description;
        if (ttype == TT_EOF) {
            description = "EOF";
        } else if (ttype == TT_EOL) {
            description = "EOL";
        } else if (ttype == TT_WORD) {
            // Word tokens are described by their text.
            description = sval;
        } else if (ttype == TT_NUMBER) {
            description = "n=" + nval;
        } else if (ttype == TT_NOTHING) {
            description = "NOTHING";
        } else {
            // Any other type is shown as the character itself in single quotes.
            description = "'" + (char) ttype + "'";
        }
        return "Token[" + description + "], line " + LINENO;
    }

}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?