📄 scanner.java

📁 XML的DTD的解析、对XML文件作用的JAVA源代码。
💻 JAVA
字号:
package com.wutka.dtd;

import java.io.*;
import java.util.*;

/** Lexical scanner for DTDs
 *
 * @author Mark Wutka
 * @version $Revision: 1.19 $ $Date: 2002/07/31 00:19:10 $ by $Author: wutka $
 */

class Scanner
{
	// Token types produced by this scanner. Each constant is built from a
	// numeric value and a name; the notes below describe the input for
	// which readNextToken() returns a token of that type.

	// "<?"
	public static final TokenType LTQUES = new TokenType(0, "LTQUES");
	// A name starting with an identifier character; also used for
	// '#'-prefixed words and for "&name;" references.
	public static final TokenType IDENTIFIER = new TokenType(1, "IDENTIFIER");
	// '='
	public static final TokenType EQUAL = new TokenType(2, "EQUAL");
	// '('
	public static final TokenType LPAREN = new TokenType(3, "LPAREN");
	// ')'
	public static final TokenType RPAREN = new TokenType(4, "RPAREN");
	// ','
	public static final TokenType COMMA = new TokenType(5, "COMMA");
	// Text enclosed in single or double quotes
	public static final TokenType STRING = new TokenType(6, "STRING");
	// "?>" -- not returned by readNextToken(); it scans '?' and '>' as
	// separate QUES and GT tokens.
	public static final TokenType QUESGT = new TokenType(7, "QUESGT");
	// "<!" not followed by '[' or '-'
	public static final TokenType LTBANG = new TokenType(8, "LTBANG");
	// '>'
	public static final TokenType GT = new TokenType(9, "GT");
	// '|'
	public static final TokenType PIPE = new TokenType(10, "PIPE");
	// '?'
	public static final TokenType QUES = new TokenType(11, "QUES");
	// '+'
	public static final TokenType PLUS = new TokenType(12, "PLUS");
	// '*'
	public static final TokenType ASTERISK = new TokenType(13, "ASTERISK");
	// '<' not followed by '!' or '?'
	public static final TokenType LT = new TokenType(14, "LT");
	// End of input
	public static final TokenType EOF = new TokenType(15, "EOF");
	// A "<!-- ... -->" comment; the token value is the comment text
	public static final TokenType COMMENT = new TokenType(16, "COMMENT");
	// '%' followed by whitespace
	public static final TokenType PERCENT = new TokenType(17, "PERCENT");
	// "<![" (start of a conditional section)
	public static final TokenType CONDITIONAL =
        new TokenType(18, "CONDITIONAL");
	// "]]>" (end of a conditional section)
	public static final TokenType ENDCONDITIONAL =
        new TokenType(19, "ENDCONDITIONAL");
    // A run of name characters whose first character is not an
    // identifier-start character
    public static final TokenType NMTOKEN = new TokenType(20, "NMTOKEN");

    /**
     * Bookkeeping for one input source: the reader itself, an identifier
     * supplied by whoever created it, and the current line/column position,
     * both of which start at 1.
     */
    protected class StreamInfo
    {
        String id;
        Reader in;
        int lineNumber;
        int column;

        /**
         * @param id identifier for this input source
         * @param in reader that supplies the characters
         */
        StreamInfo(String id, Reader in)
        {
            this.id = id;
            this.in = in;
            this.lineNumber = 1;
            this.column = 1;
        }
    }

    // Input source currently being read by readNextChar().
    protected StreamInfo in;
    // Suspended input sources; readNextChar() pops one when the current
    // source is exhausted. Not assigned by the constructors, so it may be
    // null (readNextChar() checks for that).
    protected Stack inputStreams;
	// Token buffered by peek(); null when nothing is buffered.
	protected Token nextToken;
	// One-character lookahead filled by peekChar(); 0 means "nothing
	// buffered", a negative value means end of input was seen.
	protected int nextChar;
    // Set once the EOF token has been returned by readNextToken().
    protected boolean atEOF;
    // When true, read() echoes every character it returns to System.out.
    protected boolean trace;
    // When non-null, read() and peekChar() take characters from this
    // buffer instead of the input source.
    protected char[] expandBuffer;
    // Index of the next character to take from expandBuffer.
    protected int expandPos;
    // Created empty by the constructor.
    // NOTE(review): presumably caches entity expansions; usage is not
    // visible in this part of the file -- confirm.
    protected Hashtable entityExpansion;
    // Expansion helper handed to the constructor.
    // NOTE(review): presumably consulted when expanding entities -- confirm.
    protected EntityExpansion expander;

	/**
	 * Creates a scanner over the given reader with tracing turned off.
	 *
	 * @param inReader reader supplying the text to scan
	 * @param anExpander expansion helper stored in {@code expander}
	 */
	public Scanner(Reader inReader, EntityExpansion anExpander)
	{
        this(inReader, false, anExpander);
	}

	/**
	 * Creates a scanner over the given reader.
	 *
	 * @param inReader reader supplying the text to scan; it is wrapped in a
	 *                 StreamInfo whose id is the empty string
	 * @param doTrace when true, every character returned by read() is also
	 *                printed to System.out
	 * @param anExpander expansion helper stored in {@code expander}
	 */
	public Scanner(Reader inReader, boolean doTrace, EntityExpansion anExpander)
	{
		this.in = new StreamInfo("", inReader);
		this.expander = anExpander;
		this.trace = doTrace;

		// Start with no pending expansion text and an empty expansion table.
		this.entityExpansion = new Hashtable();
		this.expandBuffer = null;
		this.atEOF = false;
	}

	/**
	 * Returns the next token without consuming it. The token is scanned on
	 * first request and kept in {@code nextToken} until get() takes it.
	 *
	 * @return the next token
	 * @throws IOException if scanning the token fails
	 */
	public Token peek()
		throws IOException
	{
		if (nextToken != null)
		{
			return nextToken;
		}

		nextToken = readNextToken();
		return nextToken;
	}

	/**
	 * Returns the next token and consumes it. A token previously buffered
	 * by peek() is handed out first; otherwise a new one is scanned.
	 *
	 * @return the next token
	 * @throws IOException if scanning the token fails
	 */
	public Token get()
		throws IOException
	{
		Token current = (nextToken != null) ? nextToken : readNextToken();

		// Whatever was buffered has now been handed out.
		nextToken = null;
		return current;
	}

    /**
     * Reads one character from the current input source. When that source
     * is exhausted and suspended sources are waiting on
     * {@code inputStreams}, the exhausted reader is closed, the most
     * recently suspended source becomes current, and reading continues
     * from it.
     *
     * @return the character read, or a negative value when the current
     *         source is exhausted and no suspended source remains
     * @throws IOException if reading or closing a reader fails
     */
    protected int readNextChar()
        throws IOException
    {
        int ch = in.in.read();

        while (ch < 0 && inputStreams != null && !inputStreams.empty())
        {
            in.in.close();
            in = (StreamInfo) inputStreams.pop();
            ch = in.in.read();
        }

        return ch;
    }

	/**
	 * Returns the next character without consuming it.
	 *
	 * Pending expansion text in {@code expandBuffer} takes priority over
	 * the input source. Otherwise the one-character lookahead
	 * {@code nextChar} is filled on demand (0 marks it as empty), and the
	 * current source's column and line counters are advanced for the
	 * character just fetched.
	 *
	 * @return the next character, or a negative value at end of input
	 * @throws IOException if reading from the input source fails
	 */
	protected int peekChar()
		throws IOException
	{
		if (expandBuffer != null)
		{
			return (int) expandBuffer[expandPos];
		}

		if (nextChar != 0)
		{
			// Lookahead already holds a character (or the EOF marker).
			return nextChar;
		}

		nextChar = readNextChar();
		in.column++;
		if (nextChar == '\n')
		{
			// A newline starts a fresh line at column 1.
			in.lineNumber++;
			in.column = 1;
		}

		return nextChar;
	}

	/**
	 * Consumes and returns the next character.
	 *
	 * Characters come from {@code expandBuffer} while it is non-null; the
	 * buffer is dropped as soon as its last character has been taken.
	 * Otherwise the lookahead character is returned and cleared. With
	 * tracing on, the returned character is also printed to System.out.
	 *
	 * @return the character consumed, or a negative value at end of input
	 * @throws IOException if reading from the input source fails
	 */
	protected int read()
		throws IOException
	{
		if (expandBuffer == null)
		{
			if (nextChar == 0)
			{
				// Nothing buffered yet: let peekChar() fill the lookahead.
				peekChar();
			}

			int result = nextChar;
			nextChar = 0;

			if (trace)
			{
				System.out.print((char) result);
			}
			return result;
		}

		int expanded = expandBuffer[expandPos++];
		if (expandPos >= expandBuffer.length)
		{
			// Expansion text is used up; go back to the input source.
			expandPos = -1;
			expandBuffer = null;
		}
		if (trace)
		{
			System.out.print((char) expanded);
		}
		return expanded;
	}

    /**
     * Consumes characters up to and including the first occurrence of
     * {@code stopChar}, or up to end of input if it never occurs.
     *
     * @param stopChar character that ends the run; it is consumed but not
     *                 included in the result
     * @return the characters read before {@code stopChar} (or before end
     *         of input)
     * @throws IOException if reading fails
     */
    public String getUntil(char stopChar)
        throws IOException
    {
        StringBuffer collected = new StringBuffer();

        for (int ch = read(); ch >= 0 && ch != stopChar; ch = read())
        {
            collected.append((char) ch);
        }

        return collected.toString();
    }

    /**
     * Discards characters up to and including the first occurrence of
     * {@code stopChar}, or up to end of input if it never occurs.
     *
     * @param stopChar character that ends the skipped run
     * @throws IOException if reading fails
     */
    public void skipUntil(char stopChar)
        throws IOException
    {
        int ch;

        do
        {
            ch = read();
        }
        while (ch >= 0 && ch != stopChar);
    }

	protected Token readNextToken()
		throws IOException
	{
		for (;;)
		{
			int ch = read();

			if (ch == '<')
			{
				ch = peekChar();
				if (ch == '!')
				{
					read();

                    if (peekChar() == '[')
                    {
                        read();

                        return new Token(CONDITIONAL);
                    }

					if (peekChar() != '-')
					{
						return new Token(LTBANG);
					}
					else
					{
						read();
						if (peekChar() != '-')
						{
                            throw new DTDParseException(getUriId(),
								"Invalid character sequence <!-"+read(),
                                getLineNumber(), getColumn());
						}
						read();

						StringBuffer buff = new StringBuffer();
						for (;;)
						{
                            if (peekChar() < 0)
                            {
                                throw new DTDParseException(getUriId(),
                                    "Unterminated comment: <!--"+
                                    buff.toString(),
                                    getLineNumber(), getColumn());
                            }

							if (peekChar() != '-')
							{
								buff.append((char) read());
							}
							else
							{
								read();
                                if (peekChar() < 0)
                                {
                                    throw new DTDParseException(getUriId(),
                                        "Unterminated comment: <!--"+
                                        buff.toString(),
                                        getLineNumber(), getColumn());
                                }
								if (peekChar() == '-')
								{
									read();
									if (peekChar() != '>')
									{
                                        throw new DTDParseException(getUriId(),
											"Invalid character sequence --"+
											read(), getLineNumber(), getColumn());
									}
									read();
									return new Token(COMMENT, buff.toString());
								}
								else
								{
									buff.append('-');
								}
							}
						}
					}
				}
				else if (ch == '?')
				{
					read();
					return new Token(LTQUES);
				}
				else
				{
					return new Token(LT);
				}
			}
			else if (ch == '?')
			{
// Need to treat ?> as two separate tokens because
// <!ELEMENT blah (foo)?> needs the ? as a QUES, not QUESGT
/*				ch = peekChar();

				if (ch == '>')
				{
					read();
					return new Token(QUESGT);
				}
				else
				{
					return new Token(QUES);
				}*/
				return new Token(QUES);
			}
			else if ((ch == '"') || (ch == '\''))
			{
				int quoteChar = ch;

				StringBuffer buff = new StringBuffer();
				while (peekChar() != quoteChar)
				{
					ch = read();
					if (ch == '\\')
					{
						buff.append((char) read());
					}                    else if (ch < 0)                    {                        break;  // IF EOF before getting end quote                    }
					else
					{
						buff.append((char) ch);
					}
				}
				read();
				return new Token(STRING, buff.toString());
			}
			else if (ch == '(')
			{
				return new Token(LPAREN);
			}
			else if (ch == ')')
			{
				return new Token(RPAREN);
			}
			else if (ch == '|')
			{
				return new Token(PIPE);
			}
			else if (ch == '>')
			{
				return new Token(GT);
			}
			else if (ch == '=')
			{
				return new Token(EQUAL);
			}
			else if (ch == '*')
			{
				return new Token(ASTERISK);
			}
            else if (ch == ']')
            {
                if (read() != ']')
                {
                    throw new DTDParseException(getUriId(),
                        "Illegal character in input stream: "+ch,
                        getLineNumber(), getColumn());
                }
                if (read() != '>')
                {
                    throw new DTDParseException(getUriId(),
                        "Illegal character in input stream: "+ch,
                        getLineNumber(), getColumn());
                }

                return new Token(ENDCONDITIONAL);
            }
			else if (ch == '#')
			{
				StringBuffer buff = new StringBuffer();
				buff.append((char) ch);

                if (isIdentifierChar((char) peekChar()))                {                    buff.append((char) read());				    while (isNameChar((char) peekChar()))
				    {
					    buff.append((char) read());
				    }
                }				return new Token(IDENTIFIER, buff.toString());
			}
			else if ((ch == '&') || (ch == '%'))
			{
                if ((ch == '%') && Character.isWhitespace((char)peekChar()))
                {
                    return new Token(PERCENT);
                }

                boolean peRef = (ch == '%');

				StringBuffer buff = new StringBuffer();
				buff.append((char) ch);

                if (isIdentifierChar((char) peekChar()))                {                    buff.append((char) read());				    while (isNameChar((char) peekChar()))
				    {
					    buff.append((char) read());
				    }
                }
				if (read() != ';')
				{
                    throw new DTDParseException(getUriId(),
                                "Expected ';' after reference "+
                                buff.toString()+", found '"+ch+"'",
                                getLineNumber(), getColumn());
				}
                buff.append(';');

                if (peRef)
                {                    if (expandEntity(buff.toString()))                    {
                        continue;                    }                    else                    {                        // MAW: Added version 1.17                        // If the entity can't be expanded, don't return it, skip it                        continue;                    }
                }
				return new Token(IDENTIFIER, buff.toString());
			}
			else if (ch == '+')
			{
				return new Token(PLUS);
			}
			else if (ch == ',')
			{
				return new Token(COMMA);
			}
			else if (isIdentifierChar((char) ch))
			{
				StringBuffer buff = new StringBuffer();
				buff.append((char) ch);

				while (isNameChar((char) peekChar()))
				{
					buff.append((char) read());
				}
				return new Token(IDENTIFIER, buff.toString());
			}
			else if (isNameChar((char) ch))
			{
				StringBuffer buff = new StringBuffer();
				buff.append((char) ch);

				while (isNameChar((char) peekChar()))
				{
					buff.append((char) read());
				}
				return new Token(NMTOKEN, buff.toString());
			}
			else if (ch < 0)
			{
                if (atEOF)
                {
                    throw new IOException("Read past EOF");
                }
                atEOF = true;
				return new Token(EOF);
			}
			else if (Character.isWhitespace((char) ch))
			{
				continue;
			}
			else
			{
                throw new DTDParseException(getUriId(),
                                "Illegal character in input stream: "+ch,
                                getLineNumber(), getColumn());
			}
		}
	}

    public void skipConditional()
        throws IOException
    {
// 070401 MAW: Fix for nested conditionals provided by Noah Fike
        // BEGIN CHANGE
        int ch = 0;
        int nestingDepth = 0; // Add nestingDepth parameter

//    Everything is ignored within an ignored section, except the
//    sub-section delimiters '<![' and ']]>'. These must be balanced,
//    but no section keyword is required:
//    Conditional Section
//[61] conditionalSect ::= includeSect | ignoreSect
//[62] includeSect ::=
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -