⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xmlparser.java

📁 An open_source WAP browser. include Java code. support WML documents and WBMP images.
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
// XmlParser.java: the main parser class.
// NO WARRANTY! See README, and copyright below.
// $Id: XmlParser.java,v 1.1.1.1 2000/10/05 22:37:13 fuller Exp $

package com.microstar.xml;

import java.io.BufferedInputStream;
import java.io.EOFException;
import java.io.InputStream;
import java.io.Reader;
import java.net.URL;
import java.net.URLConnection;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Stack;


/**
  * Parse XML documents and return parse events through call-backs.
  * <p>You need to define a class implementing the <code>XmlHandler</code>
  * interface: an object belonging to this class will receive the
  * callbacks for the events.  (As an alternative to implementing
  * the full XmlHandler interface, you can simply extend the 
  * <code>HandlerBase</code> convenience class.)
  * <p>Usage (assuming that <code>MyHandler</code> is your implementation
  * of the <code>XmlHandler</code> interface):
  * <pre>
  * XmlHandler handler = new MyHandler();
  * XmlParser parser = new XmlParser();
  * parser.setHandler(handler);
  * try {
  *   parser.parse("http://www.host.com/doc.xml", null);
  * } catch (Exception e) {
  *   [do something interesting]
  * }
  * </pre>
  * <p>Alternatively, you can use the standard SAX interfaces
  * with the <code>SAXDriver</code> class as your entry point.
  * @author Copyright (c) 1997, 1998 by Microstar Software Ltd.
  * @author Written by David Megginson &lt;dmeggins@microstar.com&gt;
  * @version 1.1
  * @see XmlHandler
  * @see HandlerBase
  * @see SAXDriver
  */
public class XmlParser {

  //
  // Use special cheats that speed up the code (currently about 50%),
  // but may cause problems with future maintenance and add to the
  // class file size (about 500 bytes).
  //
  private final static boolean USE_CHEATS = true;



  //////////////////////////////////////////////////////////////////////
  // Constructors.
  ////////////////////////////////////////////////////////////////////////


  /**
    * Construct a new parser with no associated handler.
    * @see #setHandler
    * @see #parse
    */
  public XmlParser ()
  {
  }


  /**
    * Set the handler that will receive parsing events.
    * @param handler The handler to receive callback events.
    * @see #parse
    * @see XmlHandler
    */
  public void setHandler (XmlHandler handler)
  {
    this.handler = handler;
  }


  /**
    * Parse an XML document from a URI.
    * <p>You may parse a document more than once, but only one thread
    * may call this method for an object at one time.
    * @param systemId The URI of the document.
    * @param publicId The public identifier of the document, or null.
    * @param encoding The suggested encoding, or null if unknown.
    * @exception java.lang.Exception Any exception thrown by your
    *            own handlers, or any derivation of java.io.IOException
    *            thrown by the parser itself.
    */
  public void parse (String systemId, String publicId, String encoding)
    throws java.lang.Exception
  {
    doParse(systemId, publicId, null, null, encoding);
  }


  /**
    * Parse an XML document from a byte stream.
    * <p>The URI that you supply will become the base URI for
    * resolving relative links, but &AElig;lfred will actually read
    * the document from the supplied input stream.
    * <p>You may parse a document more than once, but only one thread
    * may call this method for an object at one time.
    * @param systemId The base URI of the document, or null if not
    *                 known.
    * @param publicId The public identifier of the document, or null
    *                 if not known.
    * @param stream A byte input stream.
    * @param encoding The suggested encoding, or null if unknown.
    * @exception java.lang.Exception Any exception thrown by your
    *            own handlers, or any derivation of java.io.IOException
    *            thrown by the parser itself.
    */
  public void parse (String systemId, String publicId,
		     InputStream stream, String encoding)
    throws java.lang.Exception
  {
    doParse(systemId, publicId, null, stream, encoding);
  }


  /**
    * Parse an XML document from a character stream.
    * <p>The URI that you supply will become the base URI for
    * resolving relative links, but &AElig;lfred will actually read
    * the document from the supplied input stream.
    * <p>You may parse a document more than once, but only one thread
    * may call this method for an object at one time.
    * @param systemId The base URI of the document, or null if not
    *                 known.
    * @param publicId The public identifier of the document, or null
    *                 if not known.
    * @param reader A character stream.
    * @exception java.lang.Exception Any exception thrown by your
    *            own handlers, or any derivation of java.io.IOException
    *            thrown by the parser itself.
    */
  public void parse (String systemId, String publicId, Reader reader)
    throws java.lang.Exception
  {
    doParse(systemId, publicId, reader, null, null);
  }


  private synchronized void doParse (String systemId, String publicId,
				     Reader reader, InputStream stream,
				     String encoding)
    throws java.lang.Exception
  {
    basePublicId = publicId;
    baseURI = systemId;
    baseReader = reader;
    baseInputStream = stream;

    initializeVariables();

				// Set the default entities here.
    setInternalEntity(intern("amp"), "&#38;");
    setInternalEntity(intern("lt"), "&#60;");
    setInternalEntity(intern("gt"), "&#62;");
    setInternalEntity(intern("apos"), "&#39;");
    setInternalEntity(intern("quot"), "&#34;");

    if (handler != null) {
      handler.startDocument();
    }

    pushURL("[document]", basePublicId, baseURI, baseReader, baseInputStream,
	    encoding);

    parseDocument();

    if (handler != null) {
      handler.endDocument();
    }
    cleanupVariables();
  }



  ////////////////////////////////////////////////////////////////////////
  // Constants.
  ////////////////////////////////////////////////////////////////////////

  //
  // Constants for element content type.
  //

  /**
    * Constant: an element has not been declared.
    * @see #getElementContentType
    */
  public final static int CONTENT_UNDECLARED = 0;

  /**
    * Constant: the element has a content model of ANY.
    * @see #getElementContentType
    */
  public final static int CONTENT_ANY = 1;

  /**
    * Constant: the element has declared content of EMPTY.
    * @see #getElementContentType
    */
  public final static int CONTENT_EMPTY = 2;

  /**
    * Constant: the element has mixed content.
    * @see #getElementContentType
    */
  public final static int CONTENT_MIXED = 3;

  /**
    * Constant: the element has element content.
    * @see #getElementContentType
    */
  public final static int CONTENT_ELEMENTS = 4;


  //
  // Constants for the entity type.
  //

  /**
    * Constant: the entity has not been declared.
    * @see #getEntityType
    */
  public final static int ENTITY_UNDECLARED = 0;

  /**
    * Constant: the entity is internal.
    * @see #getEntityType
    */
  public final static int ENTITY_INTERNAL = 1;

  /**
    * Constant: the entity is external, non-XML data.
    * @see #getEntityType
    */
  public final static int ENTITY_NDATA = 2;

  /**
    * Constant: the entity is external XML data.
    * @see #getEntityType
    */
  public final static int ENTITY_TEXT = 3;


  //
  // Constants for attribute type.
  //

  /**
    * Constant: the attribute has not been declared for this element type.
    * @see #getAttributeType
    */
  public final static int ATTRIBUTE_UNDECLARED = 0;

  /**
    * Constant: the attribute value is a string value.
    * @see #getAttributeType
    */
  public final static int ATTRIBUTE_CDATA = 1;

  /**
    * Constant: the attribute value is a unique identifier.
    * @see #getAttributeType
    */
  public final static int ATTRIBUTE_ID = 2;

  /**
    * Constant: the attribute value is a reference to a unique identifier.
    * @see #getAttributeType
    */
  public final static int ATTRIBUTE_IDREF = 3;

  /**
    * Constant: the attribute value is a list of ID references.
    * @see #getAttributeType
    */
  public final static int ATTRIBUTE_IDREFS = 4;

  /**
    * Constant: the attribute value is the name of an entity.
    * @see #getAttributeType
    */
  public final static int ATTRIBUTE_ENTITY = 5;

  /**
    * Constant: the attribute value is a list of entity names.
    * @see #getAttributeType
    */
  public final static int ATTRIBUTE_ENTITIES = 6;

  /**
    * Constant: the attribute value is a name token.
    * @see #getAttributeType
    */
  public final static int ATTRIBUTE_NMTOKEN = 7;

  /**
    * Constant: the attribute value is a list of name tokens.
    * @see #getAttributeType
    */
  public final static int ATTRIBUTE_NMTOKENS = 8;

  /**
    * Constant: the attribute value is a token from an enumeration.
    * @see #getAttributeType
    */
  public final static int ATTRIBUTE_ENUMERATED = 9;

  /**
    * Constant: the attribute is the name of a notation.
    * @see #getAttributeType
    */
  public final static int ATTRIBUTE_NOTATION = 10;


  //
  // When the class is loaded, populate the hash table of
  // attribute types.
  //

  /**
    * Hash table of attribute types.
    */
  private static Hashtable attributeTypeHash;
  static {
    attributeTypeHash = new Hashtable();
    attributeTypeHash.put("CDATA", new Integer(ATTRIBUTE_CDATA));
    attributeTypeHash.put("ID", new Integer(ATTRIBUTE_ID));
    attributeTypeHash.put("IDREF", new Integer(ATTRIBUTE_IDREF));
    attributeTypeHash.put("IDREFS", new Integer(ATTRIBUTE_IDREFS));
    attributeTypeHash.put("ENTITY", new Integer(ATTRIBUTE_ENTITY));
    attributeTypeHash.put("ENTITIES", new Integer(ATTRIBUTE_ENTITIES));
    attributeTypeHash.put("NMTOKEN", new Integer(ATTRIBUTE_NMTOKEN));
    attributeTypeHash.put("NMTOKENS", new Integer(ATTRIBUTE_NMTOKENS));
    attributeTypeHash.put("NOTATION", new Integer(ATTRIBUTE_NOTATION));
  }


  //
  // Constants for supported encodings.
  //
  private final static int ENCODING_UTF_8 = 1;
  private final static int ENCODING_ISO_8859_1 = 2;
  private final static int ENCODING_UCS_2_12 = 3;
  private final static int ENCODING_UCS_2_21 = 4;
  private final static int ENCODING_UCS_4_1234 = 5;
  private final static int ENCODING_UCS_4_4321 = 6;
  private final static int ENCODING_UCS_4_2143 = 7;
  private final static int ENCODING_UCS_4_3412 = 8;


  //
  // Constants for attribute default value.
  //

  /**
    * Constant: the attribute is not declared.
    * @see #getAttributeDefaultValueType
    */
  public final static int ATTRIBUTE_DEFAULT_UNDECLARED = 0;

  /**
    * Constant: the attribute has a literal default value specified.
    * @see #getAttributeDefaultValueType
    * @see #getAttributeDefaultValue
    */
  public final static int ATTRIBUTE_DEFAULT_SPECIFIED = 1;

  /**
    * Constant: the attribute was declared #IMPLIED.
    * @see #getAttributeDefaultValueType
    */
  public final static int ATTRIBUTE_DEFAULT_IMPLIED = 2;

  /**
    * Constant: the attribute was declared #REQUIRED.
    * @see #getAttributeDefaultValueType
    */
  public final static int ATTRIBUTE_DEFAULT_REQUIRED = 3;

  /**
    * Constant: the attribute was declared #FIXED.
    * @see #getAttributeDefaultValueType
    * @see #getAttributeDefaultValue
    */
  public final static int ATTRIBUTE_DEFAULT_FIXED = 4;


  //
  // Constants for input.
  //
  private final static int INPUT_NONE = 0;
  private final static int INPUT_INTERNAL = 1;
  private final static int INPUT_EXTERNAL = 2;
  private final static int INPUT_STREAM = 3;
  private final static int INPUT_BUFFER = 4;
  private final static int INPUT_READER = 5;


  //
  // Flags for reading literals.
  //
  private final static int LIT_CHAR_REF = 1;
  private final static int LIT_ENTITY_REF = 2;
  private final static int LIT_PE_REF = 4;
  private final static int LIT_NORMALIZE = 8;


  //
  // Flags for parsing context.
  //
  private final static int CONTEXT_NONE = 0;
  private final static int CONTEXT_DTD = 1;
  private final static int CONTEXT_ENTITYVALUE = 2;
  private final static int CONTEXT_ATTRIBUTEVALUE = 3;



  //////////////////////////////////////////////////////////////////////
  // Error reporting.
  //////////////////////////////////////////////////////////////////////


  /**
    * Report an error.
    * @param message The error message.
    * @param textFound The text that caused the error (or null).
    * @see XmlHandler#error
    * @see #line
    */
  void error (String message, String textFound, String textExpected)
    throws java.lang.Exception
  {
    errorCount++;
    if (textFound != null) {
      message = message + " (found \"" + textFound + "\")";
    }
    if (textExpected != null) {
      message = message + " (expected \"" + textExpected + "\")";
    }
    if (handler != null) {
      String uri = null;

      if (externalEntity != null) {
	uri = externalEntity.getURL().toString();
      }
      handler.error(message, uri, line, column);
    }
  }


  /**
    * Report a serious error.
    * @param message The error message.
    * @param textFound The text that caused the error (or null).
    */
  void error (String message, char textFound, String textExpected)
    throws java.lang.Exception
  {
    error(message, new Character(textFound).toString(), textExpected);
  }



  //////////////////////////////////////////////////////////////////////
  // Major syntactic productions.
  //////////////////////////////////////////////////////////////////////


  /**
    * Parse an XML document.
    * <pre>
    * [1] document ::= prolog element Misc*
    * </pre>
    * <p>This is the top-level parsing function for a single XML
    * document.  As a minimum, a well-formed document must have
    * a document element, and a valid document must have a prolog
    * as well.
    */
  void parseDocument ()
    throws java.lang.Exception
    {
    char c;

    parseProlog();
    require('<');
    parseElement();
    try
      {
      parseMisc();  //skip all white, PIs, and comments
      c=readCh();   //if this doesn't throw an exception...
      error("unexpected characters after document end",c,null);
      }
    catch (EOFException e)
      {return;}
    }


  /**
    * Skip a comment.
    * <pre>
    * [18] Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* "-->"
    * </pre>
    * <p>(The <code>&lt;!--</code> has already been read.)
    */
  void parseComment ()
    throws java.lang.Exception
  {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -