📄 xmlparser.java
字号:
// XmlParser.java: the main parser class.// NO WARRANTY! See README, and copyright below.// $Id: XmlParser.java,v 2.6 1998/05/02 00:56:52 david Exp $package com.microstar.xml;import java.io.BufferedInputStream;import java.io.EOFException;import java.io.InputStream;import java.io.Reader;import java.net.URL;import java.net.URLConnection;import java.util.Enumeration;import java.util.Hashtable;import java.util.Stack;/** * Parse XML documents and return parse events through call-backs. * <p>You need to define a class implementing the <code>XmlHandler</code> * interface: an object belonging to this class will receive the * callbacks for the events. (As an alternative to implementing * the full XmlHandler interface, you can simply extend the * <code>HandlerBase</code> convenience class.) * <p>Usage (assuming that <code>MyHandler</code> is your implementation * of the <code>XmlHandler</code> interface): * <pre> * XmlHandler handler = new MyHandler(); * XmlParser parser = new XmlParser(); * parser.setHandler(handler); * try { * parser.parse("http://www.host.com/doc.xml", null); * } catch (Exception e) { * [do something interesting] * } * </pre> * <p>Alternatively, you can use the standard SAX interfaces * with the <code>SAXDriver</code> class as your entry point. * @author Copyright (c) 1997, 1998 by Microstar Software Ltd. * @author Written by David Megginson <dmeggins@microstar.com> * @version 1.1 * @see XmlHandler * @see HandlerBase * @see SAXDriver */public class XmlParser { // // Use special cheats that speed up the code (currently about 50%), // but may cause problems with future maintenance and add to the // class file size (about 500 bytes). // private final static boolean USE_CHEATS = true; ////////////////////////////////////////////////////////////////////// // Constructors. //////////////////////////////////////////////////////////////////////// /** * Construct a new parser with no associated handler. * @see #setHandler * @see #parse */ public XmlParser () { } /** * Set the handler that will receive parsing events. * @param handler The handler to receive callback events. * @see #parse * @see XmlHandler */ public void setHandler (XmlHandler handler) { this.handler = handler; } /** * Parse an XML document from a URI. * <p>You may parse a document more than once, but only one thread * may call this method for an object at one time. * @param systemId The URI of the document. * @param publicId The public identifier of the document, or null. * @param encoding The suggested encoding, or null if unknown. * @exception java.lang.Exception Any exception thrown by your * own handlers, or any derivation of java.io.IOException * thrown by the parser itself. */ public void parse (String systemId, String publicId, String encoding) throws java.lang.Exception { doParse(systemId, publicId, null, null, encoding); } /** * Parse an XML document from a byte stream. * <p>The URI that you supply will become the base URI for * resolving relative links, but Ælfred will actually read * the document from the supplied input stream. * <p>You may parse a document more than once, but only one thread * may call this method for an object at one time. * @param systemId The base URI of the document, or null if not * known. * @param publicId The public identifier of the document, or null * if not known. * @param stream A byte input stream. * @param encoding The suggested encoding, or null if unknown. * @exception java.lang.Exception Any exception thrown by your * own handlers, or any derivation of java.io.IOException * thrown by the parser itself. */ public void parse (String systemId, String publicId, InputStream stream, String encoding) throws java.lang.Exception { doParse(systemId, publicId, null, stream, encoding); } /** * Parse an XML document from a character stream. * <p>The URI that you supply will become the base URI for * resolving relative links, but Ælfred will actually read * the document from the supplied input stream. * <p>You may parse a document more than once, but only one thread * may call this method for an object at one time. * @param systemId The base URI of the document, or null if not * known. * @param publicId The public identifier of the document, or null * if not known. * @param reader A character stream. * @exception java.lang.Exception Any exception thrown by your * own handlers, or any derivation of java.io.IOException * thrown by the parser itself. */ public void parse (String systemId, String publicId, Reader reader) throws java.lang.Exception { doParse(systemId, publicId, reader, null, null); } private synchronized void doParse (String systemId, String publicId, Reader reader, InputStream stream, String encoding) throws java.lang.Exception { basePublicId = publicId; baseURI = systemId; baseReader = reader; baseInputStream = stream; initializeVariables(); // Set the default entities here. setInternalEntity(intern("amp"), "&"); setInternalEntity(intern("lt"), "<"); setInternalEntity(intern("gt"), ">"); setInternalEntity(intern("apos"), "'"); setInternalEntity(intern("quot"), """); if (handler != null) { handler.startDocument(); } pushURL("[document]", basePublicId, baseURI, baseReader, baseInputStream, encoding); parseDocument(); if (handler != null) { handler.endDocument(); } cleanupVariables(); } //////////////////////////////////////////////////////////////////////// // Constants. //////////////////////////////////////////////////////////////////////// // // Constants for element content type. // /** * Constant: an element has not been declared. * @see #getElementContentType */ public final static int CONTENT_UNDECLARED = 0; /** * Constant: the element has a content model of ANY. * @see #getElementContentType */ public final static int CONTENT_ANY = 1; /** * Constant: the element has declared content of EMPTY. * @see #getElementContentType */ public final static int CONTENT_EMPTY = 2; /** * Constant: the element has mixed content. * @see #getElementContentType */ public final static int CONTENT_MIXED = 3; /** * Constant: the element has element content. * @see #getElementContentType */ public final static int CONTENT_ELEMENTS = 4; // // Constants for the entity type. // /** * Constant: the entity has not been declared. * @see #getEntityType */ public final static int ENTITY_UNDECLARED = 0; /** * Constant: the entity is internal. * @see #getEntityType */ public final static int ENTITY_INTERNAL = 1; /** * Constant: the entity is external, non-XML data. * @see #getEntityType */ public final static int ENTITY_NDATA = 2; /** * Constant: the entity is external XML data. * @see #getEntityType */ public final static int ENTITY_TEXT = 3; // // Constants for attribute type. // /** * Constant: the attribute has not been declared for this element type. * @see #getAttributeType */ public final static int ATTRIBUTE_UNDECLARED = 0; /** * Constant: the attribute value is a string value. * @see #getAttributeType */ public final static int ATTRIBUTE_CDATA = 1; /** * Constant: the attribute value is a unique identifier. * @see #getAttributeType */ public final static int ATTRIBUTE_ID = 2; /** * Constant: the attribute value is a reference to a unique identifier. * @see #getAttributeType */ public final static int ATTRIBUTE_IDREF = 3; /** * Constant: the attribute value is a list of ID references. * @see #getAttributeType */ public final static int ATTRIBUTE_IDREFS = 4; /** * Constant: the attribute value is the name of an entity. * @see #getAttributeType */ public final static int ATTRIBUTE_ENTITY = 5; /** * Constant: the attribute value is a list of entity names. * @see #getAttributeType */ public final static int ATTRIBUTE_ENTITIES = 6; /** * Constant: the attribute value is a name token. * @see #getAttributeType */ public final static int ATTRIBUTE_NMTOKEN = 7; /** * Constant: the attribute value is a list of name tokens. * @see #getAttributeType */ public final static int ATTRIBUTE_NMTOKENS = 8; /** * Constant: the attribute value is a token from an enumeration. * @see #getAttributeType */ public final static int ATTRIBUTE_ENUMERATED = 9; /** * Constant: the attribute is the name of a notation. * @see #getAttributeType */ public final static int ATTRIBUTE_NOTATION = 10; // // When the class is loaded, populate the hash table of // attribute types. // /** * Hash table of attribute types. */ private static Hashtable attributeTypeHash; static { attributeTypeHash = new Hashtable(); attributeTypeHash.put("CDATA", new Integer(ATTRIBUTE_CDATA)); attributeTypeHash.put("ID", new Integer(ATTRIBUTE_ID)); attributeTypeHash.put("IDREF", new Integer(ATTRIBUTE_IDREF)); attributeTypeHash.put("IDREFS", new Integer(ATTRIBUTE_IDREFS)); attributeTypeHash.put("ENTITY", new Integer(ATTRIBUTE_ENTITY)); attributeTypeHash.put("ENTITIES", new Integer(ATTRIBUTE_ENTITIES)); attributeTypeHash.put("NMTOKEN", new Integer(ATTRIBUTE_NMTOKEN)); attributeTypeHash.put("NMTOKENS", new Integer(ATTRIBUTE_NMTOKENS)); attributeTypeHash.put("NOTATION", new Integer(ATTRIBUTE_NOTATION)); } // // Constants for supported encodings. // private final static int ENCODING_UTF_8 = 1; private final static int ENCODING_ISO_8859_1 = 2; private final static int ENCODING_UCS_2_12 = 3; private final static int ENCODING_UCS_2_21 = 4; private final static int ENCODING_UCS_4_1234 = 5; private final static int ENCODING_UCS_4_4321 = 6; private final static int ENCODING_UCS_4_2143 = 7; private final static int ENCODING_UCS_4_3412 = 8; // // Constants for attribute default value. // /** * Constant: the attribute is not declared. * @see #getAttributeDefaultValueType */ public final static int ATTRIBUTE_DEFAULT_UNDECLARED = 0; /** * Constant: the attribute has a literal default value specified. * @see #getAttributeDefaultValueType * @see #getAttributeDefaultValue */ public final static int ATTRIBUTE_DEFAULT_SPECIFIED = 1; /** * Constant: the attribute was declared #IMPLIED. * @see #getAttributeDefaultValueType */ public final static int ATTRIBUTE_DEFAULT_IMPLIED = 2; /** * Constant: the attribute was declared #REQUIRED. * @see #getAttributeDefaultValueType */ public final static int ATTRIBUTE_DEFAULT_REQUIRED = 3; /** * Constant: the attribute was declared #FIXED. * @see #getAttributeDefaultValueType * @see #getAttributeDefaultValue */ public final static int ATTRIBUTE_DEFAULT_FIXED = 4; // // Constants for input. // private final static int INPUT_NONE = 0; private final static int INPUT_INTERNAL = 1; private final static int INPUT_EXTERNAL = 2; private final static int INPUT_STREAM = 3; private final static int INPUT_BUFFER = 4; private final static int INPUT_READER = 5; // // Flags for reading literals. // private final static int LIT_CHAR_REF = 1; private final static int LIT_ENTITY_REF = 2; private final static int LIT_PE_REF = 4; private final static int LIT_NORMALIZE = 8; // // Flags for parsing context. // private final static int CONTEXT_NONE = 0; private final static int CONTEXT_DTD = 1; private final static int CONTEXT_ENTITYVALUE = 2; private final static int CONTEXT_ATTRIBUTEVALUE = 3; ////////////////////////////////////////////////////////////////////// // Error reporting. ////////////////////////////////////////////////////////////////////// /** * Report an error. * @param message The error message. * @param textFound The text that caused the error (or null). * @see XmlHandler#error * @see #line */ void error (String message, String textFound, String textExpected) throws java.lang.Exception { errorCount++; if (textFound != null) { message = message + " (found \"" + textFound + "\")"; } if (textExpected != null) { message = message + " (expected \"" + textExpected + "\")"; } if (handler != null) { String uri = null; if (externalEntity != null) { uri = externalEntity.getURL().toString(); } handler.error(message, uri, line, column); } } /** * Report a serious error. * @param message The error message. * @param textFound The text that caused the error (or null). */ void error (String message, char textFound, String textExpected) throws java.lang.Exception { error(message, new Character(textFound).toString(), textExpected); } ////////////////////////////////////////////////////////////////////// // Major syntactic productions. ////////////////////////////////////////////////////////////////////// /** * Parse an XML document. * <pre> * [1] document ::= prolog element Misc* * </pre> * <p>This is the top-level parsing function for a single XML * document. As a minimum, a well-formed document must have * a document element, and a valid document must have a prolog * as well. */ void parseDocument () throws java.lang.Exception { char c; parseProlog(); require('<'); parseElement(); try { parseMisc(); //skip all white, PIs, and comments c=readCh(); //if this doesn't throw an exception... error("unexpected characters after document end",c,null); } catch (EOFException e) {return;} } /** * Skip a comment. * <pre> * [18] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* "-->" * </pre> * <p>(The <code><!--</code> has already been read.) */ void parseComment () throws java.lang.Exception { skipUntil("-->"); } /** * Parse a processing instruction and do a call-back. * <pre> * [19] PI ::= '<?' Name (S (Char* - (Char* '?>' Char*)))? '?>' * </pre> * <p>(The <code><?</code> has already been read.) * <p>An XML processing instruction <em>must</em> begin with * a Name, which is the instruction's target. */ void parsePI () throws java.lang.Exception { String name;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -