// (Code-viewer page header, not part of the source: file name "xmlparser.java", font-size control.)
// XmlParser.java: the main parser class.
// NO WARRANTY! See README, and copyright below.
// $Id: XmlParser.java,v 1.1.1.1 2000/10/05 22:37:13 fuller Exp $
package com.microstar.xml;
import java.io.BufferedInputStream;
import java.io.EOFException;
import java.io.InputStream;
import java.io.Reader;
import java.net.URL;
import java.net.URLConnection;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Stack;
/**
* Parse XML documents and return parse events through call-backs.
* <p>You need to define a class implementing the <code>XmlHandler</code>
* interface: an object belonging to this class will receive the
* callbacks for the events. (As an alternative to implementing
* the full XmlHandler interface, you can simply extend the
* <code>HandlerBase</code> convenience class.)
* <p>Usage (assuming that <code>MyHandler</code> is your implementation
* of the <code>XmlHandler</code> interface):
* <pre>
* XmlHandler handler = new MyHandler();
* XmlParser parser = new XmlParser();
* parser.setHandler(handler);
* try {
* parser.parse("http://www.host.com/doc.xml", null, null);
* } catch (Exception e) {
* [do something interesting]
* }
* </pre>
* <p>Alternatively, you can use the standard SAX interfaces
* with the <code>SAXDriver</code> class as your entry point.
* @author Copyright (c) 1997, 1998 by Microstar Software Ltd.
* @author Written by David Megginson &lt;dmeggins@microstar.com&gt;
* @version 1.1
* @see XmlHandler
* @see HandlerBase
* @see SAXDriver
*/
public class XmlParser {
//
// Use special cheats that speed up the code (currently about 50%),
// but may cause problems with future maintenance and add to the
// class file size (about 500 bytes).
//
// This is a compile-time boolean constant, fixed to true here. The
// code it guards is not in this part of the file.
//
private final static boolean USE_CHEATS = true;
//////////////////////////////////////////////////////////////////////
// Constructors.
////////////////////////////////////////////////////////////////////////
/**
 * Create a parser that has no handler attached yet.
 * <p>Attach a handler with {@link #setHandler} before parsing if you
 * want to receive events.
 * @see #setHandler
 * @see #parse
 */
public XmlParser ()
{
  // Nothing to set up here: doParse() calls initializeVariables()
  // at the start of every parse.
}
/**
 * Register the object that is to receive parsing events.
 * <p>The callback sites visible in this class (document start/end and
 * error reporting) are skipped while no handler is set.
 * @param handler The handler to receive callback events.
 * @see #parse
 * @see XmlHandler
 */
public void setHandler (XmlHandler handler)
{
  final XmlHandler eventSink = handler;
  this.handler = eventSink;
}
/**
 * Parse the XML document found at a URI.
 * <p>A parser object may be reused for several documents, but only one
 * thread may call this method for an object at one time.
 * @param systemId The URI of the document.
 * @param publicId The public identifier of the document, or null.
 * @param encoding The suggested encoding, or null if unknown.
 * @exception java.lang.Exception Any exception thrown by your
 *            own handlers, or any derivation of java.io.IOException
 *            thrown by the parser itself.
 */
public void parse (String systemId, String publicId, String encoding)
  throws java.lang.Exception
{
  // The caller supplies neither a character stream nor a byte stream.
  final Reader noReader = null;
  final InputStream noStream = null;
  doParse(systemId, publicId, noReader, noStream, encoding);
}
/**
 * Parse an XML document supplied as a byte stream.
 * <p>The URI you pass becomes the base URI for resolving relative
 * links, but the document itself is read from the supplied input
 * stream.
 * <p>A parser object may be reused for several documents, but only one
 * thread may call this method for an object at one time.
 * @param systemId The base URI of the document, or null if not
 *                 known.
 * @param publicId The public identifier of the document, or null
 *                 if not known.
 * @param stream A byte input stream.
 * @param encoding The suggested encoding, or null if unknown.
 * @exception java.lang.Exception Any exception thrown by your
 *            own handlers, or any derivation of java.io.IOException
 *            thrown by the parser itself.
 */
public void parse (String systemId, String publicId,
                   InputStream stream, String encoding)
  throws java.lang.Exception
{
  // No character stream: the bytes come from the caller's stream.
  final Reader noReader = null;
  doParse(systemId, publicId, noReader, stream, encoding);
}
/**
 * Parse an XML document supplied as a character stream.
 * <p>The URI you pass becomes the base URI for resolving relative
 * links, but the document itself is read from the supplied reader.
 * <p>A parser object may be reused for several documents, but only one
 * thread may call this method for an object at one time.
 * @param systemId The base URI of the document, or null if not
 *                 known.
 * @param publicId The public identifier of the document, or null
 *                 if not known.
 * @param reader A character stream.
 * @exception java.lang.Exception Any exception thrown by your
 *            own handlers, or any derivation of java.io.IOException
 *            thrown by the parser itself.
 */
public void parse (String systemId, String publicId, Reader reader)
  throws java.lang.Exception
{
  // A Reader already yields characters, so there is no byte stream
  // and no encoding suggestion to pass on.
  final InputStream noStream = null;
  final String noEncoding = null;
  doParse(systemId, publicId, reader, noStream, noEncoding);
}
private synchronized void doParse (String systemId, String publicId,
Reader reader, InputStream stream,
String encoding)
throws java.lang.Exception
{
basePublicId = publicId;
baseURI = systemId;
baseReader = reader;
baseInputStream = stream;
initializeVariables();
// Set the default entities here.
setInternalEntity(intern("amp"), "&");
setInternalEntity(intern("lt"), "<");
setInternalEntity(intern("gt"), ">");
setInternalEntity(intern("apos"), "'");
setInternalEntity(intern("quot"), """);
if (handler != null) {
handler.startDocument();
}
pushURL("[document]", basePublicId, baseURI, baseReader, baseInputStream,
encoding);
parseDocument();
if (handler != null) {
handler.endDocument();
}
cleanupVariables();
}
////////////////////////////////////////////////////////////////////////
// Constants.
////////////////////////////////////////////////////////////////////////
//
// Constants for element content type.
// Each constant is a distinct int code; 0 is the "undeclared" value.
//
/**
 * Constant: an element has not been declared.
 * @see #getElementContentType
 */
public final static int CONTENT_UNDECLARED = 0;
/**
 * Constant: the element has a content model of ANY.
 * @see #getElementContentType
 */
public final static int CONTENT_ANY = 1;
/**
 * Constant: the element has declared content of EMPTY.
 * @see #getElementContentType
 */
public final static int CONTENT_EMPTY = 2;
/**
 * Constant: the element has mixed content.
 * @see #getElementContentType
 */
public final static int CONTENT_MIXED = 3;
/**
 * Constant: the element has element content.
 * @see #getElementContentType
 */
public final static int CONTENT_ELEMENTS = 4;
//
// Constants for the entity type.
// Each constant is a distinct int code; 0 is the "undeclared" value.
//
/**
 * Constant: the entity has not been declared.
 * @see #getEntityType
 */
public final static int ENTITY_UNDECLARED = 0;
/**
 * Constant: the entity is internal.
 * @see #getEntityType
 */
public final static int ENTITY_INTERNAL = 1;
/**
 * Constant: the entity is external, non-XML data.
 * @see #getEntityType
 */
public final static int ENTITY_NDATA = 2;
/**
 * Constant: the entity is external XML data.
 * @see #getEntityType
 */
public final static int ENTITY_TEXT = 3;
//
// Constants for attribute type.
// Each constant is a distinct int code; 0 is the "undeclared" value.
// The codes for CDATA through NOTATION (except ENUMERATED) are also
// registered by keyword in attributeTypeHash.
//
/**
 * Constant: the attribute has not been declared for this element type.
 * @see #getAttributeType
 */
public final static int ATTRIBUTE_UNDECLARED = 0;
/**
 * Constant: the attribute value is a string value.
 * @see #getAttributeType
 */
public final static int ATTRIBUTE_CDATA = 1;
/**
 * Constant: the attribute value is a unique identifier.
 * @see #getAttributeType
 */
public final static int ATTRIBUTE_ID = 2;
/**
 * Constant: the attribute value is a reference to a unique identifier.
 * @see #getAttributeType
 */
public final static int ATTRIBUTE_IDREF = 3;
/**
 * Constant: the attribute value is a list of ID references.
 * @see #getAttributeType
 */
public final static int ATTRIBUTE_IDREFS = 4;
/**
 * Constant: the attribute value is the name of an entity.
 * @see #getAttributeType
 */
public final static int ATTRIBUTE_ENTITY = 5;
/**
 * Constant: the attribute value is a list of entity names.
 * @see #getAttributeType
 */
public final static int ATTRIBUTE_ENTITIES = 6;
/**
 * Constant: the attribute value is a name token.
 * @see #getAttributeType
 */
public final static int ATTRIBUTE_NMTOKEN = 7;
/**
 * Constant: the attribute value is a list of name tokens.
 * @see #getAttributeType
 */
public final static int ATTRIBUTE_NMTOKENS = 8;
/**
 * Constant: the attribute value is a token from an enumeration.
 * <p>This is the only attribute type code without a keyword entry in
 * <code>attributeTypeHash</code>.
 * @see #getAttributeType
 */
public final static int ATTRIBUTE_ENUMERATED = 9;
/**
 * Constant: the attribute is the name of a notation.
 * @see #getAttributeType
 */
public final static int ATTRIBUTE_NOTATION = 10;
//
// When the class is loaded, populate the hash table of
// attribute types.
//
/**
 * Hash table of attribute types: maps an attribute-type keyword
 * (String) to its ATTRIBUTE_* code (Integer).
 */
private static Hashtable attributeTypeHash;
static {
  // Keyword/code pairs, kept in two parallel arrays (same index = same pair).
  final String[] keywords = {
    "CDATA", "ID", "IDREF", "IDREFS", "ENTITY",
    "ENTITIES", "NMTOKEN", "NMTOKENS", "NOTATION"
  };
  final int[] codes = {
    ATTRIBUTE_CDATA, ATTRIBUTE_ID, ATTRIBUTE_IDREF, ATTRIBUTE_IDREFS,
    ATTRIBUTE_ENTITY, ATTRIBUTE_ENTITIES, ATTRIBUTE_NMTOKEN,
    ATTRIBUTE_NMTOKENS, ATTRIBUTE_NOTATION
  };

  final Hashtable table = new Hashtable();
  for (int i = 0; i < keywords.length; i++) {
    table.put(keywords[i], new Integer(codes[i]));
  }
  attributeTypeHash = table;
}
//
// Constants for supported encodings.
// Each constant is a distinct int code; none has the value 0.
// NOTE(review): the numeric suffixes on the UCS-2/UCS-4 names presumably
// spell out the byte order within a code unit (e.g. 12 vs. 21,
// 1234 vs. 4321) -- confirm against the encoding-detection code,
// which is not in this part of the file.
//
private final static int ENCODING_UTF_8 = 1;
private final static int ENCODING_ISO_8859_1 = 2;
private final static int ENCODING_UCS_2_12 = 3;
private final static int ENCODING_UCS_2_21 = 4;
private final static int ENCODING_UCS_4_1234 = 5;
private final static int ENCODING_UCS_4_4321 = 6;
private final static int ENCODING_UCS_4_2143 = 7;
private final static int ENCODING_UCS_4_3412 = 8;
//
// Constants for attribute default value.
// Each constant is a distinct int code; 0 is the "undeclared" value.
//
/**
 * Constant: the attribute is not declared.
 * @see #getAttributeDefaultValueType
 */
public final static int ATTRIBUTE_DEFAULT_UNDECLARED = 0;
/**
 * Constant: the attribute has a literal default value specified.
 * @see #getAttributeDefaultValueType
 * @see #getAttributeDefaultValue
 */
public final static int ATTRIBUTE_DEFAULT_SPECIFIED = 1;
/**
 * Constant: the attribute was declared #IMPLIED.
 * @see #getAttributeDefaultValueType
 */
public final static int ATTRIBUTE_DEFAULT_IMPLIED = 2;
/**
 * Constant: the attribute was declared #REQUIRED.
 * @see #getAttributeDefaultValueType
 */
public final static int ATTRIBUTE_DEFAULT_REQUIRED = 3;
/**
 * Constant: the attribute was declared #FIXED.
 * @see #getAttributeDefaultValueType
 * @see #getAttributeDefaultValue
 */
public final static int ATTRIBUTE_DEFAULT_FIXED = 4;
//
// Constants for input.
// Each constant is a distinct int code, with 0 meaning "none".
// NOTE(review): presumably these identify the kind of source the parser
// is currently reading from -- confirm against the input-handling code,
// which is not in this part of the file.
//
private final static int INPUT_NONE = 0;
private final static int INPUT_INTERNAL = 1;
private final static int INPUT_EXTERNAL = 2;
private final static int INPUT_STREAM = 3;
private final static int INPUT_BUFFER = 4;
private final static int INPUT_READER = 5;
//
// Flags for reading literals.
// The values are distinct powers of two (1, 2, 4, 8), so several flags
// can be combined in one int with bitwise OR.
//
private final static int LIT_CHAR_REF = 1;
private final static int LIT_ENTITY_REF = 2;
private final static int LIT_PE_REF = 4;
private final static int LIT_NORMALIZE = 8;
//
// Flags for parsing context.
// NOTE(review): despite the word "flags", the values are sequential
// (0..3) rather than bit masks (3 == 1|2), so they are presumably
// compared for equality and not OR-ed together -- confirm at the
// points of use.
//
private final static int CONTEXT_NONE = 0;
private final static int CONTEXT_DTD = 1;
private final static int CONTEXT_ENTITYVALUE = 2;
private final static int CONTEXT_ATTRIBUTEVALUE = 3;
//////////////////////////////////////////////////////////////////////
// Error reporting.
//////////////////////////////////////////////////////////////////////
/**
 * Count an error and pass it on to the handler, if there is one.
 * <p>The message is extended with <code> (found "...")</code> and
 * <code> (expected "...")</code> for whichever of the two text
 * arguments is non-null. The handler also receives the URL of the
 * current external entity (or null if there is none) together with
 * the current line and column.
 * @param message The error message.
 * @param textFound The text that caused the error (or null).
 * @param textExpected The text that was expected instead (or null).
 * @exception java.lang.Exception Any exception thrown by the handler.
 * @see XmlHandler#error
 * @see #line
 */
void error (String message, String textFound, String textExpected)
  throws java.lang.Exception
{
  // The error is counted even when nobody is listening.
  errorCount++;

  String report = message;
  if (textFound != null) {
    report += " (found \"" + textFound + "\")";
  }
  if (textExpected != null) {
    report += " (expected \"" + textExpected + "\")";
  }

  if (handler == null) {
    return;
  }

  final String uri =
    (externalEntity == null) ? null : externalEntity.getURL().toString();
  handler.error(report, uri, line, column);
}
/**
 * Report an error whose offending text is a single character.
 * <p>Converts the character to a one-character string and delegates
 * to the <code>String</code> overload of this method.
 * @param message The error message.
 * @param textFound The character that caused the error.
 * @param textExpected The text that was expected instead (or null).
 * @exception java.lang.Exception Any exception thrown by the
 *            <code>String</code> overload.
 */
void error (String message, char textFound, String textExpected)
  throws java.lang.Exception
{
  // String.valueOf(char) yields the same one-character string as
  // new Character(c).toString(), without the deprecated constructor.
  error(message, String.valueOf(textFound), textExpected);
}
//////////////////////////////////////////////////////////////////////
// Major syntactic productions.
//////////////////////////////////////////////////////////////////////
/**
 * Parse a complete XML document.
 * <pre>
 * [1] document ::= prolog element Misc*
 * </pre>
 * <p>This is the top-level parsing function for a single XML
 * document. As a minimum, a well-formed document must have
 * a document element, and a valid document must have a prolog
 * as well.
 * <p>After the document element, trailing whitespace, PIs and comments
 * are skipped. Any further character is reported through
 * <code>error</code>. Reaching the end of input at that point is the
 * normal way out.
 * @exception java.lang.Exception Any exception thrown by the parsing
 *            routines or by the handler, other than the
 *            <code>EOFException</code> caught after the document
 *            element.
 */
void parseDocument ()
  throws java.lang.Exception
{
  parseProlog();
  require('<');
  parseElement();

  try {
    // Skip all whitespace, PIs, and comments after the document element.
    parseMisc();
    // If this read succeeds, something is left over after the document.
    final char trailing = readCh();
    error("unexpected characters after document end", trailing, null);
  } catch (EOFException endOfInput) {
    // Running out of input here is expected: the document is finished.
  }
}
/**
* Skip a comment.
* <pre>
* [15] Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* "--&gt;"
* </pre>
* <p>(The <code>&lt;!--</code> has already been read.)
*/
void parseComment ()
throws java.lang.Exception
{
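// NOTE: the listing is truncated here; the body of parseComment() and the
// rest of the class are missing. What followed in the capture was code-viewer
// UI text, not source: keyboard-shortcut help (copy code Ctrl+C, search code
// Ctrl+F, full screen F11, toggle theme Ctrl+Shift+D, show shortcuts ?,
// increase font size Ctrl+=, decrease font size Ctrl+-).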