xmlparser.java

来自「kaffe Java 解释器语言,源码,Java的子集系统,开放源代码」· Java 代码 · 共 2,494 行 · 第 1/5 页

JAVA
2,494
字号
/* * Copyright (C) 1999-2001 David Brownell *  * This file is part of GNU JAXP, a library. * * GNU JAXP is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. *  * GNU JAXP is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. *  * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * * As a special exception, if you link this library with other files to * produce an executable, this library does not by itself cause the * resulting executable to be covered by the GNU General Public License. * This exception does not however invalidate any other reasons why the * executable file might be covered by the GNU General Public License.  *///// Copyright (c) 1997, 1998 by Microstar Software Ltd.// From Microstar's README (the entire original license):////	Separate statements also said it's in the public domain.//	All modifications are distributed under the license//	above (GPL with library exception).//// AElfred is free for both commercial and non-commercial use and// redistribution, provided that Microstar's copyright and disclaimer are// retained intact.  You are free to modify AElfred for your own use and// to redistribute AElfred with your modifications, provided that the// modifications are clearly documented.//// This program is distributed in the hope that it will be useful, but// WITHOUT ANY WARRANTY; without even the implied warranty of// merchantability or fitness for a particular purpose.  
// Please use it AT
// YOUR OWN RISK.
//

package gnu.xml.aelfred2;

import java.io.BufferedInputStream;
import java.io.CharConversionException;
import java.io.EOFException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;

// maintaining 1.1 compatibility for now ...
// Iterator and Hashmap ought to be faster
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Stack;

import org.xml.sax.InputSource;
import org.xml.sax.SAXException;


/**
 * Parse XML documents and return parse events through call-backs.
 * Use the <code>SAXDriver</code> class as your entry point, as all
 * internal parser interfaces are subject to change.
 *
 * @author Written by David Megginson &lt;dmeggins@microstar.com&gt;
 *	(version 1.2a with bugfixes)
 * @author Updated by David Brownell &lt;dbrownell@users.sourceforge.net&gt;
 * @see SAXDriver
 */
final class XmlParser
{
    // avoid slow per-character readCh()
    private final static boolean USE_CHEATS = true;


    //////////////////////////////////////////////////////////////////////
    // Constructors.
    ////////////////////////////////////////////////////////////////////////


    /**
     * Construct a new parser with no associated handler.
     * A handler must be installed with {@link #setHandler} before
     * {@link #doParse} is called; doParse rejects a null handler.
     * @see #setHandler
     * @see #doParse
     */
    // package private
    XmlParser ()
    {
    }


    /**
     * Set the handler that will receive parsing events.
     * @param handler The handler to receive callback events.
     * @see #doParse
     */
    // package private
    void setHandler (SAXDriver handler)
    {
        this.handler = handler;
    }


    /**
     * Parse an XML document from the character stream, byte stream, or URI
     * that you provide (in that order of preference).  Any URI that you
     * supply will become the base URI for resolving relative URI, and may
     * be used to acquire a reader or byte stream.
*     * <p> Only one thread at a time may use this parser; since it is     * private to this package, post-parse cleanup is done by the caller,     * which MUST NOT REUSE the parser (just null it).     *     * @param systemId Absolute URI of the document; should never be null,     *	but may be so iff a reader <em>or</em> a stream is provided.     * @param publicId The public identifier of the document, or null.     * @param reader A character stream; must be null if stream isn't.     * @param stream A byte input stream; must be null if reader isn't.     * @param encoding The suggested encoding, or null if unknown.     * @exception java.lang.Exception Basically SAXException or IOException     */    // package private     void doParse (	String		systemId,	String		publicId,	Reader		reader,	InputStream	stream,	String		encoding    ) throws Exception    {	if (handler == null)	    throw new IllegalStateException ("no callback handler");	initializeVariables ();	// predeclare the built-in entities here (replacement texts)	// we don't need to intern(), since we're guaranteed literals	// are always (globally) interned.	setInternalEntity ("amp", "&#38;");	setInternalEntity ("lt", "&#60;");	setInternalEntity ("gt", "&#62;");	setInternalEntity ("apos", "&#39;");	setInternalEntity ("quot", "&#34;");	try {	    // pushURL first to ensure locator is correct in startDocument	    // ... it might report an IO or encoding exception.	    // FIXME that could call endDocument without startDocument!	    
pushURL (false, "[document]",			// default baseURI: null		    new String [] { publicId, systemId, null},		    reader, stream, encoding, false);	    handler.startDocument ();	    parseDocument ();	} finally {	    if (reader != null)		try { reader.close ();		} catch (IOException e) { /* ignore */ }	    if (stream != null)		try { stream.close ();		} catch (IOException e) { /* ignore */ }	    if (is != null)		try { is.close ();		} catch (IOException e) { /* ignore */ }	    if (reader != null)		try {		    reader.close ();		} catch (IOException e) { /* ignore */		}	    scratch = null;	}    }    ////////////////////////////////////////////////////////////////////////    // Constants.    ////////////////////////////////////////////////////////////////////////    //    // Constants for element content type.    //    /**     * Constant: an element has not been declared.     * @see #getElementContentType     */    public final static int CONTENT_UNDECLARED = 0;    /**     * Constant: the element has a content model of ANY.     * @see #getElementContentType     */    public final static int CONTENT_ANY = 1;    /**     * Constant: the element has declared content of EMPTY.     * @see #getElementContentType     */    public final static int CONTENT_EMPTY = 2;    /**     * Constant: the element has mixed content.     * @see #getElementContentType     */    public final static int CONTENT_MIXED = 3;    /**     * Constant: the element has element content.     * @see #getElementContentType     */    public final static int CONTENT_ELEMENTS = 4;    //    // Constants for the entity type.    //    /**     * Constant: the entity has not been declared.     * @see #getEntityType     */    public final static int ENTITY_UNDECLARED = 0;    /**     * Constant: the entity is internal.     * @see #getEntityType     */    public final static int ENTITY_INTERNAL = 1;    /**     * Constant: the entity is external, non-parsable data.     
* @see #getEntityType
     */
    public final static int ENTITY_NDATA = 2;

    /**
     * Constant: the entity is external XML data.
     * @see #getEntityType
     */
    public final static int ENTITY_TEXT = 3;


    //
    // Attribute type constants are interned literal strings.
    //

    //
    // Constants for supported encodings.  "external" is just a flag.
    //
    // NOTE(review): the digit suffixes on the UCS-2/UCS-4 constants
    // presumably name the byte order of each character -- confirm against
    // the encoding detection code, which is outside this view.
    //
    private final static int ENCODING_EXTERNAL = 0;
    private final static int ENCODING_UTF_8 = 1;
    private final static int ENCODING_ISO_8859_1 = 2;
    private final static int ENCODING_UCS_2_12 = 3;
    private final static int ENCODING_UCS_2_21 = 4;
    private final static int ENCODING_UCS_4_1234 = 5;
    private final static int ENCODING_UCS_4_4321 = 6;
    private final static int ENCODING_UCS_4_2143 = 7;
    private final static int ENCODING_UCS_4_3412 = 8;
    private final static int ENCODING_ASCII = 9;


    //
    // Constants for attribute default value.
    //

    /**
     * Constant: the attribute is not declared.
     * @see #getAttributeDefaultValueType
     */
    public final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30;

    /**
     * Constant: the attribute has a literal default value specified.
     * @see #getAttributeDefaultValueType
     * @see #getAttributeDefaultValue
     */
    public final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31;

    /**
     * Constant: the attribute was declared #IMPLIED.
     * @see #getAttributeDefaultValueType
     */
    public final static int ATTRIBUTE_DEFAULT_IMPLIED = 32;

    /**
     * Constant: the attribute was declared #REQUIRED.
     * @see #getAttributeDefaultValueType
     */
    public final static int ATTRIBUTE_DEFAULT_REQUIRED = 33;

    /**
     * Constant: the attribute was declared #FIXED.
     * @see #getAttributeDefaultValueType
     * @see #getAttributeDefaultValue
     */
    public final static int ATTRIBUTE_DEFAULT_FIXED = 34;


    //
    // Constants for input.
//    private final static int INPUT_NONE = 0;    private final static int INPUT_INTERNAL = 1;    private final static int INPUT_STREAM = 3;    private final static int INPUT_READER = 5;    //    // Flags for reading literals.    //	// expand general entity refs (attribute values in dtd and content)    private final static int LIT_ENTITY_REF = 2;	// normalize this value (space chars) (attributes, public ids)    private final static int LIT_NORMALIZE = 4;	// literal is an attribute value     private final static int LIT_ATTRIBUTE = 8;	// don't expand parameter entities    private final static int LIT_DISABLE_PE = 16;	// don't expand [or parse] character refs    private final static int LIT_DISABLE_CREF = 32;	// don't parse general entity refs    private final static int LIT_DISABLE_EREF = 64;	// literal is a public ID value     private final static int LIT_PUBID = 256;    //    // Flags affecting PE handling in DTDs (if expandPE is true).    // PEs expand with space padding, except inside literals.    //    private final static int CONTEXT_NORMAL = 0;    private final static int CONTEXT_LITERAL = 1;    //////////////////////////////////////////////////////////////////////    // Error reporting.    //////////////////////////////////////////////////////////////////////    /**     * Report an error.     * @param message The error message.     * @param textFound The text that caused the error (or null).     * @see SAXDriver#error     * @see #line     */    private void error (String message, String textFound, String textExpected)    throws SAXException    {	if (textFound != null) {	    message = message + " (found \"" + textFound + "\")";	}	if (textExpected != null) {	    message = message + " (expected \"" + textExpected + "\")";	}	handler.fatal (message);	// "can't happen"	throw new SAXException (message);    }    /**     * Report a serious error.     * @param message The error message.     * @param textFound The text that caused the error (or null).     
*/    private void error (String message, char textFound, String textExpected)    throws SAXException    {	error (message, new Character (textFound).toString (), textExpected);    }    /** Report typical case fatal errors. */    private void error (String message)    throws SAXException    {	handler.fatal (message);    }    //////////////////////////////////////////////////////////////////////    // Major syntactic productions.    //////////////////////////////////////////////////////////////////////    /**     * Parse an XML document.     * <pre>     * [1] document ::= prolog element Misc*     * </pre>     * <p>This is the top-level parsing function for a single XML     * document.  As a minimum, a well-formed document must have     * a document element, and a valid document must have a prolog     * (one with doctype) as well.     */    private void parseDocument ()    throws Exception    {        try {                                       // added by MHK    	    boolean sawDTD = parseProlog ();    	    require ('<');    	    parseElement (!sawDTD);        } catch (EOFException ee) {                 // added by MHK            error("premature end of file", "[EOF]", null);        }            	try {    	    parseMisc ();   //skip all white, PIs, and comments    	    char c = readCh ();    //if this doesn't throw an exception...    	    error ("unexpected characters after document end", c, null);    	} catch (EOFException e) {    	    return;    	}    }    static final char	startDelimComment [] = { '<', '!', '-', '-' };    static final char	endDelimComment [] = { '-', '-' };    /**     * Skip a comment.     * <pre>     * [15] Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* "-->"     * </pre>     * <p> (The <code>&lt;!--</code> has already been read.)     
*/    private void parseComment ()    throws Exception    {	char c;	boolean saved = expandPE;	expandPE = false;	parseUntil (endDelimComment);	require ('>');	expandPE = saved;	handler.comment (dataBuffer, 0, dataBufferPos);	dataBufferPos = 0;    }    static final char	startDelimPI [] = { '<', '?' };    static final char	endDelimPI [] = { '?', '>' };    /**     * Parse a processing instruction and do a call-back.     * <pre>     * [16] PI ::= '&lt;?' PITarget     *		(S (Char* - (Char* '?&gt;' Char*)))?     *		'?&gt;'     * [17] PITarget ::= Name - ( ('X'|'x') ('M'|m') ('L'|l') )     * </pre>     * <p> (The <code>&lt;?</code> has already been read.)     */    private void parsePI ()    throws SAXException, IOException    {	String name;	boolean saved = expandPE;	expandPE = false;	name = readNmtoken (true);	if ("xml".equalsIgnoreCase (name))	    error ("Illegal processing instruction target", name, null);	if (!tryRead (endDelimPI)) {	    requireWhitespace ();	    parseUntil (endDelimPI);	}	expandPE = saved;	handler.processingInstruction (name, dataBufferToString ());    }    static final char	endDelimCDATA [] = { ']', ']', '>' };    /**     * Parse a CDATA section.     * <pre>     * [18] CDSect ::= CDStart CData CDEnd     * [19] CDStart ::= '&lt;![CDATA['     * [20] CData ::= (Char* - (Char* ']]&gt;' Char*))     * [21] CDEnd ::= ']]&gt;'     * </pre>     * <p> (The '&lt;![CDATA[' has already been read.)     */    private void parseCDSect ()    throws Exception    {

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?