xmlparser.java
来自「kaffe Java 解释器语言,源码,Java的子集系统,开放源代码」· Java 代码 · 共 2,494 行 · 第 1/5 页
JAVA
2,494 行
/* * Copyright (C) 1999-2001 David Brownell * * This file is part of GNU JAXP, a library. * * GNU JAXP is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * GNU JAXP is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * As a special exception, if you link this library with other files to * produce an executable, this library does not by itself cause the * resulting executable to be covered by the GNU General Public License. * This exception does not however invalidate any other reasons why the * executable file might be covered by the GNU General Public License. *///// Copyright (c) 1997, 1998 by Microstar Software Ltd.// From Microstar's README (the entire original license)://// Separate statements also said it's in the public domain.// All modifications are distributed under the license// above (GPL with library exception).//// AElfred is free for both commercial and non-commercial use and// redistribution, provided that Microstar's copyright and disclaimer are// retained intact. You are free to modify AElfred for your own use and// to redistribute AElfred with your modifications, provided that the// modifications are clearly documented.//// This program is distributed in the hope that it will be useful, but// WITHOUT ANY WARRANTY; without even the implied warranty of// merchantability or fitness for a particular purpose. Please use it AT// YOUR OWN RISK.//package gnu.xml.aelfred2;import java.io.BufferedInputStream;import java.io.CharConversionException;import java.io.EOFException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.IOException;import java.io.Reader;import java.io.UnsupportedEncodingException;import java.net.URL;import java.net.URLConnection;// maintaining 1.1 compatibility for now ...// Iterator and Hashmap ought to be fasterimport java.util.Enumeration;import java.util.Hashtable;import java.util.Stack;import org.xml.sax.InputSource;import org.xml.sax.SAXException;/** * Parse XML documents and return parse events through call-backs. * Use the <code>SAXDriver</code> class as your entry point, as all * internal parser interfaces are subject to change. * * @author Written by David Megginson <dmeggins@microstar.com> * (version 1.2a with bugfixes) * @author Updated by David Brownell <dbrownell@users.sourceforge.net> * @see SAXDriver */final class XmlParser{ // avoid slow per-character readCh() private final static boolean USE_CHEATS = true; ////////////////////////////////////////////////////////////////////// // Constructors. //////////////////////////////////////////////////////////////////////// /** * Construct a new parser with no associated handler. * @see #setHandler * @see #parse */ // package private XmlParser () { } /** * Set the handler that will receive parsing events. * @param handler The handler to receive callback events. * @see #parse */ // package private void setHandler (SAXDriver handler) { this.handler = handler; } /** * Parse an XML document from the character stream, byte stream, or URI * that you provide (in that order of preference). Any URI that you * supply will become the base URI for resolving relative URI, and may * be used to acquire a reader or byte stream. * * <p> Only one thread at a time may use this parser; since it is * private to this package, post-parse cleanup is done by the caller, * which MUST NOT REUSE the parser (just null it). * * @param systemId Absolute URI of the document; should never be null, * but may be so iff a reader <em>or</em> a stream is provided. * @param publicId The public identifier of the document, or null. * @param reader A character stream; must be null if stream isn't. * @param stream A byte input stream; must be null if reader isn't. * @param encoding The suggested encoding, or null if unknown. * @exception java.lang.Exception Basically SAXException or IOException */ // package private void doParse ( String systemId, String publicId, Reader reader, InputStream stream, String encoding ) throws Exception { if (handler == null) throw new IllegalStateException ("no callback handler"); initializeVariables (); // predeclare the built-in entities here (replacement texts) // we don't need to intern(), since we're guaranteed literals // are always (globally) interned. setInternalEntity ("amp", "&"); setInternalEntity ("lt", "<"); setInternalEntity ("gt", ">"); setInternalEntity ("apos", "'"); setInternalEntity ("quot", """); try { // pushURL first to ensure locator is correct in startDocument // ... it might report an IO or encoding exception. // FIXME that could call endDocument without startDocument! pushURL (false, "[document]", // default baseURI: null new String [] { publicId, systemId, null}, reader, stream, encoding, false); handler.startDocument (); parseDocument (); } finally { if (reader != null) try { reader.close (); } catch (IOException e) { /* ignore */ } if (stream != null) try { stream.close (); } catch (IOException e) { /* ignore */ } if (is != null) try { is.close (); } catch (IOException e) { /* ignore */ } if (reader != null) try { reader.close (); } catch (IOException e) { /* ignore */ } scratch = null; } } //////////////////////////////////////////////////////////////////////// // Constants. //////////////////////////////////////////////////////////////////////// // // Constants for element content type. // /** * Constant: an element has not been declared. * @see #getElementContentType */ public final static int CONTENT_UNDECLARED = 0; /** * Constant: the element has a content model of ANY. * @see #getElementContentType */ public final static int CONTENT_ANY = 1; /** * Constant: the element has declared content of EMPTY. * @see #getElementContentType */ public final static int CONTENT_EMPTY = 2; /** * Constant: the element has mixed content. * @see #getElementContentType */ public final static int CONTENT_MIXED = 3; /** * Constant: the element has element content. * @see #getElementContentType */ public final static int CONTENT_ELEMENTS = 4; // // Constants for the entity type. // /** * Constant: the entity has not been declared. * @see #getEntityType */ public final static int ENTITY_UNDECLARED = 0; /** * Constant: the entity is internal. * @see #getEntityType */ public final static int ENTITY_INTERNAL = 1; /** * Constant: the entity is external, non-parsable data. * @see #getEntityType */ public final static int ENTITY_NDATA = 2; /** * Constant: the entity is external XML data. * @see #getEntityType */ public final static int ENTITY_TEXT = 3; // // Attribute type constants are interned literal strings. // // // Constants for supported encodings. "external" is just a flag. // private final static int ENCODING_EXTERNAL = 0; private final static int ENCODING_UTF_8 = 1; private final static int ENCODING_ISO_8859_1 = 2; private final static int ENCODING_UCS_2_12 = 3; private final static int ENCODING_UCS_2_21 = 4; private final static int ENCODING_UCS_4_1234 = 5; private final static int ENCODING_UCS_4_4321 = 6; private final static int ENCODING_UCS_4_2143 = 7; private final static int ENCODING_UCS_4_3412 = 8; private final static int ENCODING_ASCII = 9; // // Constants for attribute default value. // /** * Constant: the attribute is not declared. * @see #getAttributeDefaultValueType */ public final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30; /** * Constant: the attribute has a literal default value specified. * @see #getAttributeDefaultValueType * @see #getAttributeDefaultValue */ public final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31; /** * Constant: the attribute was declared #IMPLIED. * @see #getAttributeDefaultValueType */ public final static int ATTRIBUTE_DEFAULT_IMPLIED = 32; /** * Constant: the attribute was declared #REQUIRED. * @see #getAttributeDefaultValueType */ public final static int ATTRIBUTE_DEFAULT_REQUIRED = 33; /** * Constant: the attribute was declared #FIXED. * @see #getAttributeDefaultValueType * @see #getAttributeDefaultValue */ public final static int ATTRIBUTE_DEFAULT_FIXED = 34; // // Constants for input. // private final static int INPUT_NONE = 0; private final static int INPUT_INTERNAL = 1; private final static int INPUT_STREAM = 3; private final static int INPUT_READER = 5; // // Flags for reading literals. // // expand general entity refs (attribute values in dtd and content) private final static int LIT_ENTITY_REF = 2; // normalize this value (space chars) (attributes, public ids) private final static int LIT_NORMALIZE = 4; // literal is an attribute value private final static int LIT_ATTRIBUTE = 8; // don't expand parameter entities private final static int LIT_DISABLE_PE = 16; // don't expand [or parse] character refs private final static int LIT_DISABLE_CREF = 32; // don't parse general entity refs private final static int LIT_DISABLE_EREF = 64; // literal is a public ID value private final static int LIT_PUBID = 256; // // Flags affecting PE handling in DTDs (if expandPE is true). // PEs expand with space padding, except inside literals. // private final static int CONTEXT_NORMAL = 0; private final static int CONTEXT_LITERAL = 1; ////////////////////////////////////////////////////////////////////// // Error reporting. ////////////////////////////////////////////////////////////////////// /** * Report an error. * @param message The error message. * @param textFound The text that caused the error (or null). * @see SAXDriver#error * @see #line */ private void error (String message, String textFound, String textExpected) throws SAXException { if (textFound != null) { message = message + " (found \"" + textFound + "\")"; } if (textExpected != null) { message = message + " (expected \"" + textExpected + "\")"; } handler.fatal (message); // "can't happen" throw new SAXException (message); } /** * Report a serious error. * @param message The error message. * @param textFound The text that caused the error (or null). */ private void error (String message, char textFound, String textExpected) throws SAXException { error (message, new Character (textFound).toString (), textExpected); } /** Report typical case fatal errors. */ private void error (String message) throws SAXException { handler.fatal (message); } ////////////////////////////////////////////////////////////////////// // Major syntactic productions. ////////////////////////////////////////////////////////////////////// /** * Parse an XML document. * <pre> * [1] document ::= prolog element Misc* * </pre> * <p>This is the top-level parsing function for a single XML * document. As a minimum, a well-formed document must have * a document element, and a valid document must have a prolog * (one with doctype) as well. */ private void parseDocument () throws Exception { try { // added by MHK boolean sawDTD = parseProlog (); require ('<'); parseElement (!sawDTD); } catch (EOFException ee) { // added by MHK error("premature end of file", "[EOF]", null); } try { parseMisc (); //skip all white, PIs, and comments char c = readCh (); //if this doesn't throw an exception... error ("unexpected characters after document end", c, null); } catch (EOFException e) { return; } } static final char startDelimComment [] = { '<', '!', '-', '-' }; static final char endDelimComment [] = { '-', '-' }; /** * Skip a comment. * <pre> * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* "-->" * </pre> * <p> (The <code><!--</code> has already been read.) */ private void parseComment () throws Exception { char c; boolean saved = expandPE; expandPE = false; parseUntil (endDelimComment); require ('>'); expandPE = saved; handler.comment (dataBuffer, 0, dataBufferPos); dataBufferPos = 0; } static final char startDelimPI [] = { '<', '?' }; static final char endDelimPI [] = { '?', '>' }; /** * Parse a processing instruction and do a call-back. * <pre> * [16] PI ::= '<?' PITarget * (S (Char* - (Char* '?>' Char*)))? * '?>' * [17] PITarget ::= Name - ( ('X'|'x') ('M'|m') ('L'|l') ) * </pre> * <p> (The <code><?</code> has already been read.) */ private void parsePI () throws SAXException, IOException { String name; boolean saved = expandPE; expandPE = false; name = readNmtoken (true); if ("xml".equalsIgnoreCase (name)) error ("Illegal processing instruction target", name, null); if (!tryRead (endDelimPI)) { requireWhitespace (); parseUntil (endDelimPI); } expandPE = saved; handler.processingInstruction (name, dataBufferToString ()); } static final char endDelimCDATA [] = { ']', ']', '>' }; /** * Parse a CDATA section. * <pre> * [18] CDSect ::= CDStart CData CDEnd * [19] CDStart ::= '<![CDATA[' * [20] CData ::= (Char* - (Char* ']]>' Char*)) * [21] CDEnd ::= ']]>' * </pre> * <p> (The '<![CDATA[' has already been read.) */ private void parseCDSect () throws Exception {
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?