📄 xmlparser.java
字号:
/* XMLParser.java -- Copyright (C) 2005 Free Software Foundation, Inc.This file is part of GNU Classpath.GNU Classpath is free software; you can redistribute it and/or modifyit under the terms of the GNU General Public License as published bythe Free Software Foundation; either version 2, or (at your option)any later version.GNU Classpath is distributed in the hope that it will be useful, butWITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNUGeneral Public License for more details.You should have received a copy of the GNU General Public Licensealong with GNU Classpath; see the file COPYING. If not, write to theFree Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA02110-1301 USA.Linking this library statically or dynamically with other modules ismaking a combined work based on this library. Thus, the terms andconditions of the GNU General Public License cover the wholecombination.As a special exception, the copyright holders of this library give youpermission to link this library with independent modules to produce anexecutable, regardless of the license terms of these independentmodules, and to copy and distribute the resulting executable underterms of your choice, provided that you also meet, for each linkedindependent module, the terms and conditions of the license of thatmodule. An independent module is a module which is not derived fromor based on this library. If you modify this library, you may extendthis exception to your version of the library, but you are notobligated to do so. If you do not wish to do so, delete thisexception statement from your version.Partly derived from code which carried the following notice: Copyright (c) 1997, 1998 by Microstar Software Ltd. AElfred is free for both commercial and non-commercial use and redistribution, provided that Microstar's copyright and disclaimer are retained intact. 
You are free to modify AElfred for your own use and to redistribute AElfred with your modifications, provided that the modifications are clearly documented. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of merchantability or fitness for a particular purpose. Please use it AT YOUR OWN RISK.*/package gnu.xml.stream;import java.io.BufferedInputStream;import java.io.EOFException;import java.io.File;import java.io.InputStream;import java.io.InputStreamReader;import java.io.IOException;import java.io.Reader;import java.io.StringReader;import java.io.UnsupportedEncodingException;import java.net.MalformedURLException;import java.net.URL;import java.util.ArrayList;import java.util.Collections;import java.util.HashSet;import java.util.Iterator;import java.util.LinkedHashMap;import java.util.LinkedList;import java.util.Map;import java.util.NoSuchElementException;import java.util.StringTokenizer;import javax.xml.XMLConstants;import javax.xml.namespace.NamespaceContext;import javax.xml.namespace.QName;import javax.xml.stream.Location;import javax.xml.stream.XMLInputFactory;import javax.xml.stream.XMLReporter;import javax.xml.stream.XMLResolver;import javax.xml.stream.XMLStreamConstants;import javax.xml.stream.XMLStreamException;import javax.xml.stream.XMLStreamReader;import gnu.java.net.CRLFInputStream;/** * An XML parser. 
* This parser supports the following additional StAX properties: * <table> * <tr><td>gnu.xml.stream.stringInterning</td> * <td>Boolean</td> * <td>Indicates whether markup strings will be interned</td></tr> * <tr><td>gnu.xml.stream.xmlBase</td> * <td>Boolean</td> * <td>Indicates whether XML Base processing will be performed</td></tr> * <tr><td>gnu.xml.stream.baseURI</td> * <td>String</td> * <td>Returns the base URI of the current event</td></tr> * </table> * * @see http://www.w3.org/TR/REC-xml/ * @see http://www.w3.org/TR/xml11/ * @see http://www.w3.org/TR/REC-xml-names * @see http://www.w3.org/TR/xml-names11 * @see http://www.w3.org/TR/xmlbase/ * * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a> */public class XMLParser implements XMLStreamReader, NamespaceContext{ // -- parser state machine states -- private static final int INIT = 0; // start state private static final int PROLOG = 1; // in prolog private static final int CONTENT = 2; // in content private static final int EMPTY_ELEMENT = 3; // empty element state private static final int MISC = 4; // in Misc (after root element) // -- parameters for parsing literals -- private final static int LIT_ENTITY_REF = 2; private final static int LIT_NORMALIZE = 4; private final static int LIT_ATTRIBUTE = 8; private final static int LIT_DISABLE_PE = 16; private final static int LIT_DISABLE_CREF = 32; private final static int LIT_DISABLE_EREF = 64; private final static int LIT_PUBID = 256; // -- types of attribute values -- final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30; final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31; final static int ATTRIBUTE_DEFAULT_IMPLIED = 32; final static int ATTRIBUTE_DEFAULT_REQUIRED = 33; final static int ATTRIBUTE_DEFAULT_FIXED = 34; // -- additional event types -- final static int START_ENTITY = 50; final static int END_ENTITY = 51; /** * The current input. */ private Input input; /** * Stack of inputs representing XML general entities. 
* The input representing the XML input stream or reader is always the
   * first element in this stack.
   */
  private LinkedList inputStack = new LinkedList();

  /**
   * Stack of start-entity events to be reported.
   */
  private LinkedList startEntityStack = new LinkedList();

  /**
   * Stack of end-entity events to be reported.
   */
  private LinkedList endEntityStack = new LinkedList();

  /**
   * Current parser state within the main state machine.
   * Starts out as INIT.
   */
  private int state = INIT;

  /**
   * The (type of the) current event.
   */
  private int event;

  /**
   * Whether we are looking ahead. Used by hasNext.
   */
  private boolean lookahead;

  /**
   * The element name stack. The first element in this stack will be the
   * root element.
   */
  private LinkedList stack = new LinkedList();

  /**
   * Stack of namespace contexts. These are maps specifying prefix-to-URI
   * mappings. The first element in this stack is the most recent namespace
   * context (i.e. the other way around from the element name stack).
   */
  private LinkedList namespaces = new LinkedList();

  /**
   * The base-URI stack. This holds the base URI context for each element.
   * The first element in this stack is the most recent context (i.e. the
   * other way around from the element name stack).
   */
  private LinkedList bases = new LinkedList();

  /**
   * The list of attributes for the current element, in the order defined in
   * the XML stream.
   */
  private ArrayList attrs = new ArrayList();

  /**
   * Buffer for text and character data.
   */
  private StringBuffer buf = new StringBuffer();

  /**
   * Buffer for NMTOKEN strings (markup).
   */
  private StringBuffer nmtokenBuf = new StringBuffer();

  /**
   * Buffer for string literals. (e.g. attribute values)
   */
  private StringBuffer literalBuf = new StringBuffer();

  /**
   * Temporary Unicode character buffer used during character data reads.
   * Allocated once with room for 1024 int values.
   */
  private int[] tmpBuf = new int[1024];

  /**
   * The element content model for the current element.
   */
  private ContentModel currentContentModel;

  /**
   * The validation stack.
This holds lists of the elements seen for each * element, in order to determine whether the names and order of these * elements match the content model for the element. The last entry in * this stack represents the current element. */ private LinkedList validationStack; /** * These sets contain the IDs and the IDREFs seen in the document, to * ensure that IDs are unique and that each IDREF refers to an ID in the * document. */ private HashSet ids, idrefs; /** * The target and data associated with the current processing instruction * event. */ private String piTarget, piData; /** * The XML version declared in the XML declaration. */ private String xmlVersion; /** * The encoding declared in the XML declaration. */ private String xmlEncoding; /** * The standalone value declared in the XML declaration. */ private Boolean xmlStandalone; /** * The document type definition. */ Doctype doctype; /** * State variables for determining parameter-entity expansion. */ private boolean expandPE, peIsError; /** * Whether this is a validating parser. */ private final boolean validating; /** * Whether strings representing markup will be interned. */ private final boolean stringInterning; /** * If true, CDATA sections will be merged with adjacent text nodes into a * single event. */ private final boolean coalescing; /** * Whether to replace general entity references with their replacement * text automatically during parsing. * Otherwise entity-reference events will be issued. */ private final boolean replaceERefs; /** * Whether to support external entities. */ private final boolean externalEntities; /** * Whether to support DTDs. */ private final boolean supportDTD; /** * Whether to support XML namespaces. If true, namespace information will * be available. Otherwise namespaces will simply be reported as ordinary * attributes. */ private final boolean namespaceAware; /** * Whether to support XML Base. 
If true, URIs specified in xml:base * attributes will be honoured when resolving external entities. */ private final boolean baseAware; /** * Whether to report extended event types (START_ENTITY and END_ENTITY) * in addition to the standard event types. Used by the SAX parser. */ private final boolean extendedEventTypes; /** * The reporter to receive parsing warnings. */ final XMLReporter reporter; /** * Callback interface for resolving external entities. */ final XMLResolver resolver; // -- Constants for testing the next kind of markup event -- private static final String TEST_START_ELEMENT = "<"; private static final String TEST_END_ELEMENT = "</"; private static final String TEST_COMMENT = "<!--"; private static final String TEST_PI = "<?"; private static final String TEST_CDATA = "<![CDATA["; private static final String TEST_XML_DECL = "<?xml"; private static final String TEST_DOCTYPE_DECL = "<!DOCTYPE"; private static final String TEST_ELEMENT_DECL = "<!ELEMENT"; private static final String TEST_ATTLIST_DECL = "<!ATTLIST"; private static final String TEST_ENTITY_DECL = "<!ENTITY"; private static final String TEST_NOTATION_DECL = "<!NOTATION"; private static final String TEST_KET = ">"; private static final String TEST_END_COMMENT = "--"; private static final String TEST_END_PI = "?>"; private static final String TEST_END_CDATA = "]]>"; /** * The general entities predefined by the XML specification. */ private static final LinkedHashMap PREDEFINED_ENTITIES = new LinkedHashMap(); static { PREDEFINED_ENTITIES.put("amp", "&"); PREDEFINED_ENTITIES.put("lt", "<"); PREDEFINED_ENTITIES.put("gt", ">"); PREDEFINED_ENTITIES.put("apos", "'"); PREDEFINED_ENTITIES.put("quot", "\""); } /** * Creates a new XML parser for the given input stream. * This constructor should be used where possible, as it allows the * encoding of the XML data to be correctly determined from the stream. 
* @param in the input stream * @param systemId the URL from which the input stream was retrieved * (necessary if there are external entities to be resolved) * @param validating if the parser is to be a validating parser * @param namespaceAware if the parser should support XML Namespaces * @param coalescing if CDATA sections should be merged into adjacent text * nodes * @param replaceERefs if entity references should be automatically * replaced by their replacement text (otherwise they will be reported as * entity-reference events) * @param externalEntities if external entities should be loaded * @param supportDTD if support for the XML DTD should be enabled * @param baseAware if the parser should support XML Base to resolve * external entities * @param stringInterning whether strings will be interned during parsing * @param reporter the reporter to receive warnings during processing * @param resolver the callback interface used to resolve external * entities */ public XMLParser(InputStream in, String systemId, boolean validating, boolean namespaceAware, boolean coalescing, boolean replaceERefs, boolean externalEntities, boolean supportDTD, boolean baseAware, boolean stringInterning, boolean extendedEventTypes, XMLReporter reporter, XMLResolver resolver) { this.validating = validating; this.namespaceAware = namespaceAware; this.coalescing = coalescing; this.replaceERefs = replaceERefs; this.externalEntities = externalEntities; this.supportDTD = supportDTD; this.baseAware = baseAware; this.stringInterning = stringInterning; this.extendedEventTypes = extendedEventTypes; this.reporter = reporter; this.resolver = resolver; if (validating) { validationStack = new LinkedList(); ids = new HashSet(); idrefs = new HashSet(); } pushInput(new Input(in, null, null, systemId, null, null, false, true)); } /** * Creates a new XML parser for the given character stream. 
* This constructor is only available for compatibility with the JAXP * APIs, which permit XML to be parsed from a character stream. Because * the encoding specified by the character stream may conflict with that * specified in the XML declaration, this method should be avoided where * possible. * @param in the input stream * @param systemId the URL from which the input stream was retrieved * (necessary if there are external entities to be resolved) * @param validating if the parser is to be a validating parser * @param namespaceAware if the parser should support XML Namespaces * @param coalescing if CDATA sections should be merged into adjacent text * nodes * @param replaceERefs if entity references should be automatically * replaced by their replacement text (otherwise they will be reported as * entity-reference events) * @param externalEntities if external entities should be loaded * @param supportDTD if support for the XML DTD should be enabled * @param baseAware if the parser should support XML Base to resolve * external entities * @param stringInterning whether strings will be interned during parsing * @param reporter the reporter to receive warnings during processing * @param resolver the callback interface used to resolve external * entities */ public XMLParser(Reader reader, String systemId, boolean validating, boolean namespaceAware, boolean coalescing, boolean replaceERefs, boolean externalEntities, boolean supportDTD, boolean baseAware, boolean stringInterning, boolean extendedEventTypes, XMLReporter reporter, XMLResolver resolver) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -