📄 xmlparser.java
字号:
/* Copyright (c) 2006 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */package com.google.gdata.util;import com.google.gdata.util.common.xml.XmlWriter;import org.xml.sax.Attributes;import org.xml.sax.InputSource;import org.xml.sax.Locator;import org.xml.sax.SAXException;import org.xml.sax.helpers.DefaultHandler;import org.xml.sax.helpers.ParserAdapter;import java.io.IOException;import java.io.InputStream;import java.io.Reader;import java.io.StringWriter;import java.net.URI;import java.net.URISyntaxException;import java.util.ArrayList;import java.util.HashMap;import java.util.HashSet;import java.util.Stack;import java.util.logging.Level;import java.util.logging.Logger;import javax.xml.parsers.ParserConfigurationException;import javax.xml.parsers.SAXParser;import javax.xml.parsers.SAXParserFactory;/** * XML parser. * <p> * This is a thin layer on top of a SAX parser. The key concept * necessary to understand this parser is <i>Element Handler</i>. * Element handlers are type-specific parsers. Each handler instance * contains an instance of the Java type corresponding to the XML * type it parses. At any given time, one handler is active, and zero * or more handlers are kept on the stack. This corresponds directly * to the set of currently opened XML tags. * <p> * To use this parser, one must define an {@link * XmlParser.ElementHandler} type (usually one per XML schema type), * specify the root element handler, and pass a reader to the * {@link #parse(Reader, com.google.gdata.util.XmlParser.ElementHandler, String, * String)} method. * <p> * * * * @see XmlParser.ElementHandler */public class XmlParser extends DefaultHandler { private static final Logger logger = Logger.getLogger(XmlParser.class.getName()); // The SAXParserFactory used to create underlying SAXParser instances. private static SAXParserFactory parserFactory; /** * Defines a Java system property that can be set to override the * default JAXP SAX parser factory mechanism and guarantee that a * specific parser will be used. If set, the value should be the * name of the SAXParserFactory implementation that should be used. */ public static final String SAX_PARSER_PROPERTY = "com.google.gdata.SAXParserFactory"; // The JDK system property for JAXP parser configuration. private static final String JDK_PARSER_PROPERTY = "javax.xml.parsers.SAXParserFactory"; // This method must be synchronized because it is not otherwise thread // safe due to system property manipulation; this is expensive, but // the method will only be used once (or a small number of times) // during XmlParser initialization. private static synchronized SAXParserFactory getSAXParserFactory() throws ParserConfigurationException, SAXException { String saxParserFactory = System.getProperty(SAX_PARSER_PROPERTY); if (saxParserFactory == null) return SAXParserFactory.newInstance(); // Temporarily override the JDK parser selection system property // and create a parser based upon the request implementation. // Doing transient setting of a system property is unfortunate, // but JAXP provides no other explicit way to influence the // factory selection. This should only happen once (or a very // small number of times) around the first usage(s) of XmlParser. String origParserFactory = System.getProperty(JDK_PARSER_PROPERTY); try { System.setProperty(JDK_PARSER_PROPERTY, saxParserFactory); return SAXParserFactory.newInstance(); } finally { if (origParserFactory == null) { System.clearProperty(JDK_PARSER_PROPERTY); } else { System.setProperty(JDK_PARSER_PROPERTY, origParserFactory); } } } /** * Base class for custom element handlers. * <p> * To implement a new element handler, one must create a new class * extending this class, override {@link #getChildHandler} if nested * elements need to be parsed, override {@link #processAttribute} if * attributes need to be parsed, and override {@link * #processEndElement()} to receive the text() value and post-process * the element. * <p> * If the handler wishes to store unrecognized XML contents in an {@link * XmlBlob} value, it must call {@link #initializeXmlBlob} either in the * constructor, in parent's {@link #getChildHandler}, or in {@link * #processAttribute}. The resulting {@link XmlBlob} value is available * following the invocation of {@link #processEndElement()} * through the object passed to {@link #initializeXmlBlob}. * <p> * * This class implements overridable methods to support unrecognized XML * parsing if desired. * * */ public static class ElementHandler { /** This element's QName. Used for error reporting. */ public String qName; /** This element's text() value. */ public String value; /** * The current state of {@code xml:lang}. * See http://www.w3.org/TR/REC-xml/#sec-lang-tag for more information. */ public String xmlLang; /** * The current state of {@code xml:base}. * See http://www.cafeconleche.org/books/xmljava/chapters/ch03s03.html for * more information. */ public String xmlBase; /** Keeps track of the element stack. */ ElementHandler parent; /** * If the handler is parsing unrecognized XML, this object stores the * output. */ XmlBlob xmlBlob = null; /** * Flag indicating whether it's still OK to call {@link #initializeXmlBlob}. */ boolean okToInitializeXmlBlob = true; /** Flag indicating whether mixed content unrecognized XML is allowed. */ boolean mixedContent = false; /** * Flag indicating whether unrecognized XML should be processed for * full-text indexing. If set, the resulting string ready for indexing is * stored in {@link XmlBlob#fullText}. Non-contiguous strings within this * index are separated by '\n'. */ boolean fullTextIndex = false; /** This element's inner XML writer. Used internally by XmlParser. */ XmlWriter innerXml; /** Namespaces used by this blob. */ HashSet blobNamespaces = new HashSet(); /** String writer underlying {@link #innerXml}. */ StringWriter innerXmlStringWriter; /** String writer underlying the full-text index string. */ StringWriter fullTextIndexWriter; /** * Determines a handler for a child element. * <p> * * The default implementation doesn't recognize anything. The result is a * schema error <i>unless</i> the parent handler accepts unrecognized XML. * * @param namespace * Child element namespace URI. * * @param localName * Child element name. * * @param attrs * Child element attributes. These attributes will be * communicated to the child element handler through its * {@link #processAttribute} method. They are passed here because * sometimes the value of some attribute determines the element's * content type, so different element handlers may be needed. * * @return Child element handler, or {@code null} if the child is * unrecognized. * * @throws ParseException * Invalid child element. * * @throws IOException * Internal I/O exception (e.g., thrown by XML blob writer). */ public ElementHandler getChildHandler(String namespace, String localName, Attributes attrs) throws ParseException, IOException { if (xmlBlob == null) { throw new ParseException("Unrecognized element '" + localName + "'."); } else { logger.info("No child handler for " + localName + ". Treating as an extension element."); return null; } } /** * Called to process an attribute. Designed to be overridden by derived * classes. * * @param namespace * Attribute namespace URI. * * @param localName * Attribute name. * * @param value * Attribute value. * * @throws ParseException * Invalid attribute. */ public void processAttribute(String namespace, String localName, String value) throws ParseException {} /** * Called to process this element when the closing tag is encountered. * The default implementation refuses to accept text() content, unless * the handler is configured to accept unrecognized XML with mixed content. */ public void processEndElement() throws ParseException { if (value != null && !value.trim().equals("") && !mixedContent) { throw new ParseException( "This element must not have any text() data."); } } /** * If a derived class wishes to retrieve all unrecognized XML in a blob, * it calls this method. It must be called in the constructor, in * the parent element handler, or in {@link #processAttribute}. * * @param xmlBlob * Supplies the XML blob that stores the resulting XML. * * @param mixedContent * Specifies that the handler accepts mixed content XML. * * @param fullTextIndex * Flag indicating whether unrecognized XML should be processed * for full-text indexing. If set, the resulting string ready for * indexing is stored in {@link XmlBlob#fullText}. */ public void initializeXmlBlob(XmlBlob xmlBlob, boolean mixedContent, boolean fullTextIndex) throws IOException { assert okToInitializeXmlBlob; this.xmlBlob = xmlBlob; this.mixedContent = mixedContent; this.innerXmlStringWriter = new StringWriter(); this.innerXml = new XmlWriter(innerXmlStringWriter); this.fullTextIndex = fullTextIndex; if (fullTextIndex) { this.fullTextIndexWriter = new StringWriter(); } } /** * Utility routine that combines the current state of {@code xml:base} * with the specified URI to obtain an absolute URI. * <p> * * See http://www.cafeconleche.org/books/xmljava/chapters/ch03s03.html * for more information. * * @param uriValue * URI to be interpreted in the context of {@code xml:base}. * * @return Corresponding absolute URI. * * @throws ParseException * Invalid URI. */ public String getAbsoluteUri(String uriValue) throws ParseException { try { return getCumulativeXmlBase(xmlBase, uriValue); } catch (URISyntaxException e) { throw new ParseException(e.getMessage()); } } /** * Utility method to return the value of an xsd:boolean attribute. * * @param attrs * Elements attributes to test against. * * @param attrName * Attribute name. * * @return the Boolean value if the attribute is present, or * {@code null} otherwise. * * @throws ParseException if attribute value is not valid xsd:boolean. */ public Boolean getBooleanAttribute(Attributes attrs, String attrName) throws ParseException { String value = attrs.getValue("", attrName); if (value == null) { return null; } if (value.equalsIgnoreCase("false") || value.equals("0")) { return Boolean.FALSE; } if (value.equalsIgnoreCase("true") || value.equals("1")) { return Boolean.TRUE; } throw new ParseException("Invalid value for " + attrName + " attribute: " + value); } } /** Root element handler. */ protected ElementHandler rootHandler; /** Root element namespace URI. */ protected String rootNamespace; /** Root element name. */ protected String rootElementName; /** Top of the element handler stack. */ ElementHandler curHandler; /** Number of unrecognized elements on the stack. */ int unrecognizedElements = 0; /** Document locator used to get line and column numbers for SAX events. */ Locator locator; /** * Set of all namespace declarations valid at the current location. * Includes namespace declarations from all ancestor elements. */ protected HashMap<String, Stack<XmlWriter.Namespace>> namespaceMap = new HashMap<String, Stack<XmlWriter.Namespace>>(); /** * Namespace declarations for the current element. * Valid during {@link #startElement}. */ ArrayList<XmlWriter.Namespace> namespaceDecls = new ArrayList<XmlWriter.Namespace>(); /** * Parses XML. * * @param reader * Supplies the XML to parse. * * @param rootHandler * The root element handler corresponding to the expected document * type. * * @param rootNamespace * Root element namespace URI. * * @param rootElementName * Root element name. * * @throws IOException * Thrown by {@code reader}. * * @throws ParseException * XML failed to validate against the schema implemented by * {@code rootHandler}. */ public void parse(Reader reader, ElementHandler rootHandler, String rootNamespace, String rootElementName) throws IOException, ParseException { InputSource is = new InputSource(reader); this.rootHandler = rootHandler; this.rootNamespace = rootNamespace; this.rootElementName = rootElementName; parse(is); } /** * Parses XML. * * @param input * Supplies the XML to parse. * * @param rootHandler * The root element handler corresponding to the expected document * type. * * @param rootNamespace
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -