domconsumer.java
来自「kaffe Java 解释器语言,源码,Java的子集系统,开放源代码」· Java 代码 · 共 944 行 · 第 1/2 页
JAVA
944 行
/* * Copyright (C) 1999-2001 David Brownell * * This file is part of GNU JAXP, a library. * * GNU JAXP is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * GNU JAXP is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * As a special exception, if you link this library with other files to * produce an executable, this library does not by itself cause the * resulting executable to be covered by the GNU General Public License. * This exception does not however invalidate any other reasons why the * executable file might be covered by the GNU General Public License. */package gnu.xml.pipeline;import java.util.Hashtable;import org.w3c.dom.*;import org.xml.sax.*;import org.xml.sax.ext.DeclHandler;import org.xml.sax.ext.LexicalHandler;import org.xml.sax.helpers.AttributesImpl;import gnu.xml.util.DomParser;/** * This consumer builds a DOM Document from its input, acting either as a * pipeline terminus or as an intermediate buffer. When a document's worth * of events has been delivered to this consumer, that document is read with * a {@link DomParser} and sent to the next consumer. It is also available * as a read-once property. * * <p>The DOM tree is constructed as faithfully as possible. There are some * complications since a DOM should expose behaviors that can't be implemented * without API backdoors into that DOM, and because some SAX parsers don't * report all the information that DOM permits to be exposed. 
The general * problem areas involve information from the Document Type Declaration (DTD). * DOM only represents a limited subset, but has some behaviors that depend * on much deeper knowledge of a document's DTD. You shouldn't have much to * worry about unless you change handling of "noise" nodes from its default * setting (which ignores them all); note if you use JAXP to populate your * DOM trees, it wants to save "noise" nodes by default. (Such nodes include * ignorable whitespace, comments, entity references and CDATA boundaries.) * Otherwise, your * main worry will be if you use a SAX parser that doesn't flag ignorable * whitespace unless it's validating (few don't). * * <p> The SAX2 events used as input must contain XML Names for elements * and attributes, with original prefixes. In SAX2, * this is optional unless the "namespace-prefixes" parser feature is set. * Moreover, many application components won't provide completely correct * structures anyway. <em>Before you convert a DOM to an output document, * you should plan to postprocess it to create or repair such namespace * information.</em> The {@link NSFilter} pipeline stage does such work. * * <p> <em>Note: changes late in DOM L2 process made it impractical to * attempt to create the DocumentType node in any implementation-neutral way, * much less to populate it (L1 didn't support even creating such nodes). * To create and populate such a node, subclass the inner * {@link DomConsumer.Handler} class and teach it about the backdoors into * whatever DOM implementation you want. It's possible that some revised * DOM API (L3?) will make this problem solvable again. 
</em> * * @see DomParser * * @author David Brownell */public class DomConsumer implements EventConsumer{ private Class domImpl; private boolean hidingCDATA = true; private boolean hidingComments = true; private boolean hidingWhitespace = true; private boolean hidingReferences = true; private Handler handler; private ErrorHandler errHandler; private EventConsumer next; // FIXME: this can't be a generic pipeline stage just now, // since its input became a Class not a String (to be turned // into a class, using the right class loader) /** * Configures this pipeline terminus to use the specified implementation * of DOM when constructing its result value. * * @param impl class implementing {@link org.w3c.dom.Document Document} * which publicly exposes a default constructor * * @exception SAXException when there is a problem creating an * empty DOM document using the specified implementation */ public DomConsumer (Class impl) throws SAXException { domImpl = impl; handler = new Handler (this); } /** * This is the hook through which a subclass provides a handler * which knows how to access DOM extensions, specific to some * implementation, to record additional data in a DOM. * Treat this as part of construction; don't call it except * before (or between) parses. */ protected void setHandler (Handler h) { handler = h; } private Document emptyDocument () throws SAXException { try { return (Document) domImpl.newInstance (); } catch (IllegalAccessException e) { throw new SAXException ("can't access constructor: " + e.getMessage ()); } catch (InstantiationException e) { throw new SAXException ("can't instantiate Document: " + e.getMessage ()); } } /** * Configures this consumer as a buffer/filter, using the specified * DOM implementation when constructing its result value. * * <p> This event consumer acts as a buffer and filter, in that it * builds a DOM tree and then writes it out when <em>endDocument</em> * is invoked. 
Because of the limitations of DOM, much information
     * will as a rule not be seen in that replay.  To get a full fidelity
     * copy of the input event stream, use a {@link TeeConsumer}.
     *
     * @param impl class implementing {@link org.w3c.dom.Document Document}
     *  which publicly exposes a default constructor
     * @param n receives a "replayed" sequence of parse events when
     *  the <em>endDocument</em> method is invoked.
     *
     * @exception SAXException when there is a problem creating an
     *  empty DOM document using the specified DOM implementation
     */
    public DomConsumer (Class impl, EventConsumer n)
    throws SAXException
    {
        this (impl);
        next = n;
    }

    /**
     * Returns the document constructed from the preceding
     * sequence of events.  This method should not be
     * used again until another sequence of events has been
     * given to this EventConsumer.
     *
     * <p> This delegates to the handler's <code>clearDocument()</code>,
     * which hands back the document and forgets it; that is why the
     * result is a read-once value.
     *
     * @return the document built so far, as returned by the handler
     */
    final public Document getDocument ()
    {
        return handler.clearDocument ();
    }

    /**
     * Records the error handler for this consumer.  It is made available
     * through {@link #getErrorHandler}, which the event handler consults
     * when it reports a fatal error.
     *
     * @param handler the error handler to record; note that this parameter
     *  shadows the <code>handler</code> field, and only the error handler
     *  field is assigned here
     */
    public void setErrorHandler (ErrorHandler handler)
    {
        errHandler = handler;
    }

    /**
     * Returns true if the consumer is hiding entity reference nodes
     * (the default), and false if EntityReference nodes should
     * instead be created.  Such EntityReference nodes will normally be
     * empty, unless an implementation arranges to populate them and then
     * turn them back into readonly objects.
     *
     * @see #setHidingReferences
     */
    final public boolean isHidingReferences ()
    { return hidingReferences; }

    /**
     * Controls whether the consumer will hide entity expansions,
     * or will instead mark them with entity reference nodes.
     *
     * @see #isHidingReferences
     * @param flag False if entity reference nodes will appear
     */
    final public void setHidingReferences (boolean flag)
    { hidingReferences = flag; }

    /**
     * Returns true if the consumer is hiding comments (the default),
     * and false if they should be placed into the output document.
     *
     * @see #setHidingComments
     */
    public final boolean isHidingComments ()
    { return hidingComments; }

    /**
     * Controls whether the consumer is hiding comments.
*
     * @see #isHidingComments
     * @param flag True if comments are to be hidden
     */
    public final void setHidingComments (boolean flag)
    { hidingComments = flag; }

    /**
     * Returns true if the consumer is hiding ignorable whitespace
     * (the default), and false if such whitespace should be placed
     * into the output document as children of element nodes.
     *
     * @see #setHidingWhitespace
     */
    public final boolean isHidingWhitespace ()
    { return hidingWhitespace; }

    /**
     * Controls whether the consumer hides ignorable whitespace.
     *
     * @see #isHidingWhitespace
     * @param flag True if ignorable whitespace is to be hidden
     */
    public final void setHidingWhitespace (boolean flag)
    { hidingWhitespace = flag; }

    /**
     * Returns true if the consumer is hiding CDATA boundaries
     * (the default), and false if it is saving them.
     *
     * @see #setHidingCDATA
     */
    final public boolean isHidingCDATA ()
    { return hidingCDATA; }

    /**
     * Controls whether the consumer will hide CDATA boundaries
     * or save them.
     *
     * @see #isHidingCDATA
     * @param flag True to hide CDATA boundaries; false to save them,
     *  so that CDATA text is treated differently from other text nodes
     */
    final public void setHidingCDATA (boolean flag)
    { hidingCDATA = flag; }

    /** Returns the document handler being used. */
    final public ContentHandler getContentHandler ()
    { return handler; }

    /** Returns the DTD handler being used. */
    final public DTDHandler getDTDHandler ()
    { return handler; }

    /**
     * Returns the handler being used, for either the SAX2
     * lexical-handler or declaration-handler property ID; the same
     * handler object is returned for both.
     * (DOM construction can't really use declaration handlers.)
     *
     * @param id the property identifier (a URI)
     * @return the handler, for the two recognized identifiers
     * @exception SAXNotRecognizedException for any other identifier
     */
    final public Object getProperty (String id)
    throws SAXNotRecognizedException
    {
        if ("http://xml.org/sax/properties/lexical-handler".equals (id))
            return handler;
        if ("http://xml.org/sax/properties/declaration-handler".equals (id))
            return handler;
        throw new SAXNotRecognizedException (id);
    }

    /** Returns the next consumer in the pipeline, or null if none was set. */
    EventConsumer getNext () { return next; }

    /** Returns the error handler recorded by setErrorHandler, possibly null. */
    ErrorHandler getErrorHandler () { return errHandler; }

    /**
     * Class used to intercept various parsing events and use them to
     * populate a DOM document.  Subclasses would typically know and use
     * backdoors into specific DOM implementations, used to implement
     * DTD-related functionality.
*
     * <p> Note that if this ever throws a DOMException (runtime exception)
     * that will indicate a bug in the DOM (e.g. doesn't support something
     * per specification) or the parser (e.g. emitted an illegal name, or
     * accepted illegal input data). </p>
     */
    public static class Handler
        implements ContentHandler, LexicalHandler,
            DTDHandler, DeclHandler
    {
        /** The consumer this handler builds a document for. */
        protected DomConsumer consumer;

        /** Implementation object obtained from the first empty document. */
        private DOMImplementation impl;
        /** Document under construction; null after clearDocument (). */
        private Document document;
        /** True when impl reports support for the "XML" feature, version "2.0". */
        private boolean isL2;

        /** Most recent locator supplied by the parser, if any. */
        private Locator locator;
        /** Node currently being populated. */
        private Node top;
        private boolean inCDATA;
        private boolean mergeCDATA;
        private boolean inDTD;
        private String currentEntity;

        private boolean recreatedAttrs;
        private AttributesImpl attributes = new AttributesImpl ();

        /**
         * Subclasses may use SAX2 events to provide additional
         * behaviors in the resulting DOM.
         *
         * @param consumer the consumer whose DOM implementation is used
         * @exception SAXException when the consumer can't create an
         *  empty document
         */
        protected Handler (DomConsumer consumer)
        throws SAXException
        {
            this.consumer = consumer;
            document = consumer.emptyDocument ();
            impl = document.getImplementation ();
            isL2 = impl.hasFeature ("XML", "2.0");
        }

        /**
         * Reports a fatal error.  The message and cause are wrapped in a
         * SAXParseException, using the locator's position when a locator
         * is known.  The exception is passed to the consumer's error
         * handler when one is set, and is then always thrown.
         *
         * @param message description of the problem
         * @param x the underlying exception
         * @exception SAXException always; either the one built here, or
         *  one thrown by the error handler itself
         */
        private void fatal (String message, Exception x)
        throws SAXException
        {
            SAXParseException e;
            ErrorHandler errHandler = consumer.getErrorHandler ();

            if (locator == null) {
                e = new SAXParseException (message, null, null, -1, -1, x);
            } else {
                e = new SAXParseException (message, locator, x);
            }
            if (errHandler != null) {
                errHandler.fatalError (e);
            }
            throw e;
        }

        /**
         * Returns and forgets the document produced.  If the handler is
         * reused, a new document may be created.  The locator is
         * forgotten as well.
         */
        Document clearDocument ()
        {
            Document retval = document;
            document = null;
            locator = null;
            return retval;
        }

        /**
         * Returns the document under construction.
         */
        protected Document getDocument ()
        { return document; }

        /**
         * Returns the current node being populated.  This is usually
         * an Element or Document, but it might be an EntityReference
         * node if some implementation-specific code knows how to put
         * those into the result tree and later mark them as readonly.
*/ protected Node getTop () { return top; } // SAX1 public void setDocumentLocator (Locator locator) { this.locator = locator; } // SAX1 public void startDocument () throws SAXException { if (document == null) try { if (isL2) { // couple to original implementation document = impl.createDocument (null, "foo", null); document.removeChild (document.getFirstChild ()); } else { document = consumer.emptyDocument (); } } catch (Exception e) { fatal ("DOM create document", e); } top = document; } // SAX1 public void endDocument () throws SAXException { try { if (consumer.getNext () != null && document != null) { DomParser parser = new DomParser (document); EventFilter.bind (parser, consumer.getNext ()); parser.parse ("ignored"); } } finally { top = null; } } // SAX1 public void processingInstruction (String target, String data) throws SAXException { // we can't create populated entity ref nodes using // only public DOM APIs (they've got to be readonly) if (currentEntity != null) return; ProcessingInstruction pi; if (isL2 // && consumer.isUsingNamespaces () && target.indexOf (':') != -1) namespaceError ( "PI target name is namespace nonconformant: " + target); if (inDTD) return; pi = document.createProcessingInstruction (target, data); top.appendChild (pi); } /** * Subclasses may overrride this method to provide a more efficient * way to construct text nodes. * Typically, copying the text into a single character array will * be more efficient than doing that as well as allocating other * needed for a String, including an internal StringBuffer. * Those additional memory and CPU costs can be incurred later, * if ever needed. * Unfortunately the standard DOM factory APIs encourage those costs * to be incurred early. */ protected Text createText ( boolean isCDATA, char ch [], int start, int length ) { String value = new String (ch, start, length); if (isCDATA) return document.createCDATASection (value); else return document.createTextNode (value); } // SAX1
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?