basemarkupserializer.java
来自「JAVA 所有包」· Java 代码 · 共 1,946 行 · 第 1/5 页
JAVA
1,946 行
/* * Copyright 1999-2002,2004,2005 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */// Sep 14, 2000:// Fixed comments to preserve whitespaces and add a line break// when indenting. Reported by Gervase Markham <gerv@gerv.net>// Sep 14, 2000:// Fixed serializer to report IO exception directly, instead at// the end of document processing.// Reported by Patrick Higgins <phiggins@transzap.com>// Sep 13, 2000:// CR in character data will print as �D;// Aug 25, 2000:// Fixed processing instruction printing inside element content// to not escape content. Reported by Mikael Staldal// <d96-mst@d.kth.se>// Aug 25, 2000:// Added ability to omit comments.// Contributed by Anupam Bagchi <abagchi@jtcsv.com>// Aug 26, 2000:// Fixed bug in newline handling when preserving spaces.// Contributed by Mike Dusseault <mdusseault@home.com>// Aug 29, 2000:// Fixed state.unescaped not being set to false when// entering element state.// Reported by Lowell Vaughn <lvaughn@agillion.com>package com.sun.org.apache.xml.internal.serialize;import java.io.IOException;import java.io.OutputStream;import java.io.Writer;import java.util.Hashtable;import java.util.Vector;import com.sun.org.apache.xerces.internal.dom.DOMErrorImpl;import com.sun.org.apache.xerces.internal.dom.DOMLocatorImpl;import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;import com.sun.org.apache.xerces.internal.util.XMLChar;import org.w3c.dom.DOMImplementation;import org.w3c.dom.Document;import org.w3c.dom.DocumentFragment;import org.w3c.dom.DocumentType;import org.w3c.dom.DOMError;import org.w3c.dom.DOMErrorHandler;import org.w3c.dom.Element;import org.w3c.dom.Entity;import org.w3c.dom.NamedNodeMap;import org.w3c.dom.Node;import org.w3c.dom.Notation;import org.w3c.dom.ls.LSException;import org.w3c.dom.ls.LSSerializerFilter;import org.w3c.dom.traversal.NodeFilter;import org.xml.sax.ContentHandler;import org.xml.sax.DTDHandler;import org.xml.sax.DocumentHandler;import org.xml.sax.Locator;import org.xml.sax.SAXException;import org.xml.sax.ext.DeclHandler;import org.xml.sax.ext.LexicalHandler;/** * Base class for a serializer supporting both DOM and SAX pretty * serializing of XML/HTML/XHTML documents. Derives classes perform * the method-specific serializing, this class provides the common * serializing mechanisms. * <p> * The serializer must be initialized with the proper writer and * output format before it can be used by calling {@link #setOutputCharStream} * or {@link #setOutputByteStream} for the writer and {@link #setOutputFormat} * for the output format. * <p> * The serializer can be reused any number of times, but cannot * be used concurrently by two threads. * <p> * If an output stream is used, the encoding is taken from the * output format (defaults to <tt>UTF-8</tt>). If a writer is * used, make sure the writer uses the same encoding (if applies) * as specified in the output format. * <p> * The serializer supports both DOM and SAX. DOM serializing is done * by calling {@link #serialize(Document)} and SAX serializing is done by firing * SAX events and using the serializer as a document handler. * This also applies to derived class. * <p> * If an I/O exception occurs while serializing, the serializer * will not throw an exception directly, but only throw it * at the end of serializing (either DOM or SAX's {@link * org.xml.sax.DocumentHandler#endDocument}. * <p> * For elements that are not specified as whitespace preserving, * the serializer will potentially break long text lines at space * boundaries, indent lines, and serialize elements on separate * lines. Line terminators will be regarded as spaces, and * spaces at beginning of line will be stripped. * <p> * When indenting, the serializer is capable of detecting seemingly * element content, and serializing these elements indented on separate * lines. An element is serialized indented when it is the first or * last child of an element, or immediate following or preceding * another element. * * * @version $Revision: 1.4 $ $Date: 2006/01/23 06:47:25 $ * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a> * @author <a href="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a> * @author Elena Litani, IBM * @author Sunitha Reddy, Sun Microsystems * @see Serializer * @see LSSerializer */public abstract class BaseMarkupSerializer implements ContentHandler, DocumentHandler, LexicalHandler, DTDHandler, DeclHandler, DOMSerializer, Serializer{ // DOM L3 implementation protected short features = 0xFFFFFFFF; protected DOMErrorHandler fDOMErrorHandler; protected final DOMErrorImpl fDOMError = new DOMErrorImpl(); protected LSSerializerFilter fDOMFilter; protected EncodingInfo _encodingInfo; /** * Holds array of all element states that have been entered. * The array is automatically resized. When leaving an element, * it's state is not removed but reused when later returning * to the same nesting level. */ private ElementState[] _elementStates; /** * The index of the next state to place in the array, * or one plus the index of the current state. When zero, * we are in no state. */ private int _elementStateCount; /** * Vector holding comments and PIs that come before the root * element (even after it), see {@link #serializePreRoot}. */ private Vector _preRoot; /** * If the document has been started (header serialized), this * flag is set to true so it's not started twice. */ protected boolean _started; /** * True if the serializer has been prepared. This flag is set * to false when the serializer is reset prior to using it, * and to true after it has been prepared for usage. */ private boolean _prepared; /** * Association between namespace URIs (keys) and prefixes (values). * Accumulated here prior to starting an element and placing this * list in the element state. */ protected Hashtable _prefixes; /** * The system identifier of the document type, if known. */ protected String _docTypePublicId; /** * The system identifier of the document type, if known. */ protected String _docTypeSystemId; /** * The output format associated with this serializer. This will never * be a null reference. If no format was passed to the constructor, * the default one for this document type will be used. The format * object is never changed by the serializer. */ protected OutputFormat _format; /** * The printer used for printing text parts. */ protected Printer _printer; /** * True if indenting printer. */ protected boolean _indenting; /** Temporary buffer to store character data */ protected final StringBuffer fStrBuffer = new StringBuffer(40); /** * The underlying writer. */ private Writer _writer; /** * The output stream. */ private OutputStream _output; /** Current node that is being processed */ protected Node fCurrentNode = null; //--------------------------------// // Constructor and initialization // //--------------------------------// /** * Protected constructor can only be used by derived class. * Must initialize the serializer before serializing any document, * by calling {@link #setOutputCharStream} or {@link #setOutputByteStream} * first */ protected BaseMarkupSerializer( OutputFormat format ) { int i; _elementStates = new ElementState[ 10 ]; for ( i = 0 ; i < _elementStates.length ; ++i ) _elementStates[ i ] = new ElementState(); _format = format; } public DocumentHandler asDocumentHandler() throws IOException { prepare(); return this; } public ContentHandler asContentHandler() throws IOException { prepare(); return this; } public DOMSerializer asDOMSerializer() throws IOException { prepare(); return this; } public void setOutputByteStream( OutputStream output ) { if ( output == null ) { String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "ArgumentIsNull", new Object[]{"output"}); throw new NullPointerException(msg); } _output = output; _writer = null; reset(); } public void setOutputCharStream( Writer writer ) { if ( writer == null ) { String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "ArgumentIsNull", new Object[]{"writer"}); throw new NullPointerException(msg); } _writer = writer; _output = null; reset(); } public void setOutputFormat( OutputFormat format ) { if ( format == null ) { String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "ArgumentIsNull", new Object[]{"format"}); throw new NullPointerException(msg); } _format = format; reset(); } public boolean reset() { if ( _elementStateCount > 1 ) { String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "ResetInMiddle", null); throw new IllegalStateException(msg); } _prepared = false; fCurrentNode = null; fStrBuffer.setLength(0); return true; } protected void prepare() throws IOException { if ( _prepared ) return; if ( _writer == null && _output == null ) { String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "NoWriterSupplied", null); throw new IOException(msg); } // If the output stream has been set, use it to construct // the writer. It is possible that the serializer has been // reused with the same output stream and different encoding. _encodingInfo = _format.getEncodingInfo(); if ( _output != null ) { _writer = _encodingInfo.getWriter(_output); } if ( _format.getIndenting() ) { _indenting = true; _printer = new IndentPrinter( _writer, _format ); } else { _indenting = false; _printer = new Printer( _writer, _format ); } ElementState state; _elementStateCount = 0; state = _elementStates[ 0 ]; state.namespaceURI = null; state.localName = null; state.rawName = null; state.preserveSpace = _format.getPreserveSpace(); state.empty = true; state.afterElement = false; state.afterComment = false; state.doCData = state.inCData = false; state.prefixes = null; _docTypePublicId = _format.getDoctypePublic(); _docTypeSystemId = _format.getDoctypeSystem(); _started = false; _prepared = true; } //----------------------------------// // DOM document serializing methods //
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?