htmlserializer.java
来自「JAVA的一些源码 JAVA2 STANDARD EDITION DEVELO」· Java 代码 · 共 921 行 · 第 1/3 页
JAVA
921 行
/* * The Apache Software License, Version 1.1 * * * Copyright (c) 1999-2004 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Xerces" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without prior written * permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation and was * originally based on software copyright (c) 1999, International * Business Machines, Inc., http://www.apache.org. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */// Sep 14, 2000:// Fixed serializer to report IO exception directly, instead at// the end of document processing.// Reported by Patrick Higgins <phiggins@transzap.com>// Aug 21, 2000:// Fixed bug in startDocument not calling prepare.// Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>// Aug 21, 2000:// Added ability to omit DOCTYPE declaration.// Sep 1, 2000:// If no output format is provided the serializer now defaults// to ISO-8859-1 encoding. Reported by Mikael Staldal// <d96-mst@d.kth.se>package com.sun.org.apache.xml.internal.serialize;import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;import java.io.IOException;import java.io.OutputStream;import java.io.Writer;import java.util.Enumeration;import java.util.Locale;import org.w3c.dom.Attr;import org.w3c.dom.Element;import org.w3c.dom.NamedNodeMap;import org.w3c.dom.Node;import org.xml.sax.AttributeList;import org.xml.sax.Attributes;import org.xml.sax.SAXException;/** * Implements an HTML/XHTML serializer supporting both DOM and SAX * pretty serializing. HTML/XHTML mode is determined in the * constructor. For usage instructions see {@link Serializer}. * <p> * If an output stream is used, the encoding is taken from the * output format (defaults to <tt>UTF-8</tt>). If a writer is * used, make sure the writer uses the same encoding (if applies) * as specified in the output format. * <p> * The serializer supports both DOM and SAX. DOM serializing is done * by calling {@link #serialize} and SAX serializing is done by firing * SAX events and using the serializer as a document handler. * <p> * If an I/O exception occurs while serializing, the serializer * will not throw an exception directly, but only throw it * at the end of serializing (either DOM or SAX's {@link * org.xml.sax.DocumentHandler#endDocument}. * <p> * For elements that are not specified as whitespace preserving, * the serializer will potentially break long text lines at space * boundaries, indent lines, and serialize elements on separate * lines. Line terminators will be regarded as spaces, and * spaces at beginning of line will be stripped. * <p> * XHTML is slightly different than HTML: * <ul> * <li>Element/attribute names are lower case and case matters * <li>Attributes must specify value, even if empty string * <li>Empty elements must have '/' in empty tag * <li>Contents of SCRIPT and STYLE elements serialized as CDATA * </ul> * * @deprecated * @version $Revision: 1.26 $ $Date: 2004/02/16 05:24:55 $ * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a> * @see Serializer */public class HTMLSerializer extends BaseMarkupSerializer{ /** * True if serializing in XHTML format. */ private boolean _xhtml; public static final String XHTMLNamespace = "http://www.w3.org/1999/xhtml"; // for users to override XHTMLNamespace if need be. private String fUserXHTMLNamespace = null; /** * Constructs a new HTML/XHTML serializer depending on the value of * <tt>xhtml</tt>. The serializer cannot be used without calling * {@link #setOutputCharStream} or {@link #setOutputByteStream} first. * * @param xhtml True if XHTML serializing */ protected HTMLSerializer( boolean xhtml, OutputFormat format ) { super( format ); _xhtml = xhtml; } /** * Constructs a new serializer. The serializer cannot be used without * calling {@link #setOutputCharStream} or {@link #setOutputByteStream} * first. */ public HTMLSerializer() { this( false, new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); } /** * Constructs a new serializer. The serializer cannot be used without * calling {@link #setOutputCharStream} or {@link #setOutputByteStream} * first. */ public HTMLSerializer( OutputFormat format ) { this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); } /** * Constructs a new serializer that writes to the specified writer * using the specified output format. If <tt>format</tt> is null, * will use a default output format. * * @param writer The writer to use * @param format The output format to use, null for the default */ public HTMLSerializer( Writer writer, OutputFormat format ) { this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); setOutputCharStream( writer ); } /** * Constructs a new serializer that writes to the specified output * stream using the specified output format. If <tt>format</tt> * is null, will use a default output format. * * @param output The output stream to use * @param format The output format to use, null for the default */ public HTMLSerializer( OutputStream output, OutputFormat format ) { this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); setOutputByteStream( output ); } public void setOutputFormat( OutputFormat format ) { super.setOutputFormat( format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); } // Set value for alternate XHTML namespace. public void setXHTMLNamespace(String newNamespace) { fUserXHTMLNamespace = newNamespace; } // setXHTMLNamespace(String) //-----------------------------------------// // SAX content handler serializing methods // //-----------------------------------------// public void startElement( String namespaceURI, String localName, String rawName, Attributes attrs ) throws SAXException { int i; boolean preserveSpace; ElementState state; String name; String value; String htmlName; boolean addNSAttr = false; try { if ( _printer == null ) throw new IllegalStateException( DOMMessageFormatter.formatMessage( DOMMessageFormatter.SERIALIZER_DOMAIN, "NoWriterSupplied", null)); state = getElementState(); if ( isDocumentState() ) { // If this is the root element handle it differently. // If the first root element in the document, serialize // the document's DOCTYPE. Space preserving defaults // to that of the output format. if ( ! _started ) startDocument( (localName == null || localName.length() == 0) ? rawName : localName ); } else { // For any other element, if first in parent, then // close parent's opening tag and use the parnet's // space preserving. if ( state.empty ) _printer.printText( '>' ); // Indent this element on a new line if the first // content of the parent element or immediately // following an element. if ( _indenting && ! state.preserveSpace && ( state.empty || state.afterElement ) ) _printer.breakLine(); } preserveSpace = state.preserveSpace; // Do not change the current element state yet. // This only happens in endElement(). // As per SAX2, the namespace URI is an empty string if the element has no // namespace URI, or namespaces is turned off. The check against null protects // against broken SAX implementations, so I've left it there. - mrglavas boolean hasNamespaceURI = (namespaceURI != null && namespaceURI.length() != 0); // SAX2: rawName (QName) could be empty string if // namespace-prefixes property is false. if ( rawName == null || rawName.length() == 0) { rawName = localName; if ( hasNamespaceURI ) { String prefix; prefix = getPrefix( namespaceURI ); if ( prefix != null && prefix.length() != 0 ) rawName = prefix + ":" + localName; } addNSAttr = true; } if ( !hasNamespaceURI ) htmlName = rawName; else { if ( namespaceURI.equals( XHTMLNamespace ) || (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(namespaceURI)) ) htmlName = localName; else htmlName = null; } // XHTML: element names are lower case, DOM will be different _printer.printText( '<' ); if ( _xhtml ) _printer.printText( rawName.toLowerCase(Locale.ENGLISH) ); else
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?