htmlserializer.java

来自「JAVA 所有包」· Java 代码 · 共 883 行 · 第 1/3 页

JAVA
883
字号
/* * Copyright 1999-2004 The Apache Software Foundation. *  * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at *  *      http://www.apache.org/licenses/LICENSE-2.0 *  * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */// Sep 14, 2000://  Fixed serializer to report IO exception directly, instead at//  the end of document processing.//  Reported by Patrick Higgins <phiggins@transzap.com>// Aug 21, 2000://  Fixed bug in startDocument not calling prepare.//  Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>// Aug 21, 2000://  Added ability to omit DOCTYPE declaration.// Sep 1, 2000://   If no output format is provided the serializer now defaults//   to ISO-8859-1 encoding. Reported by Mikael Staldal//   <d96-mst@d.kth.se>package com.sun.org.apache.xml.internal.serialize;import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;import java.io.IOException;import java.io.OutputStream;import java.io.Writer;import java.util.Enumeration;import java.util.Locale;import org.w3c.dom.Attr;import org.w3c.dom.Element;import org.w3c.dom.NamedNodeMap;import org.w3c.dom.Node;import org.xml.sax.AttributeList;import org.xml.sax.Attributes;import org.xml.sax.SAXException;/** * Implements an HTML/XHTML serializer supporting both DOM and SAX * pretty serializing. HTML/XHTML mode is determined in the * constructor.  For usage instructions see {@link Serializer}. * <p> * If an output stream is used, the encoding is taken from the * output format (defaults to <tt>UTF-8</tt>). If a writer is * used, make sure the writer uses the same encoding (if applies) * as specified in the output format. * <p> * The serializer supports both DOM and SAX. DOM serializing is done * by calling {@link #serialize} and SAX serializing is done by firing * SAX events and using the serializer as a document handler. * <p> * If an I/O exception occurs while serializing, the serializer * will not throw an exception directly, but only throw it * at the end of serializing (either DOM or SAX's {@link * org.xml.sax.DocumentHandler#endDocument}. * <p> * For elements that are not specified as whitespace preserving, * the serializer will potentially break long text lines at space * boundaries, indent lines, and serialize elements on separate * lines. Line terminators will be regarded as spaces, and * spaces at beginning of line will be stripped. * <p> * XHTML is slightly different than HTML: * <ul> * <li>Element/attribute names are lower case and case matters * <li>Attributes must specify value, even if empty string * <li>Empty elements must have '/' in empty tag * <li>Contents of SCRIPT and STYLE elements serialized as CDATA * </ul> * * @deprecated This class was deprecated in Xerces 2.6.2. It is * recommended that new applications use JAXP's Transformation API  * for XML (TrAX) for serializing HTML. See the Xerces documentation * for more information. * @version $Revision: 1.2.6.1 $ $Date: 2005/09/09 07:26:14 $ * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a> * @see Serializer */public class HTMLSerializer    extends BaseMarkupSerializer{    /**     * True if serializing in XHTML format.     */    private boolean _xhtml;    public static final String XHTMLNamespace = "http://www.w3.org/1999/xhtml";    // for users to override XHTMLNamespace if need be.    private String fUserXHTMLNamespace = null;    /**     * Constructs a new HTML/XHTML serializer depending on the value of     * <tt>xhtml</tt>. The serializer cannot be used without calling     * {@link #setOutputCharStream} or {@link #setOutputByteStream} first.     *     * @param xhtml True if XHTML serializing     */    protected HTMLSerializer( boolean xhtml, OutputFormat format )    {        super( format );        _xhtml = xhtml;    }    /**     * Constructs a new serializer. The serializer cannot be used without     * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}     * first.     */    public HTMLSerializer()    {        this( false, new OutputFormat( Method.HTML, "ISO-8859-1", false ) );    }    /**     * Constructs a new serializer. The serializer cannot be used without     * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}     * first.     */    public HTMLSerializer( OutputFormat format )    {        this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );    }    /**     * Constructs a new serializer that writes to the specified writer     * using the specified output format. If <tt>format</tt> is null,     * will use a default output format.     *     * @param writer The writer to use     * @param format The output format to use, null for the default     */    public HTMLSerializer( Writer writer, OutputFormat format )    {        this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );        setOutputCharStream( writer );    }    /**     * Constructs a new serializer that writes to the specified output     * stream using the specified output format. If <tt>format</tt>     * is null, will use a default output format.     *     * @param output The output stream to use     * @param format The output format to use, null for the default     */    public HTMLSerializer( OutputStream output, OutputFormat format )    {        this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );        setOutputByteStream( output );    }    public void setOutputFormat( OutputFormat format )    {        super.setOutputFormat( format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );    }    // Set  value for alternate XHTML namespace.    public void setXHTMLNamespace(String newNamespace) {        fUserXHTMLNamespace = newNamespace;    } // setXHTMLNamespace(String)    //-----------------------------------------//    // SAX content handler serializing methods //    //-----------------------------------------//    public void startElement( String namespaceURI, String localName,                              String rawName, Attributes attrs )        throws SAXException    {        int          i;        boolean      preserveSpace;        ElementState state;        String       name;        String       value;        String       htmlName;        boolean      addNSAttr = false;        try {            if ( _printer == null )            	throw new IllegalStateException( 				    DOMMessageFormatter.formatMessage(				    DOMMessageFormatter.SERIALIZER_DOMAIN,                    "NoWriterSupplied", null));            state = getElementState();            if ( isDocumentState() ) {                // If this is the root element handle it differently.                // If the first root element in the document, serialize                // the document's DOCTYPE. Space preserving defaults                // to that of the output format.                if ( ! _started )                    startDocument( (localName == null || localName.length() == 0)                         ? rawName : localName );            } else {                // For any other element, if first in parent, then                // close parent's opening tag and use the parnet's                // space preserving.                if ( state.empty )                    _printer.printText( '>' );                // Indent this element on a new line if the first                // content of the parent element or immediately                // following an element.                if ( _indenting && ! state.preserveSpace &&                     ( state.empty || state.afterElement ) )                    _printer.breakLine();            }            preserveSpace = state.preserveSpace;            // Do not change the current element state yet.            // This only happens in endElement().                        // As per SAX2, the namespace URI is an empty string if the element has no            // namespace URI, or namespaces is turned off. The check against null protects            // against broken SAX implementations, so I've left it there. - mrglavas            boolean hasNamespaceURI = (namespaceURI != null && namespaceURI.length() != 0);            // SAX2: rawName (QName) could be empty string if             // namespace-prefixes property is false.            if ( rawName == null || rawName.length() == 0) {                rawName = localName;                if ( hasNamespaceURI ) {                    String prefix;                    prefix = getPrefix( namespaceURI );                    if ( prefix != null && prefix.length() != 0 )                        rawName = prefix + ":" + localName;                }                addNSAttr = true;            }            if ( !hasNamespaceURI )                htmlName = rawName;            else {                if ( namespaceURI.equals( XHTMLNamespace ) ||                        (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(namespaceURI)) )                    htmlName = localName;                else                    htmlName = null;            }            // XHTML: element names are lower case, DOM will be different            _printer.printText( '<' );            if ( _xhtml )                _printer.printText( rawName.toLowerCase(Locale.ENGLISH) );            else                _printer.printText( rawName );            _printer.indent();            // For each attribute serialize it's name and value as one part,            // separated with a space so the element can be broken on            // multiple lines.            if ( attrs != null ) {                for ( i = 0 ; i < attrs.getLength() ; ++i ) {                    _printer.printSpace();                    name = attrs.getQName( i ).toLowerCase(Locale.ENGLISH);                    value = attrs.getValue( i );                    if ( _xhtml || hasNamespaceURI ) {                        // XHTML: print empty string for null values.                        if ( value == null ) {                            _printer.printText( name );                            _printer.printText( "=\"\"" );                        } else {                            _printer.printText( name );                            _printer.printText( "=\"" );                            printEscaped( value );                            _printer.printText( '"' );                        }                    } else {                        // HTML: Empty values print as attribute name, no value.                        // HTML: URI attributes will print unescaped                        if ( value == null ) {

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?