htmlwriter.java

来自「解决如何把XML应用到JAVA里问题」· Java 代码 · 共 842 行 · 第 1/2 页
JAVA
842 行
/*
 * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
 *
 * This software is open source.
 * See the bottom of this file for the licence.
 */

package org.dom4j.io;

import java.io.IOException;
import java.io.OutputStream;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import java.util.Stack;

import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Entity;
import org.dom4j.Node;

import org.xml.sax.SAXException;

/**
 * <p>
 * <code>HTMLWriter</code> takes a DOM4J tree and formats it to a stream as
 * HTML. This formatter is similar to XMLWriter but it outputs the text of CDATA
 * and Entity sections rather than the serialised format as in XML, it has an
 * XHTML mode, it retains whitespace in certain elements such as &lt;PRE&gt;,
 * and it supports certain elements which have no corresponding close tag such
 * as for &lt;BR&gt; and &lt;P&gt;.
 * </p>
 * 
 * <p>
 * The OutputFormat passed in to the constructor is checked for isXHTML() and
 * isExpandEmptyElements(). See {@link OutputFormat OutputFormat} for details.
 * Here are the rules for <b>this class </b> based on an OutputFormat, "format",
 * passed in to the constructor: <br/><br/>
 * 
 * <ul>
 * <li>If an element is in {@link #getOmitElementCloseSet()
 * getOmitElementCloseSet}, then it is treated specially:
 * 
 * <ul>
 * <li>It never expands, since some browsers treat this as two separate
 * Horizontal Rules: &lt;HR&gt;&lt;/HR&gt;</li>
 * <li>If {@link org.dom4j.io.OutputFormat#isXHTML() format.isXHTML()}, then
 * it has a space before the closing single-tag slash, since Netscape 4.x-
 * treats this: &lt;HR /&gt; as an element named "HR" with an attribute named
 * "/", but that's better than when it refuses to recognize this: &lt;hr/&gt;
 * which it thinks is an element named "HR/".</li>
 * </ul>
 * 
 * </li>
 * <li>If {@link org.dom4j.io.OutputFormat#isXHTML() format.isXHTML()}, all
 * elements must have either a close element, or be a closed single tag.</li>
 * <li>If {@link org.dom4j.io.OutputFormat#isExpandEmptyElements()
 * format.isExpandEmptyElements()} is true, all elements are expanded except
 * as above.</li>
 * </ul>
 * 
 * <b>Examples </b>
 * </p>
 * 
 * <p>
 * </p>
 * 
 * <p>
 * If isXHTML == true, CDATA sections look like this:
 * 
 * <PRE>
 * 
 * <b>&lt;myelement&gt;&lt;![CDATA[My data]]&gt;&lt;/myelement&gt; </b>
 * 
 * </PRE>
 * 
 * Otherwise, they look like this:
 * 
 * <PRE>
 * 
 * <b>&lt;myelement&gt;My data&lt;/myelement&gt; </b>
 * 
 * </PRE>
 * 
 * </p>
 * 
 * <p>
 * Basically, {@link OutputFormat#isXHTML() OutputFormat.isXHTML()} ==
 * <code>true</code> will produce valid XML, while {@link
 * org.dom4j.io.OutputFormat#isExpandEmptyElements()
 * format.isExpandEmptyElements()} determines whether empty elements are
 * expanded if isXHTML is true, excepting the special HTML single tags.
 * </p>
 * 
 * <p>
 * Also, HTMLWriter handles tags whose contents should be preformatted, that is,
 * whitespace-preserved. By default, this set includes the tags &lt;PRE&gt;,
 * &lt;SCRIPT&gt;, &lt;STYLE&gt;, and &lt;TEXTAREA&gt;, case insensitively. It
 * does not include &lt;IFRAME&gt;. Other tags, such as &lt;CODE&gt;,
 * &lt;KBD&gt;, &lt;TT&gt;, &lt;VAR&gt;, are usually rendered in a different
 * font in most browsers, but don't preserve whitespace, so they also don't
 * appear in the default list. HTML Comments are always whitespace-preserved.
 * However, the parser you use may store comments with linefeed-only text nodes
 * (\n) even if your platform uses another line.separator character, and
 * HTMLWriter outputs Comment nodes exactly as the DOM is set up by the parser.
 * See examples and discussion here: {@link #setPreformattedTags(java.util.Set)
 * setPreformattedTags}
 * </p>
 * 
 * <p>
 * <b>Examples </b>
 * </p>
 * <blockquote>
 * <p>
 * <b>Pretty Printing </b>
 * </p>
 * 
 * <p>
 * This example shows how to pretty print a string containing a valid HTML
 * document to a string. You can also just call the static methods of this
 * class: <br>
 * {@link #prettyPrintHTML(String) prettyPrintHTML(String)} or <br>
 * {@link #prettyPrintHTML(String,boolean,boolean,boolean,boolean)
 * prettyPrintHTML(String,boolean,boolean,boolean,boolean)} or, <br>
 * {@link #prettyPrintXHTML(String) prettyPrintXHTML(String)} for XHTML (note
 * the X)
 * </p>
 * 
 * <pre>
 * String testPrettyPrint(String html) {
 *     StringWriter sw = new StringWriter();
 *     OutputFormat format = OutputFormat.createPrettyPrint();
 *     // These are the default values for createPrettyPrint,
 *     // so you needn't set them:
 *     // format.setNewlines(true);
 *     // format.setTrimText(true);
 *     format.setXHTML(true);
 *     HTMLWriter writer = new HTMLWriter(sw, format);
 *     Document document = DocumentHelper.parseText(html);
 *     writer.write(document);
 *     writer.flush();
 *     return sw.toString();
 * }
 * </pre>
 * 
 * <p>
 * This example shows how to create a "squeezed" document, but one that will
 * work in browsers even if the browser line length is limited. No newlines are
 * included, no extra whitespace at all, except where it is required by
 * {@link #setPreformattedTags(java.util.Set) setPreformattedTags}.
 * </p>
 * 
 * <pre>
 * String testCrunch(String html) {
 *     StringWriter sw = new StringWriter();
 *     OutputFormat format = OutputFormat.createPrettyPrint();
 *     format.setNewlines(false);
 *     format.setTrimText(true);
 *     format.setIndent(&quot;&quot;);
 *     format.setXHTML(true);
 *     format.setExpandEmptyElements(false);
 *     format.setNewLineAfterNTags(20);
 *     org.dom4j.io.HTMLWriter writer = new HTMLWriter(sw, format);
 *     org.dom4j.Document document = DocumentHelper.parseText(html);
 *     writer.write(document);
 *     writer.flush();
 *     return sw.toString();
 * }
 * </pre>
 * 
 * </blockquote>
 * 
 * @author <a href="mailto:james.strachan@metastuff.com">James Strachan </a>
 * @author Laramie Crocker
 * @version $Revision: 1.21 $
 */
public class HTMLWriter extends XMLWriter {
    // Value of the "line.separator" system property, read once when the class
    // is initialised; writeString emits it in place of a lone "\n" text node.
    private static String lineSeparator = System.getProperty("line.separator");

    /**
     * Upper-case names of the tags that are treated as preformatted by
     * default: PRE, SCRIPT, STYLE and TEXTAREA. The instance field
     * <code>preformattedTags</code> is initialised to this very object, not
     * to a copy of it.
     */
    protected static final HashSet DEFAULT_PREFORMATTED_TAGS;

    static {
        // If you change this list, update the javadoc examples, above in the
        // class javadoc, in writeElement, and in setPreformattedTags().
        final String[] defaultTagNames = {"PRE", "SCRIPT", "STYLE", "TEXTAREA"};
        final HashSet tags = new HashSet();

        for (int i = 0; i < defaultTagNames.length; i++) {
            tags.add(defaultTagNames[i]);
        }

        DEFAULT_PREFORMATTED_TAGS = tags;
    }

    /**
     * Format handed to the superclass by every constructor that does not take
     * an explicit OutputFormat. It is built with the arguments
     * <code>("  ", true)</code> and then has trimText and suppressDeclaration
     * switched on.
     */
    protected static final OutputFormat DEFAULT_HTML_FORMAT;

    static {
        // Configure a local instance completely before publishing it.
        final OutputFormat htmlDefaults = new OutputFormat("  ", true);
        htmlDefaults.setTrimText(true);
        htmlDefaults.setSuppressDeclaration(true);
        DEFAULT_HTML_FORMAT = htmlDefaults;
    }

    // writeString checks whether this stack is empty: while it is empty, text
    // is trimmed and a lone "\n" is dropped; otherwise text is written as-is
    // and a lone "\n" becomes the platform line separator.
    // NOTE(review): what gets pushed/popped is not visible in this part of the
    // file; presumably one entry per enclosing preformatted element - confirm.
    private Stack formatStack = new Stack();

    // Most recent text handed to writeString that was not a lone "\n".
    private String lastText = "";

    // NOTE(review): presumably a running count of tags written (the name looks
    // like a typo for "tagsOutput"); it is not used in the visible part of the
    // file - confirm before renaming.
    private int tagsOuput = 0;

    // legal values are 0+, but -1 signifies lazy initialization.
    private int newLineAfterNTags = -1;

    // Starts out as the shared DEFAULT_PREFORMATTED_TAGS instance itself, not
    // a copy of it.
    private HashSet preformattedTags = DEFAULT_PREFORMATTED_TAGS;

    /**
     * Used to store the qualified element names which should have no close
     * element tag. Left null until internalGetOmitElementCloseSet() or
     * setOmitElementCloseSet(Set) assigns it.
     */
    private HashSet omitElementCloseSet;

    /**
     * Passes the given writer together with {@link #DEFAULT_HTML_FORMAT} to
     * the XMLWriter constructor.
     * 
     * @param writer
     *            destination handed to the superclass
     */
    public HTMLWriter(Writer writer) {
        super(writer, DEFAULT_HTML_FORMAT);
    }

    /**
     * Passes the given writer and format to the XMLWriter constructor.
     * 
     * @param writer
     *            destination handed to the superclass
     * @param format
     *            output format handed to the superclass
     */
    public HTMLWriter(Writer writer, OutputFormat format) {
        super(writer, format);
    }

    /**
     * Passes {@link #DEFAULT_HTML_FORMAT} to the format-only XMLWriter
     * constructor; no destination is supplied here.
     * 
     * @throws UnsupportedEncodingException
     *             declared because the superclass constructor may throw it
     */
    public HTMLWriter() throws UnsupportedEncodingException {
        super(DEFAULT_HTML_FORMAT);
    }

    /**
     * Passes the given format to the format-only XMLWriter constructor; no
     * destination is supplied here.
     * 
     * @param format
     *            output format handed to the superclass
     * 
     * @throws UnsupportedEncodingException
     *             declared because the superclass constructor may throw it
     */
    public HTMLWriter(OutputFormat format) throws UnsupportedEncodingException {
        super(format);
    }

    /**
     * Passes the given stream together with {@link #DEFAULT_HTML_FORMAT} to
     * the XMLWriter constructor.
     * 
     * @param out
     *            destination stream handed to the superclass
     * 
     * @throws UnsupportedEncodingException
     *             declared because the superclass constructor may throw it
     */
    public HTMLWriter(OutputStream out) throws UnsupportedEncodingException {
        super(out, DEFAULT_HTML_FORMAT);
    }

    /**
     * Passes the given stream and format to the XMLWriter constructor.
     * 
     * @param out
     *            destination stream handed to the superclass
     * @param format
     *            output format handed to the superclass
     * 
     * @throws UnsupportedEncodingException
     *             declared because the superclass constructor may throw it
     */
    public HTMLWriter(OutputStream out, OutputFormat format)
            throws UnsupportedEncodingException {
        super(out, format);
    }

    /**
     * Does nothing: no output is produced when a CDATA section starts.
     * NOTE(review): presumably replaces a SAX lexical callback inherited from
     * XMLWriter - confirm against the superclass.
     * 
     * @throws SAXException
     *             never thrown by this implementation
     */
    public void startCDATA() throws SAXException {
    }

    /**
     * Does nothing: no output is produced when a CDATA section ends.
     * NOTE(review): presumably replaces a SAX lexical callback inherited from
     * XMLWriter - confirm against the superclass.
     * 
     * @throws SAXException
     *             never thrown by this implementation
     */
    public void endCDATA() throws SAXException {
    }

    // Overridden methods
    // added isXHTML() stuff so you get the CDATA brackets if you desire.
    /**
     * Writes the content of a CDATA section. When the output format is in
     * XHTML mode the work is delegated to the superclass (so the CDATA
     * brackets are kept); otherwise the text is written to the underlying
     * writer exactly as given. In both cases the last output node type is
     * recorded as a CDATA section.
     * 
     * @param text
     *            content of the CDATA section
     * 
     * @throws IOException
     *             if the underlying writer fails
     */
    protected void writeCDATA(String text) throws IOException {
        // XXX: Should we escape entities in the non-XHTML branch?
        final boolean xhtml = getOutputFormat().isXHTML();

        if (!xhtml) {
            writer.write(text);
        } else {
            super.writeCDATA(text);
        }

        lastOutputNodeType = Node.CDATA_SECTION_NODE;
    }

    /**
     * Writes the text of the given entity straight to the underlying writer
     * and records an entity reference as the last output node type.
     * 
     * @param entity
     *            entity whose text is written
     * 
     * @throws IOException
     *             if the underlying writer fails
     */
    protected void writeEntity(Entity entity) throws IOException {
        final String entityText = entity.getText();

        writer.write(entityText);
        lastOutputNodeType = Node.ENTITY_REFERENCE_NODE;
    }

    /**
     * Does nothing, so this writer itself never emits a declaration.
     * 
     * @throws IOException
     *             never thrown by this implementation
     */
    protected void writeDeclaration() throws IOException {
    }

    /**
     * Writes a piece of text, with special handling for a text that consists
     * of a single "\n".
     * 
     * <p>
     * A lone "\n" is never passed on as-is: while the format stack is
     * non-empty (i.e. inside a preformatted section) the platform line
     * separator is written in its place, otherwise it is discarded. Any other
     * text is remembered in <code>lastText</code> - writeElement may use it
     * to line up preformatted tags - and then handed to the superclass,
     * trimmed first when the format stack is empty.
     * </p>
     * 
     * @param text
     *            the text to write
     * 
     * @throws IOException
     *             if the underlying writer fails
     */
    protected void writeString(String text) throws IOException {
        final boolean preserveWhitespace = !formatStack.empty();

        // This is a little brittle: it relies on line breaks arriving as a
        // separate text of exactly "\n".
        if (text.equals("\n")) {
            if (preserveWhitespace) {
                super.writeString(lineSeparator);
            }

            return;
        }

        lastText = text;
        super.writeString(preserveWhitespace ? text : text.trim());
    }

    /**
     * Writes the close tag for an element unless its name is one of those
     * whose close tag is omitted, which avoids weird behaviour from browsers
     * for versions up to 5.x.
     * 
     * @param qualifiedName
     *            qualified name of the element being closed
     * 
     * @throws IOException
     *             if the superclass fails while writing the close tag
     */
    protected void writeClose(String qualifiedName) throws IOException {
        if (omitElementClose(qualifiedName)) {
            // Close tag is deliberately left out for this element.
            return;
        }

        super.writeClose(qualifiedName);
    }

    /**
     * Finishes the tag of an element that has no content.
     * 
     * <p>
     * For a name accepted by {@link #omitElementClose(String)} the expansion
     * option of the format is ignored: in XHTML mode " /&gt;" is written (the
     * space before the slash is there for Netscape 4.x), and in plain HTML
     * mode just "&gt;" is written, e.g. "&lt;br&gt;". Expanding such a tag to
     * &lt;br&gt;&lt;/br&gt; would be valid XML but produces twice the
     * linefeeds in the browser. Every other name is left to the superclass,
     * which consults the format object on whether to expand.
     * </p>
     * 
     * @param qualifiedName
     *            qualified name of the empty element
     * 
     * @throws IOException
     *             if the underlying writer fails
     */
    protected void writeEmptyElementClose(String qualifiedName)
            throws IOException {
        final boolean xhtml = getOutputFormat().isXHTML();

        if (!omitElementClose(qualifiedName)) {
            // Not a special omit tag: the format object decides.
            super.writeEmptyElementClose(qualifiedName);

            return;
        }

        // Special omit tag: closed single tag in XHTML, bare tag in HTML.
        writer.write(xhtml ? " />" : ">");
    }

    /**
     * Tells whether the close tag of the named element is to be omitted,
     * i.e. whether the upper-cased name is in the omit-close set.
     * 
     * <p>
     * The name is upper-cased with a fixed locale. With the default-locale
     * <code>toUpperCase()</code>, a Turkish or Azeri default locale maps "i"
     * to a dotted capital I, so names such as "img", "link" or "input" would
     * never match the ASCII entries "IMG", "LINK" and "INPUT".
     * </p>
     * 
     * <p>
     * NOTE(review): setOmitElementCloseSet(Set) upper-cases the names it
     * stores with the default locale; it should use the same fixed locale so
     * that stored and looked-up names always agree.
     * </p>
     * 
     * @param qualifiedName
     *            qualified element name, in any case; must not be null
     * 
     * @return true if the element's close tag is to be omitted
     */
    protected boolean omitElementClose(String qualifiedName) {
        return internalGetOmitElementCloseSet().contains(
                qualifiedName.toUpperCase(Locale.ENGLISH));
    }

    /**
     * Returns the live omit-close set, creating it on first use and filling
     * it through {@link #loadOmitElementCloseSet(Set)}.
     * 
     * @return the internal set itself, not a copy
     */
    private HashSet internalGetOmitElementCloseSet() {
        if (omitElementCloseSet != null) {
            return omitElementCloseSet;
        }

        // The field is assigned before the loader runs, so the set is already
        // in place while loadOmitElementCloseSet executes.
        omitElementCloseSet = new HashSet();
        loadOmitElementCloseSet(omitElementCloseSet);

        return omitElementCloseSet;
    }

    /**
     * Adds the default upper-case element names whose close tags are omitted
     * to the given set: AREA, BASE, BR, COL, HR, IMG, INPUT, LINK, META, P
     * and PARAM.
     * 
     * @param set
     *            the set that receives the names
     */
    // If you change this, change the javadoc for getOmitElementCloseSet.
    protected void loadOmitElementCloseSet(Set set) {
        final String[] defaultNames = {
            "AREA", "BASE", "BR", "COL", "HR", "IMG",
            "INPUT", "LINK", "META", "P", "PARAM"
        };

        for (int i = 0; i < defaultNames.length; i++) {
            set.add(defaultNames[i]);
        }
    }

    // let the people see the set, but not modify it.

    /**
     * Returns a clone of the Set of element names whose close tags are
     * omitted, so callers can inspect it without changing this writer. By
     * default the names are "AREA", "BASE", "BR", "COL", "HR", "IMG",
     * "INPUT", "LINK", "META", "P", "PARAM".
     * 
     * @return a clone of the internal Set
     */
    public Set getOmitElementCloseSet() {
        final HashSet liveSet = internalGetOmitElementCloseSet();
        final Object copy = liveSet.clone();

        return (Set) copy;
    }

    /**
     * To use the empty set, pass an empty Set, or null:
     * 
     * <pre>
     * 
     * 
     *       setOmitElementCloseSet(new HashSet());
     *     or
     *       setOmitElementCloseSet(null);
     * 
     *  
     * </pre>
     * 
     * @param newSet
     *            DOCUMENT ME!
     */
    public void setOmitElementCloseSet(Set newSet) {
        // resets, and safely empties it out if newSet is null.
        omitElementCloseSet = new HashSet();

        if (newSet != null) {
            omitElementCloseSet = new HashSet();

            Object aTag;
            Iterator iter = newSet.iterator();

            while (iter.hasNext()) {
                aTag = iter.next();

                if (aTag != null) {
                    omitElementCloseSet.add(aTag.toString().toUpperCase());
                }
            }
htmlwriter.java - 源码说明

本页面展示了「解决如何把XML应用到JAVA里问题」中的 htmlwriter.java 源码文件，采用 Java 编程语言编写，共 842 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与JAVA相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?