📄 htmlwriter.java

📁 Java的面向对象数据库系统的源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* * Copyright 2001 (C) MetaStuff, Ltd. All Rights Reserved. * * This software is open source. * See the bottom of this file for the licence. * * $Id: HTMLWriter.java,v 1.3 2003/07/07 10:30:29 per_nyfelt Exp $ */package org.dom4j.io;import org.dom4j.*;import org.xml.sax.SAXException;import java.io.*;import java.util.HashSet;import java.util.Iterator;import java.util.Set;/** <p><code>HTMLWriter</code> takes a DOM4J tree and formats it to a  * stream as HTML.  * This formatter is similar to XMLWriter but it outputs the text of CDATA  * and Entity sections rather than the serialised format as in XML,  * it has an XHTML mode, it retains whitespace in certain elements such as &lt;PRE&gt;,  * and it supports certain elements which have no corresponding close tag such  * as for &lt;BR&gt; and &lt;P&gt;.  *  * <p> The OutputFormat passed in to the constructor is checked for isXHTML() and isExpandEmptyElements().  *   See {@link OutputFormat OutputFormat} for details.  Here are the rules for  * <b>this class</b> based on an OutputFormat, "format", passed in to the constructor:<br/><br/>  *  <ul>  *     <li>If an element is in {@link #getOmitElementCloseSet() getOmitElementCloseSet}, then it is treated specially:</li>  *     <ul>  *        <li>It never expands, since some browsers treat this as two separate Horizontal Rules: &lt;HR&gt;&lt;/HR&gt;</li>  *        <li>If {@link org.dom4j.io.OutputFormat#isXHTML() format.isXHTML()}, then it has a space before the closing single-tag slash, since Netscape 4.x- treats this: &lt;HR /&gt; as  *            an element named "HR" with an attribute named "/", but that's better than when it refuses to recognize this:  &lt;hr/&gt;  *            which it thinks is an element named "HR/". 
</li>  *     </ul>  *     <li>If {@link org.dom4j.io.OutputFormat#isXHTML() format.isXHTML()}, all elements must have  *         either a close element, or be a closed single tag.</li>  *     <li>If {@link org.dom4j.io.OutputFormat#isExpandEmptyElements() format.isExpandEmptyElements()}() is true,  *         all elements are expanded except as above.</li>  *  </ul>  * <b>Examples</b>  *  *  <table border="1" cellpadding="0" cellspacing="0">  *    <tr>  *      <th colspan="3" align="left">isXHTML == true</th>  *    </tr>  *    <tr>  *      <td width="25">&#160;</td>  *      <th align="left">isExpandEmptyElements == true</th>  *      <td><code>  *      &lt;td&gt;&lt;/td&gt;<br />  *      &lt;br&#160;/&gt;<br />  *      &lt;foo&gt;&lt;/foo&gt;</code>  *      </td>  *    </tr>  *    <tr>  *      <td width="25">&#160;</td>  *      <th align="left">isExpandEmptyElements == false</th>  *      <td><code>  *      &lt;td/&gt;<br />  *      &lt;br&#160;/&gt;<br />  *      &lt;foo/&gt;</code>  *      </td>  *    </tr>  *    <tr>  *      <th colspan="3" align="left">isXHTML == false</th>  *    </tr>  *    <tr>  *      <td width="25">&#160;</td>  *      <th align="left">isExpandEmptyElements == true</th>  *      <td><code>  *      &lt;td&gt;&lt;/td&gt;<br />  *      &lt;br&gt;<br />  *      &lt;foo&gt;&lt;/foo&gt;</code>  *      </td>  *    </tr>  *    <tr>  *      <td width="25">&#160;</td>  *      <th align="left">isExpandEmptyElements == false</th>  *      <td><code>  *      &lt;td/&gt;<br />  *      &lt;br&gt;<br />  *      &lt;foo/&gt;</code>  *      </td>  *    </tr>  *  </table>  *  <p>  *   <p>  *  If isXHTML == true, CDATA sections look like this:  *    <PRE>  *    <b>&lt;myelement&gt;&lt;![CDATA[My data]]&gt;&lt;/myelement&gt;</b>  *    </PRE>  *  Otherwise, they look like this:  *   <PRE>  *    <b>&lt;myelement&gt;My data&lt;/myelement&gt;</b>  *   </PRE>  *   </p>  *  * Basically, {@link org.dom4j.io.OutputFormat#isXHTML() OutputFormat.isXHTML()} == true will produce 
valid XML,  *  while {@link org.dom4j.io.OutputFormat#isExpandEmptyElements() format.isExpandEmptyElements()}  *  determines whether empty elements are expanded  *  if isXHTML is true, excepting the special HTML single tags.  * </p>  *  *  * <p>Also, HTMLWriter handles tags whose contents should be preformatted, that is, whitespace-preserved.  * By default, this set includes the tags &lt;PRE&gt;, &lt;SCRIPT&gt;, &lt;STYLE&gt;, and &lt;TEXTAREA&gt;, case insensitively.  * It does not include &lt;IFRAME&gt;.  * Other tags, such as &lt;CODE&gt;, &lt;KBD&gt;, &lt;TT&gt;, &lt;VAR&gt;, are usually rendered in a different font in most browsers,  * but don't preserve whitespace, so they also don't appear in the default list.  HTML Comments  * are always whitespace-preserved.  However, the parser you use may store comments with linefeed-only  * text nodes (\n) even if your platform uses another line.separator character, and HTMLWriter outputs  * Comment nodes exactly as the DOM is set up by the parser.  * See examples and discussion here: {@link #setPreformattedTags(java.util.Set) setPreformattedTags}</p>  *  * <p><b>Examples</b></p>  *  <blockquote>  * <p><b>Pretty Printing</b></p>  * <p>This example shows how to pretty print a string containing a valid HTML document to a string.  
*     You can also just call the static methods of this class:<br/>
 *        {@link #prettyPrintHTML(String) prettyPrintHTML(String)}
 *     or<br/>
 *        {@link #prettyPrintHTML(String,boolean,boolean,boolean,boolean) prettyPrintHTML(String,boolean,boolean,boolean,boolean)}
 *     or, <br/>
 *        {@link #prettyPrintXHTML(String) prettyPrintXHTML(String)} for XHTML (note the X)
 *     </p>
 *     <pre>
 *       String testPrettyPrint(String html){
 *           StringWriter sw = new StringWriter();
 *           org.dom4j.io.OutputFormat format = org.dom4j.io.OutputFormat.createPrettyPrint();
 *           <font color='green'>//These are the default formats from createPrettyPrint, so you needn't set them:</font>
 *           <font color='green'>//  format.setNewlines(true);</font>
 *           <font color='green'>//  format.setTrimText(true);</font>
 *           format.setXHTML(true);  <font color='green'>//Default is false, this produces XHTML</font>
 *           org.dom4j.io.HTMLWriter writer = new org.dom4j.io.HTMLWriter(sw, format);
 *           org.dom4j.Document document = org.dom4j.O3DocumentHelper.parseText(html);
 *           writer.write(document);
 *           writer.flush();
 *           return sw.toString();
 *       }
 *     </pre>
 *
 *     <p>This example shows how to create a "squeezed" document, but one that will work in browsers
 *      even if the browser line length is limited.  No newlines are included, no extra whitespace
 *      at all, except where it is required by {@link #setPreformattedTags(java.util.Set) setPreformattedTags}.
*     </p>  *     <pre>  *       String testCrunch(String html){  *           StringWriter sw = new StringWriter();  *           org.dom4j.io.OutputFormat format = org.dom4j.io.OutputFormat.createPrettyPrint();  *           format.setNewlines(false);  *           format.setTrimText(true);  *           format.setIndent("");  *           format.setXHTML(true);  *           format.setExpandEmptyElements(false);  *           format.setNewLineAfterNTags(20); <font color='green'>//print a line every so often.</font>  *           org.dom4j.io.HTMLWriter writer = new org.dom4j.io.HTMLWriter(sw, format);  *           org.dom4j.Document document = org.dom4j.O3DocumentHelper.parseText(html);  *           writer.write(document);  *           writer.flush();  *           return sw.toString();  *       }  *     </pre>  *  *  </blockquote>  *  * </p>  *  * @author <a href="mailto:james.strachan@metastuff.com">James Strachan</a> (james.strachan@metastuff.com)  * @author Laramie Crocker  * @version $Revision: 1.3 $ */public class HTMLWriter extends XMLWriter {    public HTMLWriter(Writer writer) {        super( writer, defaultHtmlFormat );    }    public HTMLWriter(Writer writer, OutputFormat format) {        super( writer, format );    }    public HTMLWriter() throws UnsupportedEncodingException {        super( defaultHtmlFormat );    }    public HTMLWriter(OutputFormat format) throws UnsupportedEncodingException {        super( format );    }    public HTMLWriter(OutputStream out) throws UnsupportedEncodingException {        super( out, defaultHtmlFormat );    }    public HTMLWriter(OutputStream out, OutputFormat format) throws UnsupportedEncodingException {        super( out, format );    }    //Allows us to the current state of the format in this struct on the m_formatStack.    
private class FormatState {        public FormatState(boolean newLines, boolean trimText, String indent){            this.m_Newlines = newLines;            this.m_TrimText = trimText;            this.m_indent = indent;        }        private boolean m_Newlines = false;        public boolean isNewlines(){return m_Newlines;}        private boolean m_TrimText = false;        public boolean isTrimText(){return m_TrimText;}        private String  m_indent = "";        public String  getIndent(){return m_indent;}    }    private java.util.Stack m_formatStack = new java.util.Stack();    private static String m_lineSeparator = System.getProperty("line.separator");    private String m_lastText = "";    private int m_tagsOuput = 0;    private int m_newLineAfterNTags = -1;  //legal values are 0+, but -1 signifies lazy initialization.    protected static final HashSet defaultPreformattedTags;    static {        //If you change this list, update the javadoc examples, above in the class javadoc,        //   in writeElement, and in setPreformattedTags().        defaultPreformattedTags = new HashSet();        defaultPreformattedTags.add("PRE");        defaultPreformattedTags.add("SCRIPT");        defaultPreformattedTags.add("STYLE");        defaultPreformattedTags.add("TEXTAREA");    }    private HashSet preformattedTags = defaultPreformattedTags;    protected static final OutputFormat defaultHtmlFormat;    static {        defaultHtmlFormat = new OutputFormat( "  ", true );        defaultHtmlFormat.setTrimText( true );        defaultHtmlFormat.setSuppressDeclaration( true );    }    /** Used to store the qualified element names which      * should have no close element tag      */    private HashSet omitElementCloseSet; //keep as a HashSet, but only show as a Set when asked for by getOmitElementCloseSet().    
public void startCDATA() throws SAXException {    }    public void endCDATA() throws SAXException {    }    // Overloaded methods    // laramiec 3/21/2002 added isXHTML() stuff so you get the CDATA brackets if you desire.    protected void writeCDATA(String text) throws IOException {        // XXX: Should we escape entities?        // writer.write( escapeElementEntities( text ) );        if ( getOutputFormat().isXHTML() ) {            super.writeCDATA(text);        } else {            writer.write( text );        }        lastOutputNodeType = Node.CDATA_SECTION_NODE;    }    protected void writeEntity(Entity entity) throws IOException {        writer.write(entity.getText());        lastOutputNodeType = Node.ENTITY_REFERENCE_NODE;    }    protected void writeDeclaration() throws IOException {    }    protected void writeString(String text) throws IOException {        //DOM stores \n at the end of text nodes that are newlines.  This is significant if        // we are in a PRE section.  However, we only want to output the system line.separator, not \n.        // This is a little brittle, but this function appears to be called with these lineseparators        // as a separate TEXT_NODE.  If we are in a preformatted section, output the right line.separator,        // otherwise ditch.  If the single \n character is not the text, then do the super thing        // to output the text.        // Also, we store the last text that was not a \n since it may be used by writeElement in this class to        // line up preformatted tags.        if ( text.equals("\n")){            if ( ! 
m_formatStack.empty() ) {                super.writeString(m_lineSeparator);            }            return;        }        m_lastText = text;        if ( m_formatStack.empty() ) {            super.writeString(text.trim());        } else {            super.writeString(text);        }    }    /** Overriden method to not close certain element names to avoid      * wierd behaviour from browsers for versions up to 5.x      */    protected void writeClose(String qualifiedName) throws IOException {        if ( ! omitElementClose( qualifiedName ) ) {            super.writeClose(qualifiedName);        }    }    protected void writeEmptyElementClose(String qualifiedName) throws IOException {        if (getOutputFormat().isXHTML()){            //xhtml, always check with format object whether to expand or not.            if ( omitElementClose(qualifiedName) ) {                // it was a special omit tag, do it the XHTML way: "<br/>", ignoring the expansion option,                // since <br></br> is OK XML, but produces twice the linefeeds desired in the browser.                // for netscape 4.7, though all are fine with it, write a space before the close slash.                writer.write(" />");            } else {                super.writeEmptyElementClose(qualifiedName);            }        } else {            //html, not xhtml            if ( omitElementClose(qualifiedName) ) {                // it was a special omit tag, do it the old html way: "<br>".                writer.write(">");            } else {                // it was NOT a special omit tag, check with format object whether to expand or not.                
super.writeEmptyElementClose(qualifiedName);            }        }    }    protected boolean omitElementClose( String qualifiedName ) {        return internalGetOmitElementCloseSet().contains( qualifiedName.toUpperCase() );    }    private HashSet internalGetOmitElementCloseSet() {        if (omitElementCloseSet == null) {            omitElementCloseSet = new HashSet();            loadOmitElementCloseSet(omitElementCloseSet);        }        return omitElementCloseSet;    }    //If you change this, change the javadoc for getOmitElementCloseSet.    protected void loadOmitElementCloseSet(Set set) {        set.add( "AREA" );        set.add( "BASE" );        set.add( "BR" );        set.add( "COL" );        set.add( "HR" );        set.add( "IMG" );        set.add( "INPUT" );        set.add( "LINK" );        set.add( "META" );        set.add( "P" );        set.add( "PARAM" );    }    //let the people see the set, but not modify it.    /** A clone of the Set of elements that can have their close-tags omitted.  By default it     *  should be     *   "AREA",     *   "BASE",     *   "BR",     *   "COL",     *   "HR",     *   "IMG",     *   "INPUT",     *   "LINK",     *   "META",
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -