📄 htmlwriter.java
字号:
/*
* Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
*
* This software is open source.
* See the bottom of this file for the licence.
*/
package org.dom4j.io;
import java.io.IOException;
import java.io.OutputStream;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import java.util.Stack;
import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Entity;
import org.dom4j.Node;
import org.xml.sax.SAXException;
/**
* <p>
* <code>HTMLWriter</code> takes a DOM4J tree and formats it to a stream as
* HTML. This formatter is similar to XMLWriter but it outputs the text of CDATA
* and Entity sections rather than the serialised format as in XML, it has an
* XHTML mode, it retains whitespace in certain elements such as &lt;PRE&gt;,
* and it supports certain elements which have no corresponding close tag such
* as &lt;BR&gt; and &lt;P&gt;.
* </p>
*
* <p>
* The OutputFormat passed in to the constructor is checked for isXHTML() and
* isExpandEmptyElements(). See {@link OutputFormat OutputFormat} for details.
* Here are the rules for <b>this class </b> based on an OutputFormat, "format",
* passed in to the constructor: <br/><br/>
*
* <ul>
* <li>If an element is in {@link #getOmitElementCloseSet()
* getOmitElementCloseSet}, then it is treated specially:
*
* <ul>
* <li>It never expands, since some browsers treat this as two separate
* Horizontal Rules: &lt;HR&gt;&lt;/HR&gt;</li>
* <li>If {@link org.dom4j.io.OutputFormat#isXHTML() format.isXHTML()}, then
* it has a space before the closing single-tag slash, since Netscape 4.x-
* treats this: &lt;HR /&gt; as an element named "HR" with an attribute named
* "/", but that's better than when it refuses to recognize this: &lt;hr/&gt;
* which it thinks is an element named "HR/".</li>
* </ul>
*
* </li>
* <li>If {@link org.dom4j.io.OutputFormat#isXHTML() format.isXHTML()}, all
* elements must have either a close element, or be a closed single tag.</li>
* <li>If {@link org.dom4j.io.OutputFormat#isExpandEmptyElements()
* format.isExpandEmptyElements()} is true, all elements are expanded except
* as above.</li>
* </ul>
*
* <b>Examples </b>
* </p>
*
* <p>
* </p>
*
* <p>
* If isXHTML == true, CDATA sections look like this:
*
* <PRE>
*
* <b>&lt;myelement&gt;&lt;![CDATA[My data]]&gt;&lt;/myelement&gt; </b>
*
* </PRE>
*
* Otherwise, they look like this:
*
* <PRE>
*
* <b>&lt;myelement&gt;My data&lt;/myelement&gt; </b>
*
* </PRE>
*
* </p>
*
* <p>
* Basically, {@link OutputFormat#isXHTML() OutputFormat.isXHTML()} ==
* <code>true</code> will produce valid XML, while {@link
* org.dom4j.io.OutputFormat#isExpandEmptyElements()
* format.isExpandEmptyElements()} determines whether empty elements are
* expanded if isXHTML is true, excepting the special HTML single tags.
* </p>
*
* <p>
* Also, HTMLWriter handles tags whose contents should be preformatted, that is,
* whitespace-preserved. By default, this set includes the tags &lt;PRE&gt;,
* &lt;SCRIPT&gt;, &lt;STYLE&gt;, and &lt;TEXTAREA&gt;, case insensitively. It
* does not include &lt;IFRAME&gt;. Other tags, such as &lt;CODE&gt;,
* &lt;KBD&gt;, &lt;TT&gt;, &lt;VAR&gt;, are usually rendered in a different
* font in most browsers, but don't preserve whitespace, so they also don't
* appear in the default list. HTML Comments are always whitespace-preserved.
* However, the parser you use may store comments with linefeed-only text nodes
* (\n) even if your platform uses another line.separator character, and
* HTMLWriter outputs Comment nodes exactly as the DOM is set up by the parser.
* See examples and discussion here: {@link #setPreformattedTags(java.util.Set)
* setPreformattedTags}
* </p>
*
* <p>
* <b>Examples </b>
* </p>
* <blockquote>
* <p>
* <b>Pretty Printing </b>
* </p>
*
* <p>
* This example shows how to pretty print a string containing a valid HTML
* document to a string. You can also just call the static methods of this
* class: <br>
* {@link #prettyPrintHTML(String) prettyPrintHTML(String)} or <br>
* {@link #prettyPrintHTML(String,boolean,boolean,boolean,boolean)
* prettyPrintHTML(String,boolean,boolean,boolean,boolean)} or, <br>
* {@link #prettyPrintXHTML(String) prettyPrintXHTML(String)} for XHTML (note
* the X)
* </p>
*
* <pre>
* String testPrettyPrint(String html) {
* StringWriter sw = new StringWriter();
* OutputFormat format = OutputFormat.createPrettyPrint();
* // These are the default values for createPrettyPrint,
* // so you needn't set them:
* // format.setNewlines(true);
* // format.setTrimText(true);
* format.setXHTML(true);
* HTMLWriter writer = new HTMLWriter(sw, format);
* Document document = DocumentHelper.parseText(html);
* writer.write(document);
* writer.flush();
* return sw.toString();
* }
* </pre>
*
* <p>
* This example shows how to create a "squeezed" document, but one that will
* work in browsers even if the browser line length is limited. No newlines are
* included, no extra whitespace at all, except where it is required by
* {@link #setPreformattedTags(java.util.Set) setPreformattedTags}.
* </p>
*
* <pre>
* String testCrunch(String html) {
* StringWriter sw = new StringWriter();
* OutputFormat format = OutputFormat.createPrettyPrint();
* format.setNewlines(false);
* format.setTrimText(true);
* format.setIndent("");
* format.setXHTML(true);
* format.setExpandEmptyElements(false);
* format.setNewLineAfterNTags(20);
* org.dom4j.io.HTMLWriter writer = new HTMLWriter(sw, format);
* org.dom4j.Document document = DocumentHelper.parseText(html);
* writer.write(document);
* writer.flush();
* return sw.toString();
* }
* </pre>
*
* </blockquote>
*
* @author <a href="mailto:james.strachan@metastuff.com">James Strachan </a>
* @author Laramie Crocker
* @version $Revision: 1.21 $
*/
public class HTMLWriter extends XMLWriter {
// Value of the "line.separator" system property, read once at class load.
// writeString emits it in place of a lone "\n" text node when formatStack
// is non-empty.
private static String lineSeparator = System.getProperty("line.separator");
/**
* Upper-case names of the tags whose content is treated as preformatted by
* default: PRE, SCRIPT, STYLE and TEXTAREA.
*/
protected static final HashSet DEFAULT_PREFORMATTED_TAGS;
static {
// If you change this list, update the javadoc examples, above in the
// class javadoc, in writeElement, and in setPreformattedTags().
DEFAULT_PREFORMATTED_TAGS = new HashSet();
DEFAULT_PREFORMATTED_TAGS.add("PRE");
DEFAULT_PREFORMATTED_TAGS.add("SCRIPT");
DEFAULT_PREFORMATTED_TAGS.add("STYLE");
DEFAULT_PREFORMATTED_TAGS.add("TEXTAREA");
}
/**
* Format handed to the superclass by the constructors that take no
* OutputFormat argument. Text trimming and declaration suppression are both
* switched on.
*/
protected static final OutputFormat DEFAULT_HTML_FORMAT;
static {
// NOTE(review): the two constructor arguments are presumably the indent
// string and the "newlines" flag -- confirm against OutputFormat.
DEFAULT_HTML_FORMAT = new OutputFormat(" ", true);
DEFAULT_HTML_FORMAT.setTrimText(true);
DEFAULT_HTML_FORMAT.setSuppressDeclaration(true);
}
// While this stack is non-empty, writeString writes text untrimmed and keeps
// lone "\n" nodes (as line separators). What is pushed onto it is decided
// outside this part of the file -- presumably in writeElement; confirm.
private Stack formatStack = new Stack();
// The most recent text passed to writeString that was not a lone "\n".
private String lastText = "";
// NOTE(review): presumably counts tags written, for use with
// newLineAfterNTags -- confirm. The name is misspelled ("Ouput"); it is left
// as-is because code outside this excerpt may reference it.
private int tagsOuput = 0;
// legal values are 0+, but -1 signifies lazy initialization.
private int newLineAfterNTags = -1;
// Starts out as the shared DEFAULT_PREFORMATTED_TAGS instance, not a copy.
private HashSet preformattedTags = DEFAULT_PREFORMATTED_TAGS;
/**
* Used to store the qualified element names which should have no close
* element tag. Stays null until internalGetOmitElementCloseSet() or
* setOmitElementCloseSet() creates it; entries are stored in upper case.
*/
private HashSet omitElementCloseSet;
/**
* Creates a writer that sends its output to the given Writer, using
* DEFAULT_HTML_FORMAT.
*
* @param writer
* destination passed through to the superclass constructor
*/
public HTMLWriter(Writer writer) {
super(writer, DEFAULT_HTML_FORMAT);
}
/**
* Creates a writer that sends its output to the given Writer, using the
* supplied format.
*
* @param writer
* destination passed through to the superclass constructor
* @param format
* output format passed through to the superclass constructor
*/
public HTMLWriter(Writer writer, OutputFormat format) {
super(writer, format);
}
/**
* Creates a writer using DEFAULT_HTML_FORMAT; the output destination is
* whatever the superclass constructor chooses for this overload.
*
* @throws UnsupportedEncodingException
* declared by the superclass constructor; presumably raised when
* the format's encoding is not supported (confirm in XMLWriter)
*/
public HTMLWriter() throws UnsupportedEncodingException {
super(DEFAULT_HTML_FORMAT);
}
/**
* Creates a writer using the supplied format; the output destination is
* whatever the superclass constructor chooses for this overload.
*
* @param format
* output format passed through to the superclass constructor
*
* @throws UnsupportedEncodingException
* declared by the superclass constructor; presumably raised when
* the format's encoding is not supported (confirm in XMLWriter)
*/
public HTMLWriter(OutputFormat format) throws UnsupportedEncodingException {
super(format);
}
/**
* Creates a writer that sends its output to the given stream, using
* DEFAULT_HTML_FORMAT.
*
* @param out
* destination stream passed through to the superclass constructor
*
* @throws UnsupportedEncodingException
* declared by the superclass constructor; presumably raised when
* the format's encoding is not supported (confirm in XMLWriter)
*/
public HTMLWriter(OutputStream out) throws UnsupportedEncodingException {
super(out, DEFAULT_HTML_FORMAT);
}
/**
* Creates a writer that sends its output to the given stream, using the
* supplied format.
*
* @param out
* destination stream passed through to the superclass constructor
* @param format
* output format passed through to the superclass constructor
*
* @throws UnsupportedEncodingException
* declared by the superclass constructor; presumably raised when
* the format's encoding is not supported (confirm in XMLWriter)
*/
public HTMLWriter(OutputStream out, OutputFormat format)
throws UnsupportedEncodingException {
super(out, format);
}
/**
* Start-of-CDATA notification. Deliberately empty: this method writes
* nothing and changes no state. The CDATA text itself is handled by
* writeCDATA.
* NOTE(review): presumably replaces an XMLWriter implementation of the SAX
* lexical callback -- confirm.
*
* @throws SAXException
* never thrown by this implementation
*/
public void startCDATA() throws SAXException {
}
/**
* End-of-CDATA notification. Deliberately empty: this method writes nothing
* and changes no state.
* NOTE(review): presumably replaces an XMLWriter implementation of the SAX
* lexical callback -- confirm.
*
* @throws SAXException
* never thrown by this implementation
*/
public void endCDATA() throws SAXException {
}
// Overloaded methods
/**
 * Writes the content of a CDATA section.
 * <p>
 * In XHTML mode the work is handed to the superclass, so you get the CDATA
 * brackets; in plain HTML mode only the raw text is written, without any
 * escaping. Either way the last-output node type is then recorded as a
 * CDATA section.
 * </p>
 *
 * @param text
 *            the text held by the CDATA section
 *
 * @throws IOException
 *             if the underlying write fails
 */
protected void writeCDATA(String text) throws IOException {
    // XXX: Should we escape entities in the plain-HTML branch?
    final boolean xhtml = getOutputFormat().isXHTML();
    if (!xhtml) {
        writer.write(text);
    } else {
        super.writeCDATA(text);
    }
    lastOutputNodeType = Node.CDATA_SECTION_NODE;
}
/**
 * Writes an entity as its plain text (the value of
 * <code>entity.getText()</code>) and records that the last node written was
 * an entity reference.
 *
 * @param entity
 *            the entity whose text is written
 *
 * @throws IOException
 *             if the underlying write fails
 */
protected void writeEntity(Entity entity) throws IOException {
    final String entityText = entity.getText();
    writer.write(entityText);
    lastOutputNodeType = Node.ENTITY_REFERENCE_NODE;
}
/**
* Deliberately empty: this writer emits nothing when asked to write the
* declaration.
* NOTE(review): presumably replaces the XML declaration output of XMLWriter
* -- confirm.
*
* @throws IOException
* never thrown by this implementation
*/
protected void writeDeclaration() throws IOException {
}
/**
 * Writes a run of text, treating a lone <code>"\n"</code> specially.
 * <p>
 * DOM stores newlines as <code>"\n"</code>, and this method appears to
 * receive them as separate text nodes. Such a node is replaced by the
 * platform line separator while the format stack is non-empty (that is,
 * inside preformatted content) and dropped otherwise. Any other text is
 * remembered in <code>lastText</code> (it may be used by writeElement to
 * line up preformatted tags) and handed to the superclass, trimmed unless
 * the format stack is non-empty.
 * </p>
 *
 * @param text
 *            the text to write
 *
 * @throws IOException
 *             if the underlying write fails
 */
protected void writeString(String text) throws IOException {
    final boolean preformatted = !formatStack.empty();
    final boolean loneNewline = text.equals("\n");

    if (loneNewline) {
        // Only preformatted content keeps its line breaks.
        if (preformatted) {
            super.writeString(lineSeparator);
        }
        return;
    }

    lastText = text;
    super.writeString(preformatted ? text : text.trim());
}
/**
 * Overridden method to not close certain element names to avoid weird
 * behaviour from browsers for versions up to 5.x. Names for which
 * {@link #omitElementClose(String)} answers true get no closing tag at all;
 * every other name is closed by the superclass.
 *
 * @param qualifiedName
 *            qualified name of the element being closed
 *
 * @throws IOException
 *             if the underlying write fails
 */
protected void writeClose(String qualifiedName) throws IOException {
    if (omitElementClose(qualifiedName)) {
        return;
    }
    super.writeClose(qualifiedName);
}
/**
 * Finishes the start tag of an element that has no content.
 * <ul>
 * <li>Names in the omit-close set are never expanded, whatever the format
 * says: they end with <code>" /&gt;"</code> in XHTML mode and with a bare
 * <code>"&gt;"</code> in plain HTML mode.</li>
 * <li>Every other name is handed to the superclass, which checks with the
 * format object whether to expand or not.</li>
 * </ul>
 *
 * @param qualifiedName
 *            qualified name of the empty element
 *
 * @throws IOException
 *             if the underlying write fails
 */
protected void writeEmptyElementClose(String qualifiedName)
        throws IOException {
    final boolean xhtml = getOutputFormat().isXHTML();

    if (!omitElementClose(qualifiedName)) {
        // Not a special omit tag: the format object decides.
        super.writeEmptyElementClose(qualifiedName);
        return;
    }

    // Special omit tag. <br></br> is OK XML but produces twice the
    // linefeeds desired in the browser, so it is never expanded. In XHTML
    // a space goes before the close slash for the sake of Netscape 4.7;
    // in plain HTML the old "<br>" form is used.
    writer.write(xhtml ? " />" : ">");
}
/**
 * Tells whether the close tag of the named element is to be omitted, i.e.
 * whether the name, compared case-insensitively, is in the omit-close set.
 * <p>
 * The name is upper-cased with {@link Locale#ENGLISH} first. With the
 * default locale alone, a locale such as Turkish maps "i" to a dotted
 * capital, so "img", "input" or "link" would not match the built-in
 * entries. The default-locale form is still tried as a fallback because
 * {@link #setOmitElementCloseSet(Set)} normalises its entries with the
 * default locale.
 * </p>
 *
 * @param qualifiedName
 *            qualified name of the element; must not be null
 *
 * @return true if the element's close tag should be omitted
 */
protected boolean omitElementClose(String qualifiedName) {
    final Set omitSet = internalGetOmitElementCloseSet();

    final String englishUpper = qualifiedName.toUpperCase(Locale.ENGLISH);
    if (omitSet.contains(englishUpper)) {
        return true;
    }

    // Fallback keeps entries that were stored using the default locale
    // reachable; skipped when both forms are the same string.
    final String defaultUpper = qualifiedName.toUpperCase();
    return !defaultUpper.equals(englishUpper)
            && omitSet.contains(defaultUpper);
}
/**
 * Returns the live omit-close set, creating it on first use and filling it
 * through {@link #loadOmitElementCloseSet(Set)}.
 *
 * @return the internal set itself, not a copy
 */
private HashSet internalGetOmitElementCloseSet() {
    if (omitElementCloseSet != null) {
        return omitElementCloseSet;
    }
    // Publish the empty set in the field first, then let the loader fill it.
    omitElementCloseSet = new HashSet();
    loadOmitElementCloseSet(omitElementCloseSet);
    return omitElementCloseSet;
}
// If you change this, change the javadoc for getOmitElementCloseSet.
/**
 * Adds the default omit-close element names, in upper case, to the given
 * set.
 *
 * @param set
 *            the set that receives the names
 */
protected void loadOmitElementCloseSet(Set set) {
    final String[] defaultNames = {
        "AREA", "BASE", "BR", "COL", "HR", "IMG",
        "INPUT", "LINK", "META", "P", "PARAM"
    };
    for (int i = 0; i < defaultNames.length; i++) {
        set.add(defaultNames[i]);
    }
}
// let the people see the set, but not modify it.
/**
 * Returns a copy of the Set of elements that can have their close-tags
 * omitted. By default it should be "AREA", "BASE", "BR", "COL", "HR",
 * "IMG", "INPUT", "LINK", "META", "P", "PARAM". Changes made to the
 * returned set do not affect this writer.
 *
 * @return a copy of the Set.
 */
public Set getOmitElementCloseSet() {
    final HashSet current = internalGetOmitElementCloseSet();
    return new HashSet(current);
}
/**
* To use the empty set, pass an empty Set, or null:
*
* <pre>
*
*
* setOmitElementCloseSet(new HashSet());
* or
* setOmitElementCloseSet(null);
*
*
* </pre>
*
* @param newSet
* DOCUMENT ME!
*/
public void setOmitElementCloseSet(Set newSet) {
// resets, and safely empties it out if newSet is null.
omitElementCloseSet = new HashSet();
if (newSet != null) {
omitElementCloseSet = new HashSet();
Object aTag;
Iterator iter = newSet.iterator();
while (iter.hasNext()) {
aTag = iter.next();
if (aTag != null) {
omitElementCloseSet.add(aTag.toString().toUpperCase());
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -