xmlwriter.java

来自「kaffe Java 解释器语言,源码,Java的子集系统,开放源代码」· Java 代码 · 共 1,917 行 · 第 1/4 页

JAVA
1,917
字号
/* * Copyright (C) 1999-2001 David Brownell *  * This file is part of GNU JAXP, a library. * * GNU JAXP is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. *  * GNU JAXP is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. *  * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * * As a special exception, if you link this library with other files to * produce an executable, this library does not by itself cause the * resulting executable to be covered by the GNU General Public License. * This exception does not however invalidate any other reasons why the * executable file might be covered by the GNU General Public License.  */package gnu.xml.util;import java.io.BufferedWriter;import java.io.CharConversionException;import java.io.IOException;import java.io.OutputStream;import java.io.OutputStreamWriter;import java.io.Writer;import java.util.Stack;import org.xml.sax.*;import org.xml.sax.ext.*;import org.xml.sax.helpers.*;/** * This class is a SAX handler which writes all its input as a well formed * XML or XHTML document.  If driven using SAX2 events, this output may * include a recreated document type declaration, subject to limitations * of SAX (no internal subset exposed) or DOM (the important declarations, * with their documentation, are discarded). * * <p> By default, text is generated "as-is", but some optional modes * are supported.  Pretty-printing is supported, to make life easier * for people reading the output.  
XHTML (1.0) output can be made
 * particularly pretty; all the built-in character entities are known.
 * Canonical XML can also be generated, assuming the input is properly
 * formed.
 *
 * <hr>
 *
 * <p> Some of the methods on this class are intended for applications to
 * use directly, rather than as pure SAX2 event callbacks.  Some of those
 * methods access the JavaBeans properties (used to tweak output formats,
 * for example canonicalization and pretty printing).  Subclasses
 * are expected to add new behaviors, not to modify current behavior, so
 * many such methods are final.</p>
 *
 * <p> The <em>write*()</em> methods may be slightly simpler for some
 * applications to use than direct callbacks.  For example, they support
 * a simple policy for encoding data items as the content of a single element.
 *
 * <p> To reuse an XMLWriter you must provide it with a new Writer, since
 * this handler closes the writer it was given as part of its endDocument()
 * handling.  (XML documents have an end of input, and the way to encode
 * that on a stream is to close it.) </p>
 *
 * <hr>
 *
 * <p> Note that any relative URIs in the source document, as found in
 * entity and notation declarations, ought to have been fully resolved by
 * the parser providing events to this handler.  This means that the
 * output text should only have fully resolved URIs, which may not be
 * the desired behavior in cases where later binding is desired. </p>
 *
 * <p> <em>Note that due to SAX2 defaults, you may need to manually
 * ensure that the input events are XML-conformant with respect to namespace
 * prefixes and declarations.  {@link gnu.xml.pipeline.NSFilter} is
 * one solution to this problem, in the context of processing pipelines.</em>
 * Something as simple as connecting this handler to a parser might not
 * generate the correct output.  Another workaround is to ensure that the
 * <em>namespace-prefixes</em> feature is always set to true, if you're
 * hooking this directly up to some XMLReader implementation.
* * @see gnu.xml.pipeline.TextConsumer * * @author David Brownell */public class XMLWriter    implements ContentHandler, LexicalHandler, DTDHandler, DeclHandler{    // text prints/escapes differently depending on context    //	CTX_ENTITY ... entity literal value    //	CTX_ATTRIBUTE ... attribute literal value    //	CTX_CONTENT ... content of an element    //	CTX_UNPARSED ... CDATA, comment, PI, names, etc    //  CTX_NAME ... name or nmtoken, no escapes possible    private static final int	CTX_ENTITY = 1;    private static final int	CTX_ATTRIBUTE = 2;    private static final int	CTX_CONTENT = 3;    private static final int	CTX_UNPARSED = 4;    private static final int	CTX_NAME = 5;// FIXME: names (element, attribute, PI, notation, etc) are not// currently written out with range checks (escapeChars).// In non-XHTML, some names can't be directly written; panic!    private static String	sysEOL;    static {	try {	    sysEOL = System.getProperty ("line.separator", "\n");	    // don't use the system's EOL if it's illegal XML.	    
if (!isLineEnd (sysEOL))		sysEOL = "\n";	} catch (SecurityException e) {	    sysEOL = "\n";	}    }    private static boolean isLineEnd (String eol)    {	return "\n".equals (eol)		    || "\r".equals (eol)		    || "\r\n".equals (eol);    }    private Writer		out;    private boolean		inCDATA;    private int			elementNestLevel;    private String		eol = sysEOL;    private short		dangerMask;    private StringBuffer	stringBuf;    private Locator		locator;    private ErrorHandler	errHandler;    private boolean		expandingEntities = false;    private int			entityNestLevel;    private boolean		xhtml;    private boolean		startedDoctype;    private String		encoding;    private boolean		canonical;    private boolean		inDoctype;    private boolean		inEpilogue;    // pretty printing controls    private boolean		prettyPrinting;    private int			column;    private boolean		noWrap;    private Stack		space = new Stack ();    // this is not a hard'n'fast rule -- longer lines are OK,    // but are to be avoided.  Here, prettyprinting is more to    // show structure "cleanly" than to be precise about it.    // better to have ragged layout than one line 24Kb long.    private static final int	lineLength = 75;    /**     * Constructs this handler with System.out used to write SAX events     * using the UTF-8 encoding.  Avoid using this except when you know     * it's safe to close System.out at the end of the document.     */    public XMLWriter () throws IOException	{ this (System.out); }    /**     * Constructs a handler which writes all input to the output stream     * in the UTF-8 encoding, and closes it when endDocument is called.     * (Yes it's annoying that this throws an exception -- but there's     * really no way around it, since it's barely possible a JDK may     * exist somewhere that doesn't know how to emit UTF-8.)     
*/
    public XMLWriter (OutputStream out) throws IOException
    {
        // Wrap the byte stream in a writer created with the "UTF8"
        // encoding name, then delegate to the Writer-based constructor.
        this (new OutputStreamWriter (out, "UTF8"));
    }

    /**
     * Constructs a handler which writes all input to the writer, and then
     * closes the writer when the document ends.  If an XML declaration is
     * written onto the output, and this class can determine the name of
     * the character encoding for this writer, that encoding name will be
     * included in the XML declaration.
     *
     * <P> See the description of the constructor which takes an encoding
     * name for important information about selection of encodings.
     *
     * @param writer XML text is written to this writer.
     */
    public XMLWriter (Writer writer)
    {
        // Delegate without an explicit encoding name.
        this (writer, null);
    }

    /**
     * Constructs a handler which writes all input to the writer, and then
     * closes the writer when the document ends.  If an XML declaration is
     * written onto the output, this class will use the specified encoding
     * name in that declaration.  If no encoding name is specified, no
     * encoding name will be declared unless this class can otherwise
     * determine the name of the character encoding for this writer.
     *
     * <P> At this time, only the UTF-8 ("UTF8") and UTF-16 ("Unicode")
     * output encodings are fully lossless with respect to XML data.  If you
     * use any other encoding you risk having your data be silently mangled
     * on output, as the standard Java character encoding subsystem silently
     * maps non-encodable characters to a question mark ("?") and will not
     * report such errors to applications.
     *
     * <p> For a few other encodings the risk can be reduced. If the writer is
     * a java.io.OutputStreamWriter, and uses either the ISO-8859-1 ("8859_1",
     * "ISO8859_1", etc) or US-ASCII ("ASCII") encodings, content which
     * can't be encoded in those encodings will be written safely.
Where     * relevant, the XHTML entity names will be used; otherwise, numeric     * character references will be emitted.     *     * <P> However, there remain a number of cases where substituting such     * entity or character references is not an option.  Such references are     * not usable within a DTD, comment, PI, or CDATA section.  Neither may     * they be used when element, attribute, entity, or notation names have     * the problematic characters.     *     * @param writer XML text is written to this writer.     * @param encoding if non-null, and an XML declaration is written,     *	this is the name that will be used for the character encoding.     */    public XMLWriter (Writer writer, String encoding)    {	setWriter (writer, encoding);    }        private void setEncoding (String encoding)    {	if (encoding == null && out instanceof OutputStreamWriter)	    encoding = ((OutputStreamWriter)out).getEncoding ();	if (encoding != null) {	    encoding = encoding.toUpperCase ();	    // Use official encoding names where we know them,	    // avoiding the Java-only names.  When using common	    // encodings where we can easily tell if characters	    // are out of range, we'll escape out-of-range	    // characters using character refs for safety.	    // I _think_ these are all the main synonyms for these!	    if ("UTF8".equals (encoding)) {		encoding = "UTF-8";	    } else if ("US-ASCII".equals (encoding)		    || "ASCII".equals (encoding)) {		dangerMask = (short) 0xff80;		encoding = "US-ASCII";	    } else if ("ISO-8859-1".equals (encoding)		    || "8859_1".equals (encoding)		    || "ISO8859_1".equals (encoding)) {		dangerMask = (short) 0xff00;		encoding = "ISO-8859-1";	    } else if ("UNICODE".equals (encoding)		    || "UNICODE-BIG".equals (encoding)		    || "UNICODE-LITTLE".equals (encoding)) {		encoding = "UTF-16";		// TODO: UTF-16BE, UTF-16LE ... no BOM; what		// release of JDK supports those Unicode names?	    
}	    if (dangerMask != 0)		stringBuf = new StringBuffer ();	}	this.encoding = encoding;    }    /**     * Resets the handler to write a new text document.     *     * @param writer XML text is written to this writer.     * @param encoding if non-null, and an XML declaration is written,     *	this is the name that will be used for the character encoding.     *     * @exception IllegalStateException if the current     *	document hasn't yet ended (with {@link #endDocument})     */    final public void setWriter (Writer writer, String encoding)    {	if (out != null)	    throw new IllegalStateException (		"can't change stream in mid course");	out = writer;	if (out != null)	    setEncoding (encoding);	if (!(out instanceof BufferedWriter))	    out = new BufferedWriter (out);	space.push ("default");    }    /**     * Assigns the line ending style to be used on output.     * @param eolString null to use the system default; else     *	"\n", "\r", or "\r\n".     */    final public void setEOL (String eolString)    {	if (eolString == null)	    eol = sysEOL;	else if (!isLineEnd (eolString))	    eol = eolString;	else	    throw new IllegalArgumentException (eolString);    }    /**     * Assigns the error handler to be used to present most fatal     * errors.     */    public void setErrorHandler (ErrorHandler handler)    {	errHandler = handler;    }    /**     * Used internally and by subclasses, this encapsulates the logic     * involved in reporting fatal errors.  It uses locator information     * for good diagnostics, if available, and gives the application's     * ErrorHandler the opportunity to handle the error before throwing     * an exception.     
*/    protected void fatal (String message, Exception e)    throws SAXException    {	SAXParseException	x;	if (locator == null)	    x = new SAXParseException (message, null, null, -1, -1, e);	else	    x = new SAXParseException (message, locator, e);	if (errHandler != null)	    errHandler.fatalError (x);	throw x;    }    // JavaBeans properties    /**     * Controls whether the output should attempt to follow the "transitional"     * XHTML rules so that it meets the "HTML Compatibility Guidelines"     * appendix in the XHTML specification.  A "transitional" Document Type     * Declaration (DTD) is placed near the beginning of the output document,     * instead of whatever DTD would otherwise have been placed there, and     * XHTML empty elements are printed specially.  When writing text in     * US-ASCII or ISO-8859-1 encodings, the predefined XHTML internal     * entity names are used (in preference to character references) when     * writing content characters which can't be expressed in those encodings.     *     * <p> When this option is enabled, it is the caller's responsibility     * to ensure that the input is otherwise valid as XHTML.  Things to     * be careful of in all cases, as described in the appendix referenced     * above, include:  <ul>     *     *	<li> Element and attribute names must be in lower case, both     *		in the document and in any CSS style sheet.     *	<li> All XML constructs must be valid as defined by the XHTML     *		"transitional" DTD (including all familiar constructs,     *		even deprecated ones).     *	<li> The root element must be "html".     *	<li> Elements that must be empty (such as <em>&lt;br&gt;</em>     *		must have no content.     *	<li> Use both <em>lang</em> and <em>xml:lang</em> attributes     *		when specifying language.     *	<li> Similarly, use both <em>id</em> and <em>name</em> attributes     *		when defining elements that may be referred to through     *		URI fragment identifiers ... 
and make sure that the
     *          value is a legal NMTOKEN, since not all such HTML 4.0
     *          identifiers are valid in XML.
     *  <li> Be careful with character encodings; make sure you provide
     *          a <em>&lt;meta http-equiv="Content-type"
     *          content="text/xml;charset=..." /&gt;</em> element in
     *          the HTML "head" element, naming the same encoding
     *          used to create this handler.  Also, if that encoding
     *          is anything other than US-ASCII, make sure that if
     *          the document is given a MIME content type, it has
     *          a <em>charset=...</em> attribute with that encoding.
     *  </ul>
     *
     * <p> Additionally, some of the oldest browsers have additional
     * quirks, to address with guidelines such as: <ul>
     *
     *  <li> Processing instructions may be rendered, so avoid them.
     *          (Similarly for an XML declaration.)
     *  <li> Embedded style sheets and scripts should not contain XML
     *          markup delimiters:  &amp;, &lt;, and ]]&gt; are trouble.
     *  <li> Attribute values should not have line breaks or multiple
     *          consecutive white space characters.
     *  <li> Use no more than one of the deprecated (transitional)
     *          <em>&lt;isindex&gt;</em> elements.
     *  <li> Some boolean attributes (such as <em>compact, checked,
     *          disabled, readonly, selected,</em> and more) confuse
     *          some browsers, since they only understand minimized
     *          versions which are illegal in XML.
     *  </ul>
     *
     * <p> Also, some characteristics of the resulting output may be
     * a function of whether the document is later given a MIME
     * content type of <em>text/html</em> rather than one indicating
     * XML (<em>application/xml</em> or <em>text/xml</em>).  Worse,
     * some browsers ignore MIME content types and prefer to rely on URI
     * name suffixes -- so an "index.xml" could always be XML, never
     * XHTML, no matter its MIME type.
*/
    public final void setXhtml(boolean value) {
        if (locator != null) {
            throw new IllegalStateException("started parsing");
        }
        // Turning XHTML output on switches canonical output off.
        if (value) {
            canonical = false;
        }
        xhtml = value;
    }

    /**
     * Reports whether the output tries to echo the input following the
     * "transitional" XHTML rules and the "HTML Compatibility Guidelines",
     * so that an HTML version 3 browser can read the output as HTML.
     *
     * @return true if XHTML output is enabled; false (the default)
     *  otherwise.
     */
    public final boolean isXhtml() {
        return xhtml;
    }

    /**
     * Selects between writing references to entities (the default) and
     * writing the expanded values of those entities instead.
     *
     * @param value true to write expanded values, false to write
     *  entity references
     * @exception IllegalStateException if a locator has already been
     *  set, which is taken to mean parsing has started
     */
    public final void setExpandingEntities(boolean value) {
        if (locator != null) {
            throw new IllegalStateException("started parsing");
        }
        // Leaving entity references in place switches canonical output off.
        if (!value) {
            canonical = false;
        }
        expandingEntities = value;
    }

    /**
     * Reports whether entity references are replaced by their values.
     *
     * @return true if the output will have no entity references;
     *  false (the default) otherwise.
     */
    public final boolean isExpandingEntities() {
        return expandingEntities;
    }

    /**
     * Controls pretty-printing, which by default is not enabled
     * (and currently is most useful for XHTML output).
     * Pretty printing enables structural indentation, sorting of attributes
     * by name, line wrapping, and potentially other mechanisms for making
     * output more or less readable.
     *
     * <p> At this writing, structural indentation and line wrapping are
     * enabled when pretty printing is enabled and the <em>xml:space</em>
     * attribute has the value <em>default</em> (its other legal value is
     * <em>preserve</em>, as defined in the XML specification).  The three
     * XHTML element types which use another value are recognized by their
     * names (namespaces are ignored).
     *
     * <p> Also, for the record, the "pretty" aspect of printing here

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?