📄 xmlwriter.java

📁 gcc的组建
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
12 3 4 下一页
/* XMLWriter.java --
   Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version.
*/package gnu.xml.util;import java.io.BufferedWriter;import java.io.CharConversionException;import java.io.IOException;import java.io.OutputStream;import java.io.OutputStreamWriter;import java.io.Writer;import java.util.Stack;import org.xml.sax.*;import org.xml.sax.ext.*;import org.xml.sax.helpers.*;/** * This class is a SAX handler which writes all its input as a well formed * XML or XHTML document.  If driven using SAX2 events, this output may * include a recreated document type declaration, subject to limitations * of SAX (no internal subset exposed) or DOM (the important declarations, * with their documentation, are discarded). * * <p> By default, text is generated "as-is", but some optional modes * are supported.  Pretty-printing is supported, to make life easier * for people reading the output.  XHTML (1.0) output has can be made * particularly pretty; all the built-in character entities are known. * Canonical XML can also be generated, assuming the input is properly * formed. * * <hr> * * <p> Some of the methods on this class are intended for applications to * use directly, rather than as pure SAX2 event callbacks.  Some of those * methods access the JavaBeans properties (used to tweak output formats, * for example canonicalization and pretty printing).  Subclasses * are expected to add new behaviors, not to modify current behavior, so * many such methods are final.</p> * * <p> The <em>write*()</em> methods may be slightly simpler for some * applications to use than direct callbacks.  For example, they support * a simple policy for encoding data items as the content of a single element. * * <p> To reuse an XMLWriter you must provide it with a new Writer, since * this handler closes the writer it was given as part of its endDocument() * handling.  (XML documents have an end of input, and the way to encode * that on a stream is to close it.) 
</p> * * <hr> * * <p> Note that any relative URIs in the source document, as found in * entity and notation declarations, ought to have been fully resolved by * the parser providing events to this handler.  This means that the * output text should only have fully resolved URIs, which may not be * the desired behavior in cases where later binding is desired. </p> * * <p> <em>Note that due to SAX2 defaults, you may need to manually * ensure that the input events are XML-conformant with respect to namespace * prefixes and declarations.  {@link gnu.xml.pipeline.NSFilter} is * one solution to this problem, in the context of processing pipelines.</em> * Something as simple as connecting this handler to a parser might not * generate the correct output.  Another workaround is to ensure that the * <em>namespace-prefixes</em> feature is always set to true, if you're * hooking this directly up to some XMLReader implementation. * * @see gnu.xml.pipeline.TextConsumer * * @author David Brownell */public class XMLWriter    implements ContentHandler, LexicalHandler, DTDHandler, DeclHandler{    // text prints/escapes differently depending on context    //	CTX_ENTITY ... entity literal value    //	CTX_ATTRIBUTE ... attribute literal value    //	CTX_CONTENT ... content of an element    //	CTX_UNPARSED ... CDATA, comment, PI, names, etc    //  CTX_NAME ... name or nmtoken, no escapes possible    private static final int	CTX_ENTITY = 1;    private static final int	CTX_ATTRIBUTE = 2;    private static final int	CTX_CONTENT = 3;    private static final int	CTX_UNPARSED = 4;    private static final int	CTX_NAME = 5;// FIXME: names (element, attribute, PI, notation, etc) are not// currently written out with range checks (escapeChars).// In non-XHTML, some names can't be directly written; panic!    private static String	sysEOL;    static {	try {	    sysEOL = System.getProperty ("line.separator", "\n");	    // don't use the system's EOL if it's illegal XML.	    
if (!isLineEnd (sysEOL))		sysEOL = "\n";	} catch (SecurityException e) {	    sysEOL = "\n";	}    }    private static boolean isLineEnd (String eol)    {	return "\n".equals (eol)		    || "\r".equals (eol)		    || "\r\n".equals (eol);    }    private Writer		out;    private boolean		inCDATA;    private int			elementNestLevel;    private String		eol = sysEOL;    private short		dangerMask;    private StringBuffer	stringBuf;    private Locator		locator;    private ErrorHandler	errHandler;    private boolean		expandingEntities = false;    private int			entityNestLevel;    private boolean		xhtml;    private boolean		startedDoctype;    private String		encoding;    private boolean		canonical;    private boolean		inDoctype;    private boolean		inEpilogue;    // pretty printing controls    private boolean		prettyPrinting;    private int			column;    private boolean		noWrap;    private Stack		space = new Stack ();    // this is not a hard'n'fast rule -- longer lines are OK,    // but are to be avoided.  Here, prettyprinting is more to    // show structure "cleanly" than to be precise about it.    // better to have ragged layout than one line 24Kb long.    private static final int	lineLength = 75;    /**     * Constructs this handler with System.out used to write SAX events     * using the UTF-8 encoding.  Avoid using this except when you know     * it's safe to close System.out at the end of the document.     */    public XMLWriter () throws IOException	{ this (System.out); }    /**     * Constructs a handler which writes all input to the output stream     * in the UTF-8 encoding, and closes it when endDocument is called.     * (Yes it's annoying that this throws an exception -- but there's     * really no way around it, since it's barely possible a JDK may     * exist somewhere that doesn't know how to emit UTF-8.)     
*/
    public XMLWriter (OutputStream out) throws IOException
    {
        // "UTF8" is the Java-specific name for the UTF-8 encoding; the
        // OutputStreamWriter constructor is what can throw here.
        this (new OutputStreamWriter (out, "UTF8"));
    }

    /**
     * Constructs a handler which writes all input to the writer, and then
     * closes the writer when the document ends.  If an XML declaration is
     * written onto the output, and this class can determine the name of
     * the character encoding for this writer, that encoding name will be
     * included in the XML declaration.
     *
     * <P> See the description of the constructor which takes an encoding
     * name for important information about selection of encodings.
     *
     * @param writer XML text is written to this writer.
     */
    public XMLWriter (Writer writer)
    {
        // no explicit encoding name is supplied on this path
        this (writer, null);
    }

    /**
     * Constructs a handler which writes all input to the writer, and then
     * closes the writer when the document ends.  If an XML declaration is
     * written onto the output, this class will use the specified encoding
     * name in that declaration.  If no encoding name is specified, no
     * encoding name will be declared unless this class can otherwise
     * determine the name of the character encoding for this writer.
     *
     * <P> At this time, only the UTF-8 ("UTF8") and UTF-16 ("Unicode")
     * output encodings are fully lossless with respect to XML data.  If you
     * use any other encoding you risk having your data be silently mangled
     * on output, as the standard Java character encoding subsystem silently
     * maps non-encodable characters to a question mark ("?") and will not
     * report such errors to applications.
     *
     * <p> For a few other encodings the risk can be reduced. If the writer is
     * a java.io.OutputStreamWriter, and uses either the ISO-8859-1 ("8859_1",
     * "ISO8859_1", etc) or US-ASCII ("ASCII") encodings, content which
     * can't be encoded in those encodings will be written safely.
Where     * relevant, the XHTML entity names will be used; otherwise, numeric     * character references will be emitted.     *     * <P> However, there remain a number of cases where substituting such     * entity or character references is not an option.  Such references are     * not usable within a DTD, comment, PI, or CDATA section.  Neither may     * they be used when element, attribute, entity, or notation names have     * the problematic characters.     *     * @param writer XML text is written to this writer.     * @param encoding if non-null, and an XML declaration is written,     *	this is the name that will be used for the character encoding.     */    public XMLWriter (Writer writer, String encoding)    {	setWriter (writer, encoding);    }        private void setEncoding (String encoding)    {	if (encoding == null && out instanceof OutputStreamWriter)	    encoding = ((OutputStreamWriter)out).getEncoding ();	if (encoding != null) {	    encoding = encoding.toUpperCase ();	    // Use official encoding names where we know them,	    // avoiding the Java-only names.  When using common	    // encodings where we can easily tell if characters	    // are out of range, we'll escape out-of-range	    // characters using character refs for safety.	    // I _think_ these are all the main synonyms for these!	    if ("UTF8".equals (encoding)) {		encoding = "UTF-8";	    } else if ("US-ASCII".equals (encoding)		    || "ASCII".equals (encoding)) {		dangerMask = (short) 0xff80;		encoding = "US-ASCII";	    } else if ("ISO-8859-1".equals (encoding)		    || "8859_1".equals (encoding)		    || "ISO8859_1".equals (encoding)) {		dangerMask = (short) 0xff00;		encoding = "ISO-8859-1";	    } else if ("UNICODE".equals (encoding)		    || "UNICODE-BIG".equals (encoding)		    || "UNICODE-LITTLE".equals (encoding)) {		encoding = "UTF-16";		// TODO: UTF-16BE, UTF-16LE ... no BOM; what		// release of JDK supports those Unicode names?	    
}	    if (dangerMask != 0)		stringBuf = new StringBuffer ();	}	this.encoding = encoding;    }    /**     * Resets the handler to write a new text document.     *     * @param writer XML text is written to this writer.     * @param encoding if non-null, and an XML declaration is written,     *	this is the name that will be used for the character encoding.     *     * @exception IllegalStateException if the current     *	document hasn't yet ended (with {@link #endDocument})     */    final public void setWriter (Writer writer, String encoding)    {	if (out != null)	    throw new IllegalStateException (		"can't change stream in mid course");	out = writer;	if (out != null)	    setEncoding (encoding);	if (!(out instanceof BufferedWriter))	    out = new BufferedWriter (out);	space.push ("default");    }    /**     * Assigns the line ending style to be used on output.     * @param eolString null to use the system default; else     *	"\n", "\r", or "\r\n".     */    final public void setEOL (String eolString)    {	if (eolString == null)	    eol = sysEOL;	else if (!isLineEnd (eolString))	    eol = eolString;	else	    throw new IllegalArgumentException (eolString);    }    /**     * Assigns the error handler to be used to present most fatal     * errors.     */    public void setErrorHandler (ErrorHandler handler)    {	errHandler = handler;    }    /**     * Used internally and by subclasses, this encapsulates the logic     * involved in reporting fatal errors.  It uses locator information     * for good diagnostics, if available, and gives the application's     * ErrorHandler the opportunity to handle the error before throwing     * an exception.     
*/    protected void fatal (String message, Exception e)    throws SAXException    {	SAXParseException	x;	if (locator == null)	    x = new SAXParseException (message, null, null, -1, -1, e);	else	    x = new SAXParseException (message, locator, e);	if (errHandler != null)	    errHandler.fatalError (x);	throw x;    }    // JavaBeans properties    /**     * Controls whether the output should attempt to follow the "transitional"     * XHTML rules so that it meets the "HTML Compatibility Guidelines"     * appendix in the XHTML specification.  A "transitional" Document Type     * Declaration (DTD) is placed near the beginning of the output document,     * instead of whatever DTD would otherwise have been placed there, and     * XHTML empty elements are printed specially.  When writing text in     * US-ASCII or ISO-8859-1 encodings, the predefined XHTML internal     * entity names are used (in preference to character references) when     * writing content characters which can't be expressed in those encodings.     *     * <p> When this option is enabled, it is the caller's responsibility     * to ensure that the input is otherwise valid as XHTML.  Things to     * be careful of in all cases, as described in the appendix referenced     * above, include:  <ul>     *     *	<li> Element and attribute names must be in lower case, both     *		in the document and in any CSS style sheet.     *	<li> All XML constructs must be valid as defined by the XHTML     *		"transitional" DTD (including all familiar constructs,     *		even deprecated ones).     *	<li> The root element must be "html".     *	<li> Elements that must be empty (such as <em>&lt;br&gt;</em>     *		must have no content.     *	<li> Use both <em>lang</em> and <em>xml:lang</em> attributes     *		when specifying language.     *	<li> Similarly, use both <em>id</em> and <em>name</em> attributes     *		when defining elements that may be referred to through     *		URI fragment identifiers ... 
*          and make sure that the
     *          value is a legal NMTOKEN, since not all such HTML 4.0
     *          identifiers are valid in XML.
     *  <li> Be careful with character encodings; make sure you provide
     *          a <em>&lt;meta http-equiv="Content-type"
     *          content="text/xml;charset=..." /&gt;</em> element in
     *          the HTML "head" element, naming the same encoding
     *          used to create this handler.  Also, if that encoding
     *          is anything other than US-ASCII, make sure that if
     *          the document is given a MIME content type, it has
     *          a <em>charset=...</em> attribute with that encoding.
     *  </ul>
     *
     * <p> Additionally, some of the oldest browsers have additional
     * quirks, to address with guidelines such as: <ul>
     *
     *  <li> Processing instructions may be rendered, so avoid them.
     *          (Similarly for an XML declaration.)
     *  <li> Embedded style sheets and scripts should not contain XML
     *          markup delimiters:  &amp;, &lt;, and ]]&gt; are trouble.
     *  <li> Attribute values should not have line breaks or multiple
     *          consecutive white space characters.
     *  <li> Use no more than one of the deprecated (transitional)
     *          <em>&lt;isindex&gt;</em> elements.
     *  <li> Some boolean attributes (such as <em>compact, checked,
     *          disabled, readonly, selected,</em> and more) confuse
     *          some browsers, since they only understand minimized
     *          versions which are illegal in XML.
     *  </ul>
     *
     * <p> Also, some characteristics of the resulting output may be
     * a function of whether the document is later given a MIME
     * content type of <em>text/html</em> rather than one indicating
     * XML (<em>application/xml</em> or <em>text/xml</em>).  Worse,
     * some browsers ignore MIME content types and prefer to rely on URI
     * name suffixes -- so an "index.xml" could always be XML, never
     * XHTML, no matter its MIME type.
*/
    final public void setXhtml (boolean value)
    {
        // a non-null locator is this class's signal that parsing began
        if (locator != null) {
            throw new IllegalStateException ("started parsing");
        }

        xhtml = value;
        // turning XHTML output on switches canonical output off
        if (value) {
            canonical = false;
        }
    }

    /**
     * Reports whether the output attempts to echo the input following
     * "transitional" XHTML rules and matching the "HTML Compatibility
     * Guidelines" so that an HTML version 3 browser can read the output
     * as HTML.
     *
     * @return true if XHTML output is enabled; false (the default)
     *  otherwise.
     */
    final public boolean isXhtml ()
    {
        return xhtml;
    }

    /**
     * Controls whether the output text contains references to
     * entities (the default), or instead contains the expanded
     * values of those entities.
     *
     * @param value true to write expanded entity values, false to
     *  write entity references.
     * @exception IllegalStateException if a locator has already been
     *  assigned to this handler ("started parsing").
     */
    final public void setExpandingEntities (boolean value)
    {
        // a non-null locator is this class's signal that parsing began
        if (locator != null) {
            throw new IllegalStateException ("started parsing");
        }

        expandingEntities = value;
        // switching expansion off also switches canonical output off
        if (!value) {
            canonical = false;
        }
    }

    /**
     * Reports whether entity references are replaced by their values.
     *
     * @return true if the output will have no entity references;
     *  false (the default) otherwise.
     */
    final public boolean isExpandingEntities ()
    {
        return expandingEntities;
    }

    /**
     * Controls pretty-printing, which by default is not enabled
     * (and currently is most useful for XHTML output).
     * Pretty printing enables structural indentation, sorting of attributes
     * by name, line wrapping, and potentially other mechanisms for making
     * output more or less readable.
12 3 4 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -