xmlwriter.java
来自「kaffe Java 解释器语言,源码,Java的子集系统,开放源代码」· Java 代码 · 共 1,917 行 · 第 1/4 页
JAVA
1,917 行
/* * Copyright (C) 1999-2001 David Brownell * * This file is part of GNU JAXP, a library. * * GNU JAXP is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * GNU JAXP is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * As a special exception, if you link this library with other files to * produce an executable, this library does not by itself cause the * resulting executable to be covered by the GNU General Public License. * This exception does not however invalidate any other reasons why the * executable file might be covered by the GNU General Public License. */package gnu.xml.util;import java.io.BufferedWriter;import java.io.CharConversionException;import java.io.IOException;import java.io.OutputStream;import java.io.OutputStreamWriter;import java.io.Writer;import java.util.Stack;import org.xml.sax.*;import org.xml.sax.ext.*;import org.xml.sax.helpers.*;/** * This class is a SAX handler which writes all its input as a well formed * XML or XHTML document. If driven using SAX2 events, this output may * include a recreated document type declaration, subject to limitations * of SAX (no internal subset exposed) or DOM (the important declarations, * with their documentation, are discarded). * * <p> By default, text is generated "as-is", but some optional modes * are supported. Pretty-printing is supported, to make life easier * for people reading the output. 
XHTML (1.0) output can be made
 * particularly pretty; all the built-in character entities are known.
 * Canonical XML can also be generated, assuming the input is properly
 * formed.
 *
 * <hr>
 *
 * <p> Some of the methods on this class are intended for applications to
 * use directly, rather than as pure SAX2 event callbacks.  Some of those
 * methods access the JavaBeans properties (used to tweak output formats,
 * for example canonicalization and pretty printing).  Subclasses
 * are expected to add new behaviors, not to modify current behavior, so
 * many such methods are final.</p>
 *
 * <p> The <em>write*()</em> methods may be slightly simpler for some
 * applications to use than direct callbacks.  For example, they support
 * a simple policy for encoding data items as the content of a single element.
 *
 * <p> To reuse an XMLWriter you must provide it with a new Writer, since
 * this handler closes the writer it was given as part of its endDocument()
 * handling.  (XML documents have an end of input, and the way to encode
 * that on a stream is to close it.) </p>
 *
 * <hr>
 *
 * <p> Note that any relative URIs in the source document, as found in
 * entity and notation declarations, ought to have been fully resolved by
 * the parser providing events to this handler.  This means that the
 * output text should only have fully resolved URIs, which may not be
 * the desired behavior in cases where later binding is desired. </p>
 *
 * <p> <em>Note that due to SAX2 defaults, you may need to manually
 * ensure that the input events are XML-conformant with respect to namespace
 * prefixes and declarations.  {@link gnu.xml.pipeline.NSFilter} is
 * one solution to this problem, in the context of processing pipelines.</em>
 * Something as simple as connecting this handler to a parser might not
 * generate the correct output.  Another workaround is to ensure that the
 * <em>namespace-prefixes</em> feature is always set to true, if you're
 * hooking this directly up to some XMLReader implementation.
*
 * @see gnu.xml.pipeline.TextConsumer
 *
 * @author David Brownell
 */
public class XMLWriter
    implements ContentHandler, LexicalHandler, DTDHandler, DeclHandler
{
    // text prints/escapes differently depending on context
    //	CTX_ENTITY ... entity literal value
    //	CTX_ATTRIBUTE ... attribute literal value
    //	CTX_CONTENT ... content of an element
    //	CTX_UNPARSED ... CDATA, comment, PI, names, etc
    //  CTX_NAME ... name or nmtoken, no escapes possible
    private static final int CTX_ENTITY = 1;
    private static final int CTX_ATTRIBUTE = 2;
    private static final int CTX_CONTENT = 3;
    private static final int CTX_UNPARSED = 4;
    private static final int CTX_NAME = 5;

    // FIXME: names (element, attribute, PI, notation, etc) are not
    // currently written out with range checks (escapeChars).
    // In non-XHTML, some names can't be directly written; panic!

    // Default line ending: the platform's, unless that isn't legal in XML.
    private static String sysEOL;

    static {
        String candidate;

        try {
            candidate = System.getProperty ("line.separator", "\n");
        } catch (SecurityException e) {
            // not allowed to read the property; use the XML default
            candidate = "\n";
        }

        // don't use the system's EOL if it's illegal XML.
        sysEOL = isLineEnd (candidate) ? candidate : "\n";
    }

    /**
     * Tells whether the string is exactly one of the three line ends
     * this class accepts: "\n", "\r", or "\r\n".  A null argument
     * yields false.
     */
    private static boolean isLineEnd (String eol)
    {
        if ("\n".equals (eol)) {
            return true;
        }
        if ("\r".equals (eol)) {
            return true;
        }
        return "\r\n".equals (eol);
    }

    private Writer out;
    private boolean inCDATA;
    private int elementNestLevel;
    // line ending in use; starts as the default computed above
    private String eol = sysEOL;

    // assigned in setEncoding: 0xff80 for US-ASCII, 0xff00 for ISO-8859-1,
    // otherwise left at zero
    private short dangerMask;
    // allocated in setEncoding only when dangerMask is nonzero
    private StringBuffer stringBuf;
    private Locator locator;
    private ErrorHandler errHandler;

    private boolean expandingEntities = false;
    private int entityNestLevel;
    private boolean xhtml;
    private boolean startedDoctype;
    private String encoding;

    private boolean canonical;
    private boolean inDoctype;
    private boolean inEpilogue;

    // pretty printing controls
    private boolean prettyPrinting;
    private int column;
    private boolean noWrap;
    private Stack space = new Stack ();

    // this is not a hard'n'fast rule -- longer lines are OK,
    // but are to be avoided.  Here, prettyprinting is more to
    // show structure "cleanly" than to be precise about it.
// better to have ragged layout than one line 24Kb long. private static final int lineLength = 75; /** * Constructs this handler with System.out used to write SAX events * using the UTF-8 encoding. Avoid using this except when you know * it's safe to close System.out at the end of the document. */ public XMLWriter () throws IOException { this (System.out); } /** * Constructs a handler which writes all input to the output stream * in the UTF-8 encoding, and closes it when endDocument is called. * (Yes it's annoying that this throws an exception -- but there's * really no way around it, since it's barely possible a JDK may * exist somewhere that doesn't know how to emit UTF-8.) */ public XMLWriter (OutputStream out) throws IOException { this (new OutputStreamWriter (out, "UTF8")); } /** * Constructs a handler which writes all input to the writer, and then * closes the writer when the document ends. If an XML declaration is * written onto the output, and this class can determine the name of * the character encoding for this writer, that encoding name will be * included in the XML declaration. * * <P> See the description of the constructor which takes an encoding * name for imporant information about selection of encodings. * * @param writer XML text is written to this writer. */ public XMLWriter (Writer writer) { this (writer, null); } /** * Constructs a handler which writes all input to the writer, and then * closes the writer when the document ends. If an XML declaration is * written onto the output, this class will use the specified encoding * name in that declaration. If no encoding name is specified, no * encoding name will be declared unless this class can otherwise * determine the name of the character encoding for this writer. * * <P> At this time, only the UTF-8 ("UTF8") and UTF-16 ("Unicode") * output encodings are fully lossless with respect to XML data. 
If you * use any other encoding you risk having your data be silently mangled * on output, as the standard Java character encoding subsystem silently * maps non-encodable characters to a question mark ("?") and will not * report such errors to applications. * * <p> For a few other encodings the risk can be reduced. If the writer is * a java.io.OutputStreamWriter, and uses either the ISO-8859-1 ("8859_1", * "ISO8859_1", etc) or US-ASCII ("ASCII") encodings, content which * can't be encoded in those encodings will be written safely. Where * relevant, the XHTML entity names will be used; otherwise, numeric * character references will be emitted. * * <P> However, there remain a number of cases where substituting such * entity or character references is not an option. Such references are * not usable within a DTD, comment, PI, or CDATA section. Neither may * they be used when element, attribute, entity, or notation names have * the problematic characters. * * @param writer XML text is written to this writer. * @param encoding if non-null, and an XML declaration is written, * this is the name that will be used for the character encoding. */ public XMLWriter (Writer writer, String encoding) { setWriter (writer, encoding); } private void setEncoding (String encoding) { if (encoding == null && out instanceof OutputStreamWriter) encoding = ((OutputStreamWriter)out).getEncoding (); if (encoding != null) { encoding = encoding.toUpperCase (); // Use official encoding names where we know them, // avoiding the Java-only names. When using common // encodings where we can easily tell if characters // are out of range, we'll escape out-of-range // characters using character refs for safety. // I _think_ these are all the main synonyms for these! 
if ("UTF8".equals (encoding)) { encoding = "UTF-8"; } else if ("US-ASCII".equals (encoding) || "ASCII".equals (encoding)) { dangerMask = (short) 0xff80; encoding = "US-ASCII"; } else if ("ISO-8859-1".equals (encoding) || "8859_1".equals (encoding) || "ISO8859_1".equals (encoding)) { dangerMask = (short) 0xff00; encoding = "ISO-8859-1"; } else if ("UNICODE".equals (encoding) || "UNICODE-BIG".equals (encoding) || "UNICODE-LITTLE".equals (encoding)) { encoding = "UTF-16"; // TODO: UTF-16BE, UTF-16LE ... no BOM; what // release of JDK supports those Unicode names? } if (dangerMask != 0) stringBuf = new StringBuffer (); } this.encoding = encoding; } /** * Resets the handler to write a new text document. * * @param writer XML text is written to this writer. * @param encoding if non-null, and an XML declaration is written, * this is the name that will be used for the character encoding. * * @exception IllegalStateException if the current * document hasn't yet ended (with {@link #endDocument}) */ final public void setWriter (Writer writer, String encoding) { if (out != null) throw new IllegalStateException ( "can't change stream in mid course"); out = writer; if (out != null) setEncoding (encoding); if (!(out instanceof BufferedWriter)) out = new BufferedWriter (out); space.push ("default"); } /** * Assigns the line ending style to be used on output. * @param eolString null to use the system default; else * "\n", "\r", or "\r\n". */ final public void setEOL (String eolString) { if (eolString == null) eol = sysEOL; else if (!isLineEnd (eolString)) eol = eolString; else throw new IllegalArgumentException (eolString); } /** * Assigns the error handler to be used to present most fatal * errors. */ public void setErrorHandler (ErrorHandler handler) { errHandler = handler; } /** * Used internally and by subclasses, this encapsulates the logic * involved in reporting fatal errors. 
It uses locator information * for good diagnostics, if available, and gives the application's * ErrorHandler the opportunity to handle the error before throwing * an exception. */ protected void fatal (String message, Exception e) throws SAXException { SAXParseException x; if (locator == null) x = new SAXParseException (message, null, null, -1, -1, e); else x = new SAXParseException (message, locator, e); if (errHandler != null) errHandler.fatalError (x); throw x; } // JavaBeans properties /** * Controls whether the output should attempt to follow the "transitional" * XHTML rules so that it meets the "HTML Compatibility Guidelines" * appendix in the XHTML specification. A "transitional" Document Type * Declaration (DTD) is placed near the beginning of the output document, * instead of whatever DTD would otherwise have been placed there, and * XHTML empty elements are printed specially. When writing text in * US-ASCII or ISO-8859-1 encodings, the predefined XHTML internal * entity names are used (in preference to character references) when * writing content characters which can't be expressed in those encodings. * * <p> When this option is enabled, it is the caller's responsibility * to ensure that the input is otherwise valid as XHTML. Things to * be careful of in all cases, as described in the appendix referenced * above, include: <ul> * * <li> Element and attribute names must be in lower case, both * in the document and in any CSS style sheet. * <li> All XML constructs must be valid as defined by the XHTML * "transitional" DTD (including all familiar constructs, * even deprecated ones). * <li> The root element must be "html". * <li> Elements that must be empty (such as <em><br></em> * must have no content. * <li> Use both <em>lang</em> and <em>xml:lang</em> attributes * when specifying language. * <li> Similarly, use both <em>id</em> and <em>name</em> attributes * when defining elements that may be referred to through * URI fragment identifiers ... 
and make sure that the * value is a legal NMTOKEN, since not all such HTML 4.0 * identifiers are valid in XML. * <li> Be careful with character encodings; make sure you provide * a <em><meta http-equiv="Content-type" * content="text/xml;charset=..." /></em> element in * the HTML "head" element, naming the same encoding * used to create this handler. Also, if that encoding * is anything other than US-ASCII, make sure that if * the document is given a MIME content type, it has * a <em>charset=...</em> attribute with that encoding. * </ul> * * <p> Additionally, some of the oldest browsers have additional * quirks, to address with guidelines such as: <ul> * * <li> Processing instructions may be rendered, so avoid them. * (Similarly for an XML declaration.) * <li> Embedded style sheets and scripts should not contain XML * markup delimiters: &, <, and ]]> are trouble. * <li> Attribute values should not have line breaks or multiple * consecutive white space characters. * <li> Use no more than one of the deprecated (transitional) * <em><isindex></em> elements. * <li> Some boolean attributes (such as <em>compact, checked, * disabled, readonly, selected,</em> and more) confuse * some browsers, since they only understand minimized * versions which are illegal in XML. * </ul> * * <p> Also, some characteristics of the resulting output may be * a function of whether the document is later given a MIME * content type of <em>text/html</em> rather than one indicating * XML (<em>application/xml</em> or <em>text/xml</em>). Worse, * some browsers ignore MIME content types and prefer to rely URI * name suffixes -- so an "index.xml" could always be XML, never * XHTML, no matter its MIME type. 
*/ final public void setXhtml (boolean value) { if (locator != null) throw new IllegalStateException ("started parsing"); xhtml = value; if (xhtml) canonical = false; } /** * Returns true if the output attempts to echo the input following * "transitional" XHTML rules and matching the "HTML Compatibility * Guidelines" so that an HTML version 3 browser can read the output * as HTML; returns false (the default) othewise. */ final public boolean isXhtml () { return xhtml; } /** * Controls whether the output text contains references to * entities (the default), or instead contains the expanded * values of those entities. */ final public void setExpandingEntities (boolean value) { if (locator != null) throw new IllegalStateException ("started parsing"); expandingEntities = value; if (!expandingEntities) canonical = false; } /** * Returns true if the output will have no entity references; * returns false (the default) otherwise. */ final public boolean isExpandingEntities () { return expandingEntities; } /** * Controls pretty-printing, which by default is not enabled * (and currently is most useful for XHTML output). * Pretty printing enables structural indentation, sorting of attributes * by name, line wrapping, and potentially other mechanisms for making * output more or less readable. * * <p> At this writing, structural indentation and line wrapping are * enabled when pretty printing is enabled and the <em>xml:space</em> * attribute has the value <em>default</em> (its other legal value is * <em>preserve</em>, as defined in the XML specification). The three * XHTML element types which use another value are recognized by their * names (namespaces are ignored). * * <p> Also, for the record, the "pretty" aspect of printing here
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?