📄 validationconsumer.java
字号:
/* ValidationConsumer.java -- Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.This file is part of GNU Classpath.GNU Classpath is free software; you can redistribute it and/or modifyit under the terms of the GNU General Public License as published bythe Free Software Foundation; either version 2, or (at your option)any later version.GNU Classpath is distributed in the hope that it will be useful, butWITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNUGeneral Public License for more details.You should have received a copy of the GNU General Public Licensealong with GNU Classpath; see the file COPYING. If not, write to theFree Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA02110-1301 USA.Linking this library statically or dynamically with other modules ismaking a combined work based on this library. Thus, the terms andconditions of the GNU General Public License cover the wholecombination.As a special exception, the copyright holders of this library give youpermission to link this library with independent modules to produce anexecutable, regardless of the license terms of these independentmodules, and to copy and distribute the resulting executable underterms of your choice, provided that you also meet, for each linkedindependent module, the terms and conditions of the license of thatmodule. An independent module is a module which is not derived fromor based on this library. If you modify this library, you may extendthis exception to your version of the library, but you are notobligated to do so. If you do not wish to do so, delete thisexception statement from your version. 
*/package gnu.xml.pipeline;import java.io.IOException;import java.io.StringReader;import java.io.StringWriter;import java.util.EmptyStackException;import java.util.Enumeration;import java.util.Hashtable;import java.util.Stack;import java.util.StringTokenizer;import java.util.Vector;import org.xml.sax.Attributes;import org.xml.sax.EntityResolver;import org.xml.sax.ErrorHandler;import org.xml.sax.InputSource;import org.xml.sax.Locator;import org.xml.sax.SAXException;import org.xml.sax.SAXParseException;import org.xml.sax.XMLReader;import org.xml.sax.helpers.XMLReaderFactory;/** * This class checks SAX2 events to report validity errors; it works as * both a filter and a terminus on an event pipeline. It relies on the * producer of SAX events to: </p> <ol> * * <li> Conform to the specification of a non-validating XML parser that * reads all external entities, reported using SAX2 events. </li> * * <li> Report ignorable whitespace as such (through the ContentHandler * interface). This is, strictly speaking, optional for nonvalidating * XML processors. </li> * * <li> Make SAX2 DeclHandler callbacks, with default * attribute values already normalized (and without "<").</li> * * <li> Make SAX2 LexicalHandler startDTD() and endDTD () * callbacks. </li> * * <li> Act as if the <em>(URI)/namespace-prefixes</em> property were * set to true, by providing XML 1.0 names and all <code>xmlns*</code> * attributes (rather than omitting either or both). </li> * * </ol> * * <p> At this writing, the major SAX2 parsers (such as Ælfred2, * Crimson, and Xerces) meet these requirements, and this validation * module is used by the optional Ælfred2 validation support. * </p> * * <p> Note that because this is a layered validator, it has to duplicate some * work that the parser is doing; there are also other cost to layering. * However, <em>because of layering it doesn't need a parser</em> in order * to work! 
You can use it with anything that generates SAX events, such * as an application component that wants to detect invalid content in * a changed area without validating an entire document, or which wants to * ensure that it doesn't write invalid data to a communications partner.</p> * * <p> Also, note that because this is a layered validator, the line numbers * reported for some errors may seem strange. For example, if an element does * not permit character content, the validator * will use the locator provided to it. * That might reflect the last character of a <em>characters</em> event * callback, rather than the first non-whitespace character. </p> * * <hr /> * * <!-- * <p> Of interest is the fact that unlike most currently known XML validators, * this one can report some cases of non-determinism in element content models. * It is a compile-time option, enabled by default. This will only report * such XML errors if they relate to content actually appearing in a document; * content models aren't aggressively scanned for non-deterministic structure. * Documents which trigger such non-deterministic transitions may be handled * differently by different validating parsers, without losing conformance * to the XML specification. </p> * --> * * <p> Current limitations of the validation performed are in roughly three * categories. </p> * * <p> The first category represents constraints which demand violations * of software layering: exposing lexical details, one of the first things * that <em>application</em> programming interfaces (APIs) hide. These * invariably relate to XML entity handling, and to historical oddities * of the XML validation semantics. Curiously, * recent (Autumn 1999) conformance testing showed that these constraints are * among those handled worst by existing XML validating parsers. 
Arguments * have been made that each of these VCs should be turned into WFCs (most * of them) or discarded (popular for the standalone declaration); in short, * that these are bugs in the XML specification (not all via SGML): </p><ul> * * <li> The <em>Proper Declaration/PE Nesting</em> and * <em>Proper Group/PE Nesting</em> VCs can't be tested because they * require access to particularly low-level lexical information. * In essence, the reason XML isn't a simple thing to parse is that * it's not a context-free grammar, and these constraints elevate that * SGML-derived context sensitivity to the level of a semantic rule. </li> * * <li> The <em>Standalone Document Declaration</em> VC can't be * tested. This is for two reasons. First, this flag isn't made * available through SAX2. Second, it also requires breaking that * lexical layering boundary. (If you ever wondered why classes * in compiler construction or language design barely mention the * existence of context-sensitive grammars, it's because of messy * issues like these.) </li> * * <li> The <em>Entity Declared</em> VC can't be tested, because it * also requires breaking that lexical layering boundary! There's also * another issue: the VC wording (and seemingly intent) is ambiguous. * (This is still true in the "Second edition" XML spec.) * Since there is a WFC of the same name, everyone's life would be * easier if references to undeclared parsed entities were always * well-formedness errors, regardless of whether they're parameter entities * or not. (Note that nonvalidating parsers are not required * to report all such well-formedness errors if they don't read external * parameter entities, although currently most XML parsers read them * in an attempt to avoid problems from inconsistent parser behavior.) </li>
* * </ul> * * <p> The second category of limitations on this validation represent * constraints associated with information that is not guaranteed to be * available (or in one case, <em>is guaranteed not to be available</em>, * through the SAX2 API: </p><ul> * * <li> The <em>Unique Element Type Declaration</em> VC may not be * reportable, if the underlying parser happens not to expose * multiple declarations. (Ælfred2 reports these validity * errors directly.)</li> * * <li> Similarly, the <em>Unique Notation Name</em> VC, added in the * 14-January-2000 XML spec errata to restrict typing models used by * elements, may not be reportable. (Ælfred reports these * validity errors directly.) </li> * * </ul> * * <p> A third category relates to ease of implementation. (Think of this * as "bugs".) The most notable issue here is character handling. Rather * than attempting to implement the voluminous character tables in the XML * specification (Appendix B), Unicode rules are used directly from * the java.lang.Character class. Recent JVMs have begun to diverge from * the original specification for that class (Unicode 2.0), meaning that * different JVMs may handle that aspect of conformance differently. * </p> * * <p> Note that for some of the validity errors that SAX2 does not * expose, a nonvalidating parser is permitted (by the XML specification) * to report validity errors. When used with a parser that does so for * the validity constraints mentioned above (or any other SAX2 event * stream producer that does the same thing), overall conformance is * substantially improved. * * @see gnu.xml.aelfred2.SAXDriver * @see gnu.xml.aelfred2.XmlReader * * @author David Brownell */public final class ValidationConsumer extends EventFilter{ // report error if we happen to notice a non-deterministic choice? 
// we won't report buggy content models; just buggy instances private static final boolean warnNonDeterministic = false; // for tracking active content models private String rootName; private Stack contentStack = new Stack (); // flags for "saved DTD" processing private boolean disableDeclarations; private boolean disableReset; // // most VCs get tested when we see element start tags. the per-element // info (including attributes) recorded here duplicates that found inside // many nonvalidating parsers, hence dual lookups etc ... that's why a // layered validator isn't going to be as fast as a non-layered one. // // key = element name; value = ElementInfo private Hashtable elements = new Hashtable (); // some VCs relate to ID/IDREF/IDREFS attributes // key = id; value = boolean true (defd) or false (refd) private Hashtable ids = new Hashtable (); // we just record declared notation and unparsed entity names. // the implementation here is simple/slow; these features // are seldom used, one hopes they'll wither away soon private Vector notations = new Vector (5, 5); private Vector nDeferred = new Vector (5, 5); private Vector unparsed = new Vector (5, 5); private Vector uDeferred = new Vector (5, 5); // note: DocBk 3.1.7 XML defines over 2 dozen notations, // used when defining unparsed entities for graphics // (and maybe in other places) /** * Creates a pipeline terminus which consumes all events passed to * it; this will report validity errors as if they were fatal errors, * unless an error handler is assigned. * * @see #setErrorHandler */ // constructor used by PipelineFactory // ... and want one taking system ID of an external subset public ValidationConsumer () { this (null); } /** * Creates a pipeline filter which reports validity errors and then * passes events on to the next consumer if they were not fatal. * * @see #setErrorHandler */ // constructor used by PipelineFactory // ... 
and want one taking system ID of an external subset // (which won't send declaration events) public ValidationConsumer (EventConsumer next) { super (next); setContentHandler (this); setDTDHandler (this); try { setProperty (DECL_HANDLER, this); } catch (Exception e) { /* "can't happen" */ } try { setProperty (LEXICAL_HANDLER, this); } catch (Exception e) { /* "can't happen" */ } } private static final String fakeRootName = ":Nobody:in:their_Right.Mind_would:use:this-name:1x:"; /** * Creates a validation consumer which is preloaded with the DTD provided. * It does this by constructing a document with that DTD, then parsing * that document and recording its DTD declarations. Then it arranges * not to modify that information. * * <p> The resulting validation consumer will only validate against * the specified DTD, regardless of whether some other DTD is found * in a document being parsed. * * @param rootName The name of the required root element; if this is * null, any root element name will be accepted. * @param publicId If non-null and there is a non-null systemId, this * identifier provides an alternate access identifier for the DTD's * external subset. * @param systemId If non-null, this is a URI (normally URL) that * may be used to access the DTD's external subset. * @param internalSubset If non-null, holds literal markup declarations * comprising the DTD's internal subset. * @param resolver If non-null, this will be provided to the parser for * use when resolving parameter entities (including any external subset). * @param resolver If non-null, this will be provided to the parser for * use when resolving parameter entities (including any external subset). * @param minimalElement If non-null, a minimal valid document. * * @exception SAXNotSupportedException If the default SAX parser does * not support the standard lexical or declaration handlers. 
* @exception SAXParseException If the specified DTD has either * well-formedness or validity errors * @exception IOException If the specified DTD can't be read for * some reason */ public ValidationConsumer ( String rootName, String publicId, String systemId, String internalSubset, EntityResolver resolver, String minimalDocument ) throws SAXException, IOException { this (null); disableReset = true; if (rootName == null) rootName = fakeRootName; // // Synthesize document with that DTD; is it possible to do // better for the declaration of the root element? // // NOTE: can't use SAX2 to write internal subsets. // StringWriter writer = new StringWriter (); writer.write ("<!DOCTYPE "); writer.write (rootName); if (systemId != null) { writer.write ("\n "); if (publicId != null) { writer.write ("PUBLIC '"); writer.write (publicId); writer.write ("'\n\t'"); } else writer.write ("SYSTEM '"); writer.write (systemId); writer.write ("'"); } writer.write (" [ "); if (rootName == fakeRootName) { writer.write ("\n<!ELEMENT "); writer.write (rootName); writer.write (" EMPTY>"); } if (internalSubset != null) writer.write (internalSubset); writer.write ("\n ]>"); if (minimalDocument != null) { writer.write ("\n"); writer.write (minimalDocument); writer.write ("\n"); } else { writer.write (" <"); writer.write (rootName); writer.write ("/>\n"); } minimalDocument = writer.toString (); // // OK, load it // XMLReader producer; producer = XMLReaderFactory.createXMLReader (); bind (producer, this); if (resolver != null) producer.setEntityResolver (resolver); InputSource in; in = new InputSource (new StringReader (minimalDocument)); producer.parse (in); disableDeclarations = true; if (rootName == fakeRootName) this.rootName = null; } private void resetState () { if (!disableReset) { rootName = null; contentStack.removeAllElements (); elements.clear (); ids.clear (); notations.removeAllElements (); nDeferred.removeAllElements (); unparsed.removeAllElements (); uDeferred.removeAllElements 
(); } } private void warning (String description) throws SAXException { ErrorHandler errHandler = getErrorHandler (); Locator locator = getDocumentLocator (); SAXParseException err; if (errHandler == null) return; if (locator == null) err = new SAXParseException (description, null, null, -1, -1); else err = new SAXParseException (description, locator); errHandler.warning (err); } // package private (for ChildrenRecognizer) private void error (String description) throws SAXException { ErrorHandler errHandler = getErrorHandler (); Locator locator = getDocumentLocator (); SAXParseException err; if (locator == null) err = new SAXParseException (description, null, null, -1, -1); else err = new SAXParseException (description, locator); if (errHandler != null) errHandler.error (err); else // else we always treat it as fatal! throw err; } private void fatalError (String description) throws SAXException { ErrorHandler errHandler = getErrorHandler (); Locator locator = getDocumentLocator (); SAXParseException err; if (locator != null) err = new SAXParseException (description, locator); else err = new SAXParseException (description, null, null, -1, -1); if (errHandler != null) errHandler.fatalError (err); // we always treat this as fatal, regardless of the handler throw err; } private static boolean isExtender (char c) { // [88] Extender ::= ... return c == 0x00b7 || c == 0x02d0 || c == 0x02d1 || c == 0x0387 || c == 0x0640 || c == 0x0e46 || c == 0x0ec6 || c == 0x3005 || (c >= 0x3031 && c <= 0x3035) || (c >= 0x309d && c <= 0x309e) || (c >= 0x30fc && c <= 0x30fe); } // use augmented Unicode rules, not full XML rules private boolean isName (String name, String context, String id) throws SAXException { char buf [] = name.toCharArray (); boolean pass = true; if (!Character.isUnicodeIdentifierStart (buf [0]) && ":_".indexOf (buf [0]) == -1) pass = false; else { int max = buf.length; for (int i = 1; pass && i < max; i++) { char c = buf [i]; if (!Character.isUnicodeIdentifierPart (c)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -