📄 validationconsumer.java
字号:
} else { Recognizer state = (Recognizer) contentStack.peek (); if (state != null) { Recognizer newstate = state.acceptElement (qName); if (newstate == null) error ("Element type '" + qName + "' in element '" + state.type.name + "' violates content model " + state.type.model ); if (newstate != state) { contentStack.pop (); contentStack.push (newstate); } } } // // Then check that this element was declared, and push the // object used to validate its content model onto our stack. // // This is where the recognizer gets created, if needed; if // it's a "children" (elements) content model, an NDFA is // created. (One recognizer is used per content type, no // matter how complex that recognizer is.) // ElementInfo info; info = (ElementInfo) elements.get (qName); if (info == null || info.model == null) { // VC: Element Valid (base clause) error ("Element type '" + qName + "' was not declared"); contentStack.push (null); // for less diagnostic noise, fake a declaration. elementDecl (qName, "ANY"); } else contentStack.push (info.getRecognizer (this)); // // Then check each attribute present // int len; String aname; AttributeInfo ainfo; if (atts != null) len = atts.getLength (); else len = 0; for (int i = 0; i < len; i++) { aname = atts.getQName (i); if (info == null || (ainfo = (AttributeInfo) info.attributes.get (aname)) == null) { // VC: Attribute Value Type error ("Attribute '" + aname + "' was not declared for element type " + qName); continue; } String value = atts.getValue (i); // note that "==" for type names and "#FIXED" is correct // (and fast) since we've interned those literals. if ("#FIXED" == ainfo.mode) { String expanded = expandDefaultRefs (ainfo.value); // VC: Fixed Attribute Default if (!value.equals (expanded)) { error ("Attribute '" + aname + "' must match " + expanded ); continue; } } if ("CDATA" == ainfo.type) continue; // // For all other attribute types, there are various // rules to follow. // if ("ID" == ainfo.type) { // VC: ID (must be a name) if (isName (value, "ID attribute", aname)) { if (Boolean.TRUE == ids.get (value)) // VC: ID (appears once) error ("ID attribute " + aname + " uses an ID value '" + value + "' which was already declared."); else // any forward refs are no longer problems ids.put (value, Boolean.TRUE); } continue; } if ("IDREF" == ainfo.type) { // VC: IDREF (value must be a name) if (isName (value, "IDREF attribute", aname)) { // VC: IDREF (must match some ID attribute) if (ids.get (value) == null) // new -- assume it's a forward ref ids.put (value, Boolean.FALSE); } continue; } if ("IDREFS" == ainfo.type) { StringTokenizer tokens = new StringTokenizer (value, " "); if (!tokens.hasMoreTokens ()) { // VC: IDREF (one or more values) error ("IDREFS attribute " + aname + " must have at least one ID ref"); } else do { String id = tokens.nextToken (); // VC: IDREF (value must be a name) if (isName (id, "IDREFS attribute", aname)) { // VC: IDREF (must match some ID attribute) if (ids.get (id) == null) // new -- assume it's a forward ref ids.put (id, Boolean.FALSE); } } while (tokens.hasMoreTokens ()); continue; } if ("NMTOKEN" == ainfo.type) { // VC: Name Token (is a name token) isNmtoken (value, "NMTOKEN attribute", aname); continue; } if ("NMTOKENS" == ainfo.type) { StringTokenizer tokens = new StringTokenizer (value, " "); if (!tokens.hasMoreTokens ()) { // VC: Name Token (one or more values) error ("NMTOKENS attribute " + aname + " must have at least one name token"); } else do { String token = tokens.nextToken (); // VC: Name Token (is a name token) isNmtoken (token, "NMTOKENS attribute", aname); } while (tokens.hasMoreTokens ()); continue; } if ("ENTITY" == ainfo.type) { if (!unparsed.contains (value)) // VC: Entity Name error ("Value of attribute '" + aname + "' refers to unparsed entity '" + value + "' which was not declared."); continue; } if ("ENTITIES" == ainfo.type) { StringTokenizer tokens = new StringTokenizer (value, " "); if (!tokens.hasMoreTokens ()) { // VC: Entity Name (one or more values) error ("ENTITIES attribute " + aname + " must have at least one name token"); } else do { String entity = tokens.nextToken (); if (!unparsed.contains (entity)) // VC: Entity Name error ("Value of attribute '" + aname + "' refers to unparsed entity '" + entity + "' which was not declared."); } while (tokens.hasMoreTokens ()); continue; } // // check for enumerations last; more expensive // if (ainfo.type.charAt (0) == '(' /*)*/ || ainfo.type.startsWith ("NOTATION ") ) { // VC: Enumeration (value must be defined) checkEnumeration (value, ainfo.type, aname); continue; } } // // Last, check that all #REQUIRED attributes were provided // if (info != null) { Hashtable table = info.attributes; if (table.size () != 0) { Enumeration e = table.keys (); // XXX table.keys uses the heap, bleech -- slows things while (e.hasMoreElements ()) { aname = (String) e.nextElement (); ainfo = (AttributeInfo) table.get (aname); // "#REQUIRED" mode was interned in attributeDecl if ("#REQUIRED" == ainfo.mode && atts.getValue (aname) == null) { // VC: Required Attribute error ("Attribute '" + aname + "' must be specified " + "for element type " + qName); } } } } super.startElement (uri, localName, qName, atts); } /** * <b>ContentHandler</b> Reports a validity error if the element's content * model does not permit character data. * Passed to the next consumer. */ public void characters (char ch [], int start, int length) throws SAXException { Recognizer state; if (contentStack.empty ()) state = null; else state = (Recognizer) contentStack.peek (); // NOTE: if this ever supports with SAX parsers that don't // report ignorable whitespace as such (only XP?), this class // needs to morph it into ignorableWhitespace() as needed ... if (state != null && !state.acceptCharacters ()) // VC: Element Valid (clauses three, four -- see recognizer) error ("Character content not allowed in element " + state.type.name); super.characters (ch, start, length); } /** * <b>ContentHandler</b> Reports a validity error if the element's content * model does not permit end-of-element yet, or a well formedness error * if there was no matching startElement call. * Passed to the next consumer. */ public void endElement (String uri, String localName, String qName) throws SAXException { try { Recognizer state = (Recognizer) contentStack.pop (); if (state != null && !state.completed ()) // VC: Element valid (clauses two, three, four; see Recognizer) error ("Premature end for element '" + state.type.name + "', content model " + state.type.model); // could insist on match of start element, but that's // something the input stream must to guarantee. } catch (EmptyStackException e) { fatalError ("endElement without startElement: " + qName + ((uri == null) ? "" : ( " { '" + uri + "', " + localName + " }"))); } super.endElement (uri, localName, qName); } /** * <b>ContentHandler</b> Checks whether all ID values that were * referenced have been declared, and releases all resources. * Passed to the next consumer. * * @see #setDocumentLocator */ public void endDocument () throws SAXException { for (Enumeration idNames = ids.keys (); idNames.hasMoreElements (); /* NOP */) { String id = (String) idNames.nextElement (); if (Boolean.FALSE == ids.get (id)) { // VC: IDREF (must match ID) error ("Undeclared ID value '" + id + "' was referred to by an IDREF/IDREFS attribute"); } } resetState (); super.endDocument (); } /** Holds per-element declarations */ static private final class ElementInfo { String name; String model; // key = attribute name; value = AttributeInfo Hashtable attributes = new Hashtable (11); ElementInfo (String n) { name = n; } private Recognizer recognizer; // for validating content models: one per type, shared, // and constructed only on demand ... so unused elements do // not need to consume resources. Recognizer getRecognizer (ValidationConsumer consumer) throws SAXException { if (recognizer == null) { if ("ANY".equals (model)) recognizer = ANY; else if ("EMPTY".equals (model)) recognizer = new EmptyRecognizer (this); else if ('#' == model.charAt (1)) // n.b. this constructor does a validity check recognizer = new MixedRecognizer (this, consumer); else recognizer = new ChildrenRecognizer (this, consumer); } return recognizer; } } /** Holds per-attribute declarations */ static private final class AttributeInfo { String type; String mode; // #REQUIRED, etc (or null) String value; // or null } // // Content model validation // static private final Recognizer ANY = new Recognizer (null); // Base class defines the calls used to validate content, // and supports the "ANY" content model static private class Recognizer { final ElementInfo type; Recognizer (ElementInfo t) { type = t; } // return true iff character data is legal here boolean acceptCharacters () throws SAXException // VC: Element Valid (third and fourth clauses) { return true; } // null return = failure // otherwise, next state (like an FSM) // prerequisite: tested that name was declared Recognizer acceptElement (String name) throws SAXException // VC: Element Valid (fourth clause) { return this; } // return true iff model is completed, can finish boolean completed () throws SAXException // VC: Element Valid (fourth clause) { return true; } public String toString () // n.b. "children" is the interesting case! { return (type == null) ? "ANY" : type.model; } } // "EMPTY" content model -- no characters or elements private static final class EmptyRecognizer extends Recognizer { public EmptyRecognizer (ElementInfo type) { super (type); } // VC: Element Valid (first clause) boolean acceptCharacters () { return false; } // VC: Element Valid (first clause) Recognizer acceptElement (String name) { return null; } } // "Mixed" content model -- ANY, but restricts elements private static final class MixedRecognizer extends Recognizer { private String permitted []; // N.B. constructor tests for duplicated element names (VC) public MixedRecognizer (ElementInfo t, ValidationConsumer v) throws SAXException { super (t); // (#PCDATA...)* or (#PCDATA) ==> ... or empty // with the "..." being "|elname|..." StringTokenizer tokens = new StringTokenizer ( t.model.substring (8, t.model.lastIndexOf (')')), "|"); Vector vec = new Vector (); while (tokens.hasMoreTokens ()) { String token = tokens.nextToken (); if (vec.contains (token)) v.error ("element " + token + " is repeated in mixed content model: " + t.model); else vec.addElement (token.intern ()); } permitted = new String [vec.size ()]; for (int i = 0; i < permitted.length; i++) permitted [i] = (String) vec.elementAt (i); // in one large machine-derived DTD sample, most of about // 250 mixed content models were empty, and 25 had ten or // more entries. 2 had over a hundred elements. Linear // search isn't obviously wrong. } // VC: Element Valid (third clause) Recognizer acceptElement (String name) { int length = permitted.length; // first pass -- optimistic w.r.t. event source interning // (and document validity) for (int i = 0; i < length; i++) if (permitted [i] == name) return this; // second pass -- pessimistic w.r.t. event source interning for (int i = 0; i < length; i++) if (permitted [i].equals (name)) return this; return null; } } // recognizer loop flags, see later private static final int F_LOOPHEAD = 0x01; private static final int F_LOOPNEXT = 0x02; // for debugging -- used to label/count nodes in toString() private static int nodeCount; /** * "Children" content model -- these are nodes in NDFA state graphs. * They work in fixed space. Note that these graphs commonly have * cycles, handling features such as zero-or-more and one-or-more. * * <p>It's readonly, so only one copy is ever needed. The content model * stack may have any number of pointers into each graph, when a model * happens to be needed more than once due to element nesting. Since * traversing the graph just moves to another node, and never changes
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -