📄 xmlparser.java
字号:
* <p>NOTE: the '<!ATTLIST' has already been read. */ private void parseAttlistDecl () throws Exception { String elementName; requireWhitespace (); elementName = readNmtoken (true); boolean white = tryWhitespace (); while (!tryRead ('>')) { if (!white) error ("whitespace required before attribute definition"); parseAttDef (elementName); white = tryWhitespace (); } } /** * Parse a single attribute definition. * <pre> * [53] AttDef ::= S Name S AttType S DefaultDecl * </pre> */ private void parseAttDef (String elementName) throws Exception { String name; int type; String enum = null; // Read the attribute name. name = readNmtoken (true); // Read the attribute type. requireWhitespace (); type = readAttType (); // Get the string of enumerated values // if necessary. if (type == ATTRIBUTE_ENUMERATED || type == ATTRIBUTE_NOTATION) { enum = dataBufferToString (); } // Read the default value. requireWhitespace (); parseDefault (elementName, name, type, enum); } /** * Parse the attribute type. * <pre> * [54] AttType ::= StringType | TokenizedType | EnumeratedType * [55] StringType ::= 'CDATA' * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' * | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' * [57] EnumeratedType ::= NotationType | Enumeration * </pre> */ private int readAttType () throws Exception { String typeString; Integer type; if (tryRead ('(')) { parseEnumeration (false); return ATTRIBUTE_ENUMERATED; } else { typeString = readNmtoken (true); if (typeString.equals ("NOTATION")) { parseNotationType (); } type = (Integer) attributeTypeHash.get (typeString); if (type == null) { error ("illegal attribute type", typeString, null); return ATTRIBUTE_UNDECLARED; } else { return type.intValue (); } } } /** * Parse an enumeration. * <pre> * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' * </pre> * <p>NOTE: the '(' has already been read. */ private void parseEnumeration (boolean isNames) throws Exception { char c; dataBufferAppend ('('); // Read the first token. skipWhitespace (); dataBufferAppend (readNmtoken (isNames)); // Read the remaining tokens. skipWhitespace (); while (!tryRead (')')) { require ('|'); dataBufferAppend ('|'); skipWhitespace (); dataBufferAppend (readNmtoken (isNames)); skipWhitespace (); } dataBufferAppend (')'); } /** * Parse a notation type for an attribute. * <pre> * [58] NotationType ::= 'NOTATION' S '(' S? NameNtoks * (S? '|' S? name)* S? ')' * </pre> * <p>NOTE: the 'NOTATION' has already been read */ private void parseNotationType () throws Exception { requireWhitespace (); require ('('); parseEnumeration (true); } /** * Parse the default value for an attribute. * <pre> * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' * | (('#FIXED' S)? AttValue) * </pre> */ private void parseDefault ( String elementName, String name, int type, String enum ) throws Exception { int valueType = ATTRIBUTE_DEFAULT_SPECIFIED; String value = null; int flags = LIT_ATTRIBUTE | LIT_DISABLE_CREF | LIT_ENTITY_CHECK; // Note: char refs not checked here, and input not normalized, // since it's done correctly later when we actually expand any // entity refs. We ought to report char ref syntax errors now, // but don't. Cost: unused defaults mean unreported WF errs. // LIT_ATTRIBUTE forces '<' checks now (ASAP) and turns whitespace // chars to spaces (doesn't matter when that's done if it doesn't // interfere with char refs expanding to whitespace). if (tryRead ('#')) { if (tryRead ("FIXED")) { valueType = ATTRIBUTE_DEFAULT_FIXED; requireWhitespace (); value = readLiteral (flags); } else if (tryRead ("REQUIRED")) { valueType = ATTRIBUTE_DEFAULT_REQUIRED; } else if (tryRead ("IMPLIED")) { valueType = ATTRIBUTE_DEFAULT_IMPLIED; } else { error ("illegal keyword for attribute default value"); } } else value = readLiteral (flags); setAttribute (elementName, name, type, enum, value, valueType); } /** * Parse a conditional section. * <pre> * [61] conditionalSect ::= includeSect || ignoreSect * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' * extSubsetDecl ']]>' * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' * ignoreSectContents* ']]>' * [64] ignoreSectContents ::= Ignore * ('<![' ignoreSectContents* ']]>' Ignore )* * [65] Ignore ::= Char* - (Char* ( '<![' | ']]>') Char* ) * </pre> * <p> NOTE: the '>![' has already been read. */ private void parseConditionalSect () throws Exception { skipWhitespace (); if (tryRead ("INCLUDE")) { skipWhitespace (); require ('['); skipWhitespace (); while (!tryRead ("]]>")) { parseMarkupdecl (); skipWhitespace (); } } else if (tryRead ("IGNORE")) { skipWhitespace (); require ('['); int nesting = 1; char c; expandPE = false; for (int nest = 1; nest > 0;) { c = readCh (); switch (c) { case '<': if (tryRead ("![")) { nest++; } case ']': if (tryRead ("]>")) { nest--; } } } expandPE = true; } else { error ("conditional section must begin with INCLUDE or IGNORE"); } } /** * Read and interpret a character reference. * <pre> * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' * </pre> * <p>NOTE: the '&#' has already been read. */ private void parseCharRef () throws SAXException, IOException { int value = 0; char c; if (tryRead ('x')) {loop1: while (true) { c = readCh (); switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case 'a': case 'A': case 'b': case 'B': case 'c': case 'C': case 'd': case 'D': case 'e': case 'E': case 'f': case 'F': value *= 16; value += Integer.parseInt (new Character (c).toString (), 16); break; case ';': break loop1; default: error ("illegal character in character reference", c, null); break loop1; } } } else {loop2: while (true) { c = readCh (); switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': value *= 10; value += Integer.parseInt (new Character (c).toString (), 10); break; case ';': break loop2; default: error ("illegal character in character reference", c, null); break loop2; } } } // check for character refs being legal XML if ((value < 0x0020 && ! (value == '\n' || value == '\t' || value == '\r')) || (value >= 0xD800 && value <= 0xDFFF) || value == 0xFFFE || value == 0xFFFF || value > 0x0010ffff) error ("illegal XML character reference U+" + Integer.toHexString (value)); // Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz // (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz: if (value <= 0x0000ffff) { // no surrogates needed dataBufferAppend ((char) value); } else if (value <= 0x0010ffff) { value -= 0x10000; // > 16 bits, surrogate needed dataBufferAppend ((char) (0xd800 | (value >> 10))); dataBufferAppend ((char) (0xdc00 | (value & 0x0003ff))); } else { // too big for surrogate error ("character reference " + value + " is too large for UTF-16", new Integer (value).toString (), null); } } /** * Parse and expand an entity reference. * <pre> * [68] EntityRef ::= '&' Name ';' * </pre> * <p>NOTE: the '&' has already been read. * @param externalAllowed External entities are allowed here. */ private void parseEntityRef (boolean externalAllowed) throws SAXException, IOException { String name; name = readNmtoken (true); require (';'); switch (getEntityType (name)) { case ENTITY_UNDECLARED: error ("reference to undeclared entity", name, null); break; case ENTITY_INTERNAL: pushString (name, getEntityValue (name)); break; case ENTITY_TEXT: if (externalAllowed) { pushURL (name, getEntityPublicId (name), getEntitySystemId (name), null, null, null); } else { error ("reference to external entity in attribute value.", name, null); } break; case ENTITY_NDATA: if (externalAllowed) { error ("unparsed entity reference in content", name, null); } else { error ("reference to external entity in attribute value.", name, null); } break; } } /** * Parse and expand a parameter entity reference. * <pre> * [69] PEReference ::= '%' Name ';' * </pre> * <p>NOTE: the '%' has already been read. */ private void parsePEReference () throws SAXException, IOException { String name; name = "%" + readNmtoken (true); require (';'); switch (getEntityType (name)) { case ENTITY_UNDECLARED: // this is a validity problem, not a WFC violation ... but // we should disable handling of all subsequent declarations // unless this is a standalone document // warn ("reference to undeclared parameter entity", name, null); break; case ENTITY_INTERNAL: if (inLiteral) pushString (name, getEntityValue (name)); else pushString (name, " " + getEntityValue (name) + ' '); break; case ENTITY_TEXT: if (!inLiteral) pushString (null, " "); pushURL (name, getEntityPublicId (name), getEntitySystemId (name), null, null, null); if (!inLiteral) pushString (null, " "); break; } } /** * Parse an entity declaration. * <pre> * [70] EntityDecl ::= GEDecl | PEDecl * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) * [74] PEDef ::= EntityValue | ExternalID * [75] ExternalID ::= 'SYSTEM' S SystemLiteral * | 'PUBLIC' S PubidLiteral S SystemLiteral * [76] NDataDecl ::= S 'NDATA' S Name * </pre> * <p>NOTE: the '<!ENTITY' has already been read. */ private void parseEntityDecl () throws Exception { char c; boolean peFlag = false; String name, value, notationName, ids[]; // Check for a parameter entity. expandPE = false; requireWhitespace (); if (tryRead ('%')) { peFlag = true; requireWhitespace (); } expandPE = true; // Read the entity name, and prepend // '%' if necessary. name = readNmtoken (true); if (peFlag) { name = "%" + name; } // Read the entity value. requireWhitespace (); c = readCh (); unread (c); if (c == '"' || c == '\'') { // Internal entity ... replacement text has expanded refs // to characters and PEs, but not to general entities value = readLiteral (0); setInternalEntity (name, value); } else { // Read the external IDs ids = readExternalIds (false); if (ids [1] == null) { error ("system identifer missing", name, null);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -