📄 xmlparser.java
字号:
{ int valueType = ATTRIBUTE_DEFAULT_SPECIFIED; String value = null; int flags = LIT_ATTRIBUTE | LIT_DISABLE_CREF | LIT_ENTITY_CHECK; // Note: char refs not checked here, and input not normalized, // since it's done correctly later when we actually expand any // entity refs. We ought to report char ref syntax errors now, // but don't. Cost: unused defaults mean unreported WF errs. // LIT_ATTRIBUTE forces '<' checks now (ASAP) and turns whitespace // chars to spaces (doesn't matter when that's done if it doesn't // interfere with char refs expanding to whitespace). if (tryRead ('#')) { if (tryRead ("FIXED")) { valueType = ATTRIBUTE_DEFAULT_FIXED; requireWhitespace (); value = readLiteral (flags); } else if (tryRead ("REQUIRED")) { valueType = ATTRIBUTE_DEFAULT_REQUIRED; } else if (tryRead ("IMPLIED")) { valueType = ATTRIBUTE_DEFAULT_IMPLIED; } else { error ("illegal keyword for attribute default value"); } } else value = readLiteral (flags); setAttribute (elementName, name, type, enum, value, valueType); } /** * Parse a conditional section. * <pre> * [61] conditionalSect ::= includeSect | ignoreSect * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' * extSubsetDecl ']]>' * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' * ignoreSectContents* ']]>' * [64] ignoreSectContents ::= Ignore * ('<![' ignoreSectContents* ']]>' Ignore )* * [65] Ignore ::= Char* - (Char* ( '<![' | ']]>') Char* ) * </pre> * <p> NOTE: the '<![' has already been read. 
*/ private void parseConditionalSect () throws Exception { skipWhitespace (); if (tryRead ("INCLUDE")) { skipWhitespace (); require ('['); skipWhitespace (); while (!tryRead ("]]>")) { parseMarkupdecl (); skipWhitespace (); } } else if (tryRead ("IGNORE")) { skipWhitespace (); require ('['); int nesting = 1; char c; expandPE = false; for (int nest = 1; nest > 0;) { c = readCh (); switch (c) { case '<': if (tryRead ("![")) { nest++; } case ']': if (tryRead ("]>")) { nest--; } } } expandPE = true; } else { error ("conditional section must begin with INCLUDE or IGNORE"); } } /** * Read and interpret a character reference. * <pre> * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' * </pre> * <p>NOTE: the '&#' has already been read. */ private void parseCharRef () throws SAXException, IOException { int value = 0; char c; if (tryRead ('x')) {loop1: while (true) { c = readCh (); switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case 'a': case 'A': case 'b': case 'B': case 'c': case 'C': case 'd': case 'D': case 'e': case 'E': case 'f': case 'F': value *= 16; value += Integer.parseInt (new Character (c).toString (), 16); break; case ';': break loop1; default: error ("illegal character in character reference", c, null); break loop1; } } } else {loop2: while (true) { c = readCh (); switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': value *= 10; value += Integer.parseInt (new Character (c).toString (), 10); break; case ';': break loop2; default: error ("illegal character in character reference", c, null); break loop2; } } } // check for character refs being legal XML if ((value < 0x0020 && ! 
(value == '\n' || value == '\t' || value == '\r')) || (value >= 0xD800 && value <= 0xDFFF) || value == 0xFFFE || value == 0xFFFF || value > 0x0010ffff) error ("illegal XML character reference U+" + Integer.toHexString (value)); // Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz // (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz: if (value <= 0x0000ffff) { // no surrogates needed dataBufferAppend ((char) value); } else if (value <= 0x0010ffff) { value -= 0x10000; // > 16 bits, surrogate needed dataBufferAppend ((char) (0xd800 | (value >> 10))); dataBufferAppend ((char) (0xdc00 | (value & 0x0003ff))); } else { // too big for surrogate error ("character reference " + value + " is too large for UTF-16", new Integer (value).toString (), null); } } /** * Parse and expand an entity reference. * <pre> * [68] EntityRef ::= '&' Name ';' * </pre> * <p>NOTE: the '&' has already been read. * @param externalAllowed External entities are allowed here. */ private void parseEntityRef (boolean externalAllowed) throws SAXException, IOException { String name; name = readNmtoken (true); require (';'); switch (getEntityType (name)) { case ENTITY_UNDECLARED: error ("reference to undeclared entity", name, null); break; case ENTITY_INTERNAL: pushString (name, getEntityValue (name)); break; case ENTITY_TEXT: if (externalAllowed) { pushURL (name, getEntityPublicId (name), getEntitySystemId (name), null, null, null); } else { error ("reference to external entity in attribute value.", name, null); } break; case ENTITY_NDATA: if (externalAllowed) { error ("unparsed entity reference in content", name, null); } else { error ("reference to external entity in attribute value.", name, null); } break; } } /** * Parse and expand a parameter entity reference. * <pre> * [69] PEReference ::= '%' Name ';' * </pre> * <p>NOTE: the '%' has already been read. 
*/ private void parsePEReference () throws SAXException, IOException { String name; name = "%" + readNmtoken (true); require (';'); switch (getEntityType (name)) { case ENTITY_UNDECLARED: // this is a validity problem, not a WFC violation ... but // we should disable handling of all subsequent declarations // unless this is a standalone document // warn ("reference to undeclared parameter entity", name, null); break; case ENTITY_INTERNAL: if (inLiteral) pushString (name, getEntityValue (name)); else pushString (name, " " + getEntityValue (name) + ' '); break; case ENTITY_TEXT: if (!inLiteral) pushString (null, " "); pushURL (name, getEntityPublicId (name), getEntitySystemId (name), null, null, null); if (!inLiteral) pushString (null, " "); break; } } /** * Parse an entity declaration. * <pre> * [70] EntityDecl ::= GEDecl | PEDecl * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) * [74] PEDef ::= EntityValue | ExternalID * [75] ExternalID ::= 'SYSTEM' S SystemLiteral * | 'PUBLIC' S PubidLiteral S SystemLiteral * [76] NDataDecl ::= S 'NDATA' S Name * </pre> * <p>NOTE: the '<!ENTITY' has already been read. */ private void parseEntityDecl () throws Exception { char c; boolean peFlag = false; String name, value, notationName, ids[]; // Check for a parameter entity. expandPE = false; requireWhitespace (); if (tryRead ('%')) { peFlag = true; requireWhitespace (); } expandPE = true; // Read the entity name, and prepend // '%' if necessary. name = readNmtoken (true); if (peFlag) { name = "%" + name; } // Read the entity value. requireWhitespace (); c = readCh (); unread (c); if (c == '"' || c == '\'') { // Internal entity ... 
replacement text has expanded refs // to characters and PEs, but not to general entities value = readLiteral (0); setInternalEntity (name, value); } else { // Read the external IDs ids = readExternalIds (false); if (ids [1] == null) { error ("system identifer missing", name, null); } // Check for NDATA declaration. boolean white = tryWhitespace (); if (!peFlag && tryRead ("NDATA")) { if (!white) error ("whitespace required before NDATA"); requireWhitespace (); notationName = readNmtoken (true); setExternalDataEntity (name, ids [0], ids [1], notationName); } else { setExternalTextEntity (name, ids [0], ids [1]); } } // Finish the declaration. skipWhitespace (); require ('>'); } /** * Parse a notation declaration. * <pre> * [82] NotationDecl ::= '<!NOTATION' S Name S * (ExternalID | PublicID) S? '>' * [83] PublicID ::= 'PUBLIC' S PubidLiteral * </pre> * <P>NOTE: the '<!NOTATION' has already been read. */ private void parseNotationDecl () throws Exception { String nname, ids[]; requireWhitespace (); nname = readNmtoken (true); requireWhitespace (); // Read the external identifiers. ids = readExternalIds (true); if (ids [0] == null && ids [1] == null) { error ("external identifer missing", nname, null); } // Register the notation. setNotation (nname, ids [0], ids [1]); skipWhitespace (); require ('>'); } /** * Parse character data. * <pre> * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) * </pre> */ private void parseCharData () throws Exception { char c; // Start with a little cheat -- in most // cases, the entire sequence of // character data will already be in // the readBuffer; if not, fall through to // the normal approach. 
if (USE_CHEATS) { int lineAugment = 0; int columnAugment = 0;loop: for (int i = readBufferPos; i < readBufferLength; i++) { switch (c = readBuffer [i]) { case '\n': lineAugment++; columnAugment = 0; break; case '&': case '<': int start = readBufferPos; columnAugment++; readBufferPos = i; if (lineAugment > 0) { line += lineAugment; column = columnAugment; } else { column += columnAugment; } dataBufferAppend (readBuffer, start, i - start); return; case ']': // XXX missing two end-of-buffer cases if ((i + 2) < readBufferLength) { if (readBuffer [i + 1] == ']' && readBuffer [i + 2] == '>') { error ("character data may not contain ']]>'"); } } columnAugment++; break; default: if (c < 0x0020 || c > 0xFFFD) error ("illegal XML character U+" + Integer.toHexString (c)); // FALLTHROUGH case '\r': case '\t': columnAugment++; } } } // OK, the cheat didn't work; start over // and do it by the book. while (true) { c = readCh (); switch (c) { case '<': case '&': unread (c); return; // XXX "]]>" precluded ... default: dataBufferAppend (c); break; } } } ////////////////////////////////////////////////////////////////////// // High-level reading and scanning methods. ////////////////////////////////////////////////////////////////////// /** * Require whitespace characters. */ private void requireWhitespace () throws SAXException, IOException { char c = readCh (); if (isWhitespace (c)) { skipWhitespace (); } else { error ("whitespace required", c, null); } } /** * Parse whitespace characters, and leave them in the data buffer. */ private void parseWhitespace () throws Exception { char c = readCh (); while (isWhitespace (c)) { dataBufferAppend (c); c = readCh (); } unread (c); } /** * Skip whitespace characters. * <pre> * [3] S ::= (#x20 | #x9 | #xd | #xa)+ * </pre> */ private void skipWhitespace () throws SAXException, IOException { // Start with a little cheat. Most of // the time, the white space will fall // within the current read buffer; if // not, then fall through. 
if (USE_CHEATS) { int lineAugment = 0; int columnAugment = 0;loop: for (int i = readBufferPos; i < readBufferLength; i++) { switch (readBuffer [i]) { case ' ': case '\t': case '\r': columnAugment++; break; case '\n': lineAugment++;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -