📄 xmlparser.java
字号:
name = readNmtoken(true); if (!tryRead("?>")) { requireWhitespace(); parseUntil("?>"); } if (handler != null) { handler.processingInstruction(name, dataBufferToString()); } } /** * Parse a CDATA marked section. * <pre> * [20] CDSect ::= CDStart CData CDEnd * [21] CDStart ::= '<![CDATA[' * [22] CData ::= (Char* - (Char* ']]>' Char*)) * [23] CDEnd ::= ']]>' * </pre> * <p>(The '<![CDATA[' has already been read.) * <p>Note that this just appends characters to the dataBuffer, * without actually generating an event. */ void parseCDSect () throws java.lang.Exception { parseUntil("]]>"); } /** * Parse the prolog of an XML document. * <pre> * [24] prolog ::= XMLDecl? Misc* (Doctypedecl Misc*)? * </pre> * <p>There are a couple of tricks here. First, it is necessary to * declare the XML default attributes after the DTD (if present) * has been read. Second, it is not possible to expand general * references in attribute value literals until after the entire * DTD (if present) has been parsed. * <p>We do not look for the XML declaration here, because it is * handled by pushURL(). * @see pushURL */ void parseProlog () throws java.lang.Exception { parseMisc(); if (tryRead("<!DOCTYPE")) { parseDoctypedecl(); parseMisc(); } } /** * Parse the XML declaration. * <pre> * [25] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' * [26] VersionInfo ::= S 'version' Eq ('"1.0"' | "'1.0'") * [33] SDDecl ::= S 'standalone' Eq "'" ('yes' | 'no') "'" * | S 'standalone' Eq '"' ("yes" | "no") '"' * [78] EncodingDecl ::= S 'encoding' Eq QEncoding * </pre> * <p>([80] to [82] are also significant.) * <p>(The <code><?xml</code> and whitespace have already been read.) * <p>TODO: validate value of standalone. * @see #parseTextDecl * @see #checkEncoding */ void parseXMLDecl (boolean ignoreEncoding) throws java.lang.Exception { String version; String encodingName = null; String standalone = null; // Read the version. 
require("version"); parseEq(); version = readLiteral(0); if (!version.equals("1.0")) { error("unsupported XML version", version, "1.0"); } // Try reading an encoding declaration. skipWhitespace(); if (tryRead("encoding")) { parseEq(); encodingName = readLiteral(0); checkEncoding(encodingName, ignoreEncoding); } // Try reading a standalone declaration skipWhitespace(); if (tryRead("standalone")) { parseEq(); standalone = readLiteral(0); } skipWhitespace(); require("?>"); } /** * Parse the Encoding PI. * <pre> * [78] EncodingDecl ::= S 'encoding' Eq QEncoding * [79] EncodingPI ::= '<?xml' S 'encoding' Eq QEncoding S? '?>' * [80] QEncoding ::= '"' Encoding '"' | "'" Encoding "'" * [81] Encoding ::= LatinName * [82] LatinName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* * </pre> * <p>(The <code><?xml</code>' and whitespace have already been read.) * @see #parseXMLDecl * @see #checkEncoding */ void parseTextDecl (boolean ignoreEncoding) throws java.lang.Exception { String encodingName = null; // Read an optional version. if (tryRead("version")) { String version; parseEq(); version = readLiteral(0); if (!version.equals("1.0")) { error("unsupported XML version", version, "1.0"); } requireWhitespace(); } // Read the encoding. require("encoding"); parseEq(); encodingName = readLiteral(0); checkEncoding(encodingName, ignoreEncoding); skipWhitespace(); require("?>"); } /** * Check that the encoding specified makes sense. * <p>Compare what the author has specified in the XML declaration * or encoding PI with what we have detected. * <p>This is also important for distinguishing among the various * 7- and 8-bit encodings, such as ISO-LATIN-1 (I cannot autodetect * those). * @param encodingName The name of the encoding specified by the user. 
* @see #parseXMLDecl * @see #parseTextDecl */ void checkEncoding (String encodingName, boolean ignoreEncoding) throws java.lang.Exception { encodingName = encodingName.toUpperCase(); if (ignoreEncoding) { return; } switch (encoding) { // 8-bit encodings case ENCODING_UTF_8: if (encodingName.equals("ISO-8859-1")) { encoding = ENCODING_ISO_8859_1; } else if (!encodingName.equals("UTF-8")) { error("unsupported 8-bit encoding", encodingName, "UTF-8 or ISO-8859-1"); } break; // 16-bit encodings case ENCODING_UCS_2_12: case ENCODING_UCS_2_21: if (!encodingName.equals("ISO-10646-UCS-2") && !encodingName.equals("UTF-16")) { error("unsupported 16-bit encoding", encodingName, "ISO-10646-UCS-2"); } break; // 32-bit encodings case ENCODING_UCS_4_1234: case ENCODING_UCS_4_4321: case ENCODING_UCS_4_2143: case ENCODING_UCS_4_3412: if (!encodingName.equals("ISO-10646-UCS-4")) { error("unsupported 32-bit encoding", encodingName, "ISO-10646-UCS-4"); } } } /** * Parse miscellaneous markup outside the document element and DOCTYPE * declaration. * <pre> * [27] Misc ::= Comment | PI | S * </pre> */ void parseMisc () throws java.lang.Exception { while (true) { skipWhitespace(); if (tryRead("<?")) {parsePI();} else if (tryRead("<!--")) {parseComment();} else {return;} } } /** * Parse a document type declaration. * <pre> * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? * ('[' %markupdecl* ']' S?)? '>' * </pre> * <p>(The <code><!DOCTYPE</code> has already been read.) */ void parseDoctypedecl () throws java.lang.Exception { char c; String doctypeName, ids[]; // Read the document type name. requireWhitespace(); doctypeName = readNmtoken(true); // Read the ExternalIDs. skipWhitespace(); ids = readExternalIds(false); // Look for a declaration subset. 
skipWhitespace(); if (tryRead('[')) { // loop until the subset ends while (true) { context = CONTEXT_DTD; skipWhitespace(); context = CONTEXT_NONE; if (tryRead(']')) { break; // end of subset } else { context = CONTEXT_DTD; parseMarkupdecl(); context = CONTEXT_NONE; } } } // Read the external subset, if any if (ids[1] != null) { pushURL("[external subset]", ids[0], ids[1], null, null, null); // Loop until we end up back at '>' while (true) { context = CONTEXT_DTD; skipWhitespace(); context = CONTEXT_NONE; if (tryRead('>')) { break; } else { context = CONTEXT_DTD; parseMarkupdecl(); context = CONTEXT_NONE; } } } else { // No external subset. skipWhitespace(); require('>'); } if (handler != null) { handler.doctypeDecl(doctypeName, ids[0], ids[1]); } // Expand general entities in // default values of attributes. // (Do this after the doctypeDecl // event!). // expandAttributeDefaultValues(); } /** * Parse a markup declaration in the internal or external DTD subset. * <pre> * [29] markupdecl ::= ( %elementdecl | %AttlistDecl | %EntityDecl | * %NotationDecl | %PI | %S | %Comment | * InternalPERef ) * [30] InternalPERef ::= PEReference * [31] extSubset ::= (%markupdecl | %conditionalSect)* * </pre> */ void parseMarkupdecl () throws java.lang.Exception { if (tryRead("<!ELEMENT")) { parseElementdecl(); } else if (tryRead("<!ATTLIST")) { parseAttlistDecl(); } else if (tryRead("<!ENTITY")) { parseEntityDecl(); } else if (tryRead("<!NOTATION")) { parseNotationDecl(); } else if (tryRead("<?")) { parsePI(); } else if (tryRead("<!--")) { parseComment(); } else if (tryRead("<![")) { parseConditionalSect(); } else { error("expected markup declaration", null, null); } } /** * Parse an element, with its tags. * <pre> * [33] STag ::= '<' Name (S Attribute)* S? '>' [WFC: unique Att spec] * [38] element ::= EmptyElement | STag content ETag * [39] EmptyElement ::= '<' Name (S Attribute)* S? '/>' * [WFC: unique Att spec] * </pre> * <p>(The '<' has already been read.) 
* <p>NOTE: this method actually chains onto parseContent(), if necessary, * and parseContent() will take care of calling parseETag(). */ void parseElement () throws java.lang.Exception { String gi; char c; int oldElementContent = currentElementContent; String oldElement = currentElement; // This is the (global) counter for the // array of specified attributes. tagAttributePos = 0; // Read the element type name. gi = readNmtoken(true); // Determine the current content type. currentElement = gi; currentElementContent = getElementContentType(gi); if (currentElementContent == CONTENT_UNDECLARED) { currentElementContent = CONTENT_ANY; } // Read the attributes, if any. // After this loop, we should be just // in front of the closing delimiter. skipWhitespace(); c = readCh(); while (c != '/' && c != '>') { unread(c); parseAttribute(gi); skipWhitespace(); c = readCh(); } unread(c); // Supply any defaulted attributes. Enumeration atts = declaredAttributes(gi); if (atts != null) { String aname; loop: while (atts.hasMoreElements()) { aname = (String)atts.nextElement(); // See if it was specified. for (int i = 0; i < tagAttributePos; i++) { if (tagAttributes[i] == aname) { continue loop; } } // I guess not... if (handler != null) { handler.attribute(aname, getAttributeExpandedValue(gi, aname), false); } } } // Figure out if this is a start tag // or an empty element, and dispatch an // event accordingly. c = readCh(); switch (c) { case '>': if (handler != null) { handler.startElement(gi); } parseContent(); break; case '/': require('>'); if (handler != null) { handler.startElement(gi); handler.endElement(gi); } break; } // Restore the previous state. currentElement = oldElement; currentElementContent = oldElementContent; } /** * Parse an attribute assignment. * <pre> * [34] Attribute ::= Name Eq AttValue * </pre> * @param name The name of the attribute's element. 
* @see XmlHandler#attribute
 */
void parseAttribute (String name) throws java.lang.Exception {
  String aname;
  int type;
  String value;

  // Read the attribute name.
  aname = readNmtoken(true).intern();
  type = getAttributeDefaultValueType(name, aname);

  // Parse '='
  parseEq();

  // Read the value, normalizing whitespace
  // if it is not CDATA.
  if (type == ATTRIBUTE_CDATA || type == ATTRIBUTE_UNDECLARED) {
    value = readLiteral(LIT_CHAR_REF | LIT_ENTITY_REF);
  } else {
    value = readLiteral(LIT_CHAR_REF | LIT_ENTITY_REF | LIT_NORMALIZE);
  }

  // Inform the handler about the
  // attribute.
  if (handler != null) {
    handler.attribute(aname, value, true);
  }
  dataBufferPos = 0;

  // Note that the attribute has been
  // specified, doubling the array when it is full.
  if (tagAttributePos == tagAttributes.length) {
    String newAttrib[] = new String[tagAttributes.length * 2];
    System.arraycopy(tagAttributes, 0, newAttrib, 0, tagAttributePos);
    tagAttributes = newAttrib;
  }
  tagAttributes[tagAttributePos++] = aname;
}


/**
 * Parse an equals sign surrounded by optional whitespace.
 * <pre>
 * [35] Eq ::= S? '=' S?
 * </pre>
 */
void parseEq () throws java.lang.Exception {
  skipWhitespace();
  require('=');
  skipWhitespace();
}


/**
 * Parse an end tag.
 * <pre>
 * [36] ETag ::= '</' Name S? '>'
 * </pre>
 * <p>Parsing starts at the element name.  An error is reported if
 * the name differs from the current element's name; the handler,
 * if any, then receives an endElement event.
 * <p>NOTE: parseContent() chains to here.
 */
void parseETag () throws java.lang.Exception {
  String name = readNmtoken(true);

  // Compare by value: reference comparison is only correct when
  // both strings happen to be the same interned instance.
  if (!name.equals(currentElement)) {
    error("mismatched end tag", name, currentElement);
  }
  skipWhitespace();
  require('>');
  if (handler != null) {
    handler.endElement(name);
  }
}


/**
 * Parse the content of an element.
 * [37] content ::= (element | PCData | Reference | CDSect | PI | Comment)*
 * [68] Reference ::= EntityRef | CharRef
 */
void parseContent () throws java.lang.Exception {
  String data;
  char c;

  while (true) {

    switch (currentElementContent) {
    case CONTENT_ANY:
    case CONTENT_MIXED:
      parsePCData();
      break;
    case CONTENT_ELEMENTS:
      parseWhitespace();
      break;
    }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -