📄 xmlparser.java
字号:
return; } // four byte encodings if (encoding == ENCODING_UCS_4_1234 || encoding == ENCODING_UCS_4_4321 || encoding == ENCODING_UCS_4_2143 || encoding == ENCODING_UCS_4_3412) { // Strictly: "UCS-4" == "UTF-32BE"; also, "UTF-32LE" exists if (!encodingName.equals("ISO-10646-UCS-4")) { error("unsupported 32-bit encoding", encodingName, "ISO-10646-UCS-4"); } return; } // assert encoding == ENCODING_EXTERNAL // if (encoding != ENCODING_EXTERNAL) // throw new RuntimeException ("encoding = " + encoding); if (encodingName.equals("UTF-16BE")) { encoding = ENCODING_UCS_2_12; return; } if (encodingName.equals("UTF-16LE")) { encoding = ENCODING_UCS_2_21; return; } // We couldn't use the builtin decoders at all. But we can try to // create a reader, since we haven't messed up buffering. Tweak // the encoding name if necessary. if (encodingName.equals("UTF-16") || encodingName.equals("ISO-10646-UCS-2")) { encodingName = "Unicode"; } // Ignoring all the EBCDIC aliases here reader = new InputStreamReader(is, encodingName); sourceType = INPUT_READER; } /** * Parse miscellaneous markup outside the document element and DOCTYPE * declaration. * <pre> * [27] Misc ::= Comment | PI | S * </pre> */ private void parseMisc() throws Exception { while (true) { skipWhitespace(); if (tryRead(startDelimPI)) { parsePI(); } else if (tryRead(startDelimComment)) { parseComment(); } else { return; } } } /** * Parse a document type declaration. * <pre> * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' * </pre> * <p> (The <code><!DOCTYPE</code> has already been read.) */ private void parseDoctypedecl() throws Exception { String rootName; ExternalIdentifiers ids; // Read the document type name. 
requireWhitespace(); rootName = readNmtoken(true); // Read the External subset's IDs skipWhitespace(); ids = readExternalIds(false, true); // report (a) declaration of name, (b) lexical info (ids) handler.doctypeDecl(rootName, ids.publicId, ids.systemId); // Internal subset is parsed first, if present skipWhitespace(); if (tryRead('[')) { // loop until the subset ends while (true) { doReport = expandPE = true; skipWhitespace(); doReport = expandPE = false; if (tryRead(']')) { break; // end of subset } else { // WFC, PEs in internal subset (only between decls) peIsError = expandPE = true; parseMarkupdecl(); peIsError = expandPE = false; } } } skipWhitespace(); require('>'); // Read the external subset, if any InputSource subset; if (ids.systemId == null) { subset = handler.getExternalSubset(rootName, handler.getSystemId()); } else { subset = null; } if (ids.systemId != null || subset != null) { pushString(null, ">"); // NOTE: [dtd] is so we say what SAX2 expects, // though it's misleading (subset, not entire dtd) if (ids.systemId != null) { pushURL(true, "[dtd]", ids, null, null, null, true); } else { handler.warn("modifying document by adding external subset"); pushURL(true, "[dtd]", new ExternalIdentifiers(subset.getPublicId(), subset.getSystemId(), null), subset.getCharacterStream(), subset.getByteStream(), subset.getEncoding(), false); } // Loop until we end up back at '>' while (true) { doReport = expandPE = true; skipWhitespace(); doReport = expandPE = false; if (tryRead('>')) { break; } else { expandPE = true; parseMarkupdecl(); expandPE = false; } } // the ">" string isn't popped yet if (inputStack.size() != 1) { error("external subset has unmatched '>'"); } } // done dtd handler.endDoctype(); expandPE = false; doReport = true; } /** * Parse a markup declaration in the internal or external DTD subset. 
* <pre> * [29] markupdecl ::= elementdecl | Attlistdecl | EntityDecl * | NotationDecl | PI | Comment * [30] extSubsetDecl ::= (markupdecl | conditionalSect * | PEReference | S) * * </pre> * <p> Reading toplevel PE references is handled as a lexical issue * by the caller, as is whitespace. */ private void parseMarkupdecl() throws Exception { char[] saved = null; boolean savedPE = expandPE; // prevent "<%foo;" and ensures saved entity is right require('<'); unread('<'); expandPE = false; if (tryRead("<!ELEMENT")) { saved = readBuffer; expandPE = savedPE; parseElementDecl(); } else if (tryRead("<!ATTLIST")) { saved = readBuffer; expandPE = savedPE; parseAttlistDecl(); } else if (tryRead("<!ENTITY")) { saved = readBuffer; expandPE = savedPE; parseEntityDecl(); } else if (tryRead("<!NOTATION")) { saved = readBuffer; expandPE = savedPE; parseNotationDecl(); } else if (tryRead(startDelimPI)) { saved = readBuffer; expandPE = savedPE; parsePI(); } else if (tryRead(startDelimComment)) { saved = readBuffer; expandPE = savedPE; parseComment(); } else if (tryRead("<![")) { saved = readBuffer; expandPE = savedPE; if (inputStack.size() > 0) { parseConditionalSect(saved); } else { error("conditional sections illegal in internal subset"); } } else { error("expected markup declaration"); } // VC: Proper Decl/PE Nesting if (readBuffer != saved) { handler.verror("Illegal Declaration/PE nesting"); } } /** * Parse an element, with its tags. * <pre> * [39] element ::= EmptyElementTag | STag content ETag * [40] STag ::= '<' Name (S Attribute)* S? '>' * [44] EmptyElementTag ::= '<' Name (S Attribute)* S? '/>' * </pre> * <p> (The '<' has already been read.) * <p>NOTE: this method actually chains onto parseContent (), if necessary, * and parseContent () will take care of calling parseETag (). 
*/
    private void parseElement(boolean maybeGetSubset) throws Exception {
        String gi;
        char c;
        int oldElementContent = currentElementContent;
        String oldElement = currentElement;
        ElementDecl element;

        // This is the (global) counter for the
        // array of specified attributes.
        tagAttributePos = 0;

        // Read the element type name.
        gi = readNmtoken(true);

        // If we saw no DTD, and this is the document root element,
        // let the application modify the input stream by providing one.
        if (maybeGetSubset) {
            InputSource subset = handler.getExternalSubset(gi, handler.getSystemId());
            if (subset != null) {
                String publicId = subset.getPublicId();
                String systemId = subset.getSystemId();

                handler.warn("modifying document by adding DTD");
                handler.doctypeDecl(gi, publicId, systemId);
                // Sentinel input: the loop below ends when this ">" is read back.
                pushString(null, ">");

                // NOTE: [dtd] is so we say what SAX2 expects,
                // though it's misleading (subset, not entire dtd)
                pushURL(true, "[dtd]",
                        new ExternalIdentifiers(publicId, systemId, null),
                        subset.getCharacterStream(),
                        subset.getByteStream(),
                        subset.getEncoding(),
                        false);

                // Loop until we end up back at '>'
                while (true) {
                    doReport = expandPE = true;
                    skipWhitespace();
                    doReport = expandPE = false;
                    if (tryRead('>')) {
                        break;
                    } else {
                        expandPE = true;
                        parseMarkupdecl();
                        expandPE = false;
                    }
                }

                // the ">" string isn't popped yet
                if (inputStack.size() != 1) {
                    error("external subset has unmatched '>'");
                }

                handler.endDoctype();
            }
        }

        // Determine the current content type.
        currentElement = gi;
        element = (ElementDecl) elementInfo.get(gi);
        currentElementContent = getContentType(element, CONTENT_ANY);

        // Read the attributes, if any.
        // After this loop, "c" is the closing delimiter.
        boolean white = tryWhitespace();
        c = readCh();
        while (c != '/' && c != '>') {
            unread(c);
            if (!white) {
                error("need whitespace between attributes");
            }
            parseAttribute(gi);
            white = tryWhitespace();
            c = readCh();
        }

        // Supply any defaulted attributes.
        Iterator atts = declaredAttributes(element);
        if (atts != null) {
            String aname;
loop:
            while (atts.hasNext()) {
                aname = (String) atts.next();
                // See if it was specified.
                // Compared by value, not by reference, so the check does not
                // depend on both names being the very same String instance;
                // this mirrors the duplicate-attribute check in parseAttribute.
                for (int i = 0; i < tagAttributePos; i++) {
                    if (aname.equals(tagAttributes[i])) {
                        continue loop;
                    }
                }
                // ... or has a default
                String value = getAttributeDefaultValue(gi, aname);

                if (value == null) {
                    continue;
                }
                // Third argument false here; parseAttribute passes true for
                // attributes read from the tag itself.
                handler.attribute(aname, value, false);
            }
        }

        // Figure out if this is a start tag
        // or an empty element, and dispatch an
        // event accordingly.
        switch (c) {
        case '>':
            handler.startElement(gi);
            parseContent();
            break;
        case '/':
            require('>');
            handler.startElement(gi);
            handler.endElement(gi);
            break;
        }

        // Restore the previous state.
        currentElement = oldElement;
        currentElementContent = oldElementContent;
    }

    /**
     * Parse an attribute assignment.
     * <pre>
     * [41] Attribute ::= Name Eq AttValue
     * </pre>
     * @param name The name of the attribute's element.
     * @see SAXDriver#attribute
     */
    private void parseAttribute(String name) throws Exception {
        // NOTE(review): this method continues past the end of this excerpt;
        // only its line structure was restored, the logic is untouched.
        String aname;
        String type;
        String value;
        int flags = LIT_ATTRIBUTE | LIT_ENTITY_REF;

        // Read the attribute name.
        aname = readNmtoken(true);
        type = getAttributeType(name, aname);

        // Parse '='
        parseEq();

        // Read the value, normalizing whitespace
        // unless it is CDATA.
        if (handler.stringInterning) {
            if (type == "CDATA" || type == null) {
                value = readLiteral(flags);
            } else {
                value = readLiteral(flags | LIT_NORMALIZE);
            }
        } else {
            if (type == null || type.equals("CDATA")) {
                value = readLiteral(flags);
            } else {
                value = readLiteral(flags | LIT_NORMALIZE);
            }
        }

        // WFC: no duplicate attributes
        for (int i = 0; i < tagAttributePos; i++) {
            if (aname.equals(tagAttributes[i])) {
                error("duplicate attribute", aname, null);
            }
        }

        // Inform the handler about the
        // attribute.
        handler.attribute(aname, value, true);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -