📄 xmlparser.java
字号:
stream.close(); } catch (IOException e) { /* ignore */ } } if (is != null) { try { is.close(); } catch (IOException e) { /* ignore */ } } scratch = null; } } ////////////////////////////////////////////////////////////////////// // Error reporting. ////////////////////////////////////////////////////////////////////// /** * Report an error. * @param message The error message. * @param textFound The text that caused the error (or null). * @see SAXDriver#error * @see #line */ private void error(String message, String textFound, String textExpected) throws SAXException { if (textFound != null) { message = message + " (found \"" + textFound + "\")"; } if (textExpected != null) { message = message + " (expected \"" + textExpected + "\")"; } handler.fatal(message); // "can't happen" throw new SAXException(message); } /** * Report a serious error. * @param message The error message. * @param textFound The text that caused the error (or null). */ private void error(String message, char textFound, String textExpected) throws SAXException { error(message, new Character(textFound).toString(), textExpected); } /** * Report typical case fatal errors. */ private void error(String message) throws SAXException { handler.fatal(message); } ////////////////////////////////////////////////////////////////////// // Major syntactic productions. ////////////////////////////////////////////////////////////////////// /** * Parse an XML document. * <pre> * [1] document ::= prolog element Misc* * </pre> * <p>This is the top-level parsing function for a single XML * document. As a minimum, a well-formed document must have * a document element, and a valid document must have a prolog * (one with doctype) as well. */ private void parseDocument() throws Exception { try { // added by MHK boolean sawDTD = parseProlog(); require('<'); parseElement(!sawDTD); } catch (EOFException ee) { // added by MHK error("premature end of file", "[EOF]", null); } try { parseMisc(); //skip all white, PIs, and comments char c = readCh(); //if this doesn't throw an exception... error("unexpected characters after document end", c, null); } catch (EOFException e) { return; } } static final char[] startDelimComment = { '<', '!', '-', '-' }; static final char[] endDelimComment = { '-', '-' }; /** * Skip a comment. * <pre> * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* "-->" * </pre> * <p> (The <code><!--</code> has already been read.) */ private void parseComment() throws Exception { char c; boolean saved = expandPE; expandPE = false; parseUntil(endDelimComment); require('>'); expandPE = saved; handler.comment(dataBuffer, 0, dataBufferPos); dataBufferPos = 0; } static final char[] startDelimPI = { '<', '?' }; static final char[] endDelimPI = { '?', '>' }; /** * Parse a processing instruction and do a call-back. * <pre> * [16] PI ::= '<?' PITarget * (S (Char* - (Char* '?>' Char*)))? * '?>' * [17] PITarget ::= Name - ( ('X'|'x') ('M'|m') ('L'|l') ) * </pre> * <p> (The <code><?</code> has already been read.) */ private void parsePI() throws SAXException, IOException { String name; boolean saved = expandPE; expandPE = false; name = readNmtoken(true); //NE08 if (name.indexOf(':') >= 0) { error("Illegal character(':') in processing instruction name ", name, null); } if ("xml".equalsIgnoreCase(name)) { error("Illegal processing instruction target", name, null); } if (!tryRead(endDelimPI)) { requireWhitespace(); parseUntil(endDelimPI); } expandPE = saved; handler.processingInstruction(name, dataBufferToString()); } static final char[] endDelimCDATA = { ']', ']', '>' }; private boolean isDirtyCurrentElement; /** * Parse a CDATA section. * <pre> * [18] CDSect ::= CDStart CData CDEnd * [19] CDStart ::= '<![CDATA[' * [20] CData ::= (Char* - (Char* ']]>' Char*)) * [21] CDEnd ::= ']]>' * </pre> * <p> (The '<![CDATA[' has already been read.) */ private void parseCDSect() throws Exception { parseUntil(endDelimCDATA); dataBufferFlush(); } /** * Parse the prolog of an XML document. * <pre> * [22] prolog ::= XMLDecl? Misc* (Doctypedecl Misc*)? * </pre> * <p>We do not look for the XML declaration here, because it was * handled by pushURL (). * @see pushURL * @return true if a DTD was read. */ private boolean parseProlog() throws Exception { parseMisc(); if (tryRead("<!DOCTYPE")) { parseDoctypedecl(); parseMisc(); return true; } return false; } private void checkLegalVersion(String version) throws SAXException { int len = version.length(); for (int i = 0; i < len; i++) { char c = version.charAt(i); if ('0' <= c && c <= '9') { continue; } if (c == '_' || c == '.' || c == ':' || c == '-') { continue; } if ('a' <= c && c <= 'z') { continue; } if ('A' <= c && c <= 'Z') { continue; } error ("illegal character in version", version, "1.0"); } } /** * Parse the XML declaration. * <pre> * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' * [24] VersionInfo ::= S 'version' Eq * ("'" VersionNum "'" | '"' VersionNum '"' ) * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')* * [32] SDDecl ::= S 'standalone' Eq * ( "'"" ('yes' | 'no') "'"" | '"' ("yes" | "no") '"' ) * [80] EncodingDecl ::= S 'encoding' Eq * ( "'" EncName "'" | "'" EncName "'" ) * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* * </pre> * <p> (The <code><?xml</code> and whitespace have already been read.) * @return the encoding in the declaration, uppercased; or null * @see #parseTextDecl * @see #setupDecoding */ private String parseXMLDecl(boolean ignoreEncoding) throws SAXException, IOException { String version; String encodingName = null; String standalone = null; int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; String inputEncoding = null; switch (this.encoding) { case ENCODING_EXTERNAL: case ENCODING_UTF_8: inputEncoding = "UTF-8"; break; case ENCODING_ISO_8859_1: inputEncoding = "ISO-8859-1"; break; case ENCODING_UCS_2_12: inputEncoding = "UTF-16BE"; break; case ENCODING_UCS_2_21: inputEncoding = "UTF-16LE"; break; } // Read the version. require("version"); parseEq(); checkLegalVersion(version = readLiteral(flags)); if (!version.equals("1.0")) { if (version.equals("1.1")) { handler.warn("expected XML version 1.0, not: " + version); xmlVersion = XML_11; } else { error("illegal XML version", version, "1.0 or 1.1"); } } else { xmlVersion = XML_10; } // Try reading an encoding declaration. boolean white = tryWhitespace(); if (tryRead("encoding")) { if (!white) { error("whitespace required before 'encoding='"); } parseEq(); encodingName = readLiteral(flags); if (!ignoreEncoding) { setupDecoding(encodingName); } } // Try reading a standalone declaration if (encodingName != null) { white = tryWhitespace(); } if (tryRead("standalone")) { if (!white) { error("whitespace required before 'standalone='"); } parseEq(); standalone = readLiteral(flags); if ("yes".equals(standalone)) { docIsStandalone = true; } else if (!"no".equals(standalone)) { error("standalone flag must be 'yes' or 'no'"); } } skipWhitespace(); require("?>"); if (inputEncoding == null) { inputEncoding = encodingName; } return encodingName; } /** * Parse a text declaration. * <pre> * [79] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' * [80] EncodingDecl ::= S 'encoding' Eq * ( '"' EncName '"' | "'" EncName "'" ) * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* * </pre> * <p> (The <code><?xml</code>' and whitespace have already been read.) * @return the encoding in the declaration, uppercased; or null * @see #parseXMLDecl * @see #setupDecoding */ private String parseTextDecl(boolean ignoreEncoding) throws SAXException, IOException { String encodingName = null; int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; // Read an optional version. if (tryRead ("version")) { String version; parseEq(); checkLegalVersion(version = readLiteral(flags)); if (version.equals("1.1")) { if (xmlVersion == XML_10) { error("external subset has later version number.", "1.0", version); } handler.warn("expected XML version 1.0, not: " + version); xmlVersion = XML_11; } else if (!version.equals("1.0")) { error("illegal XML version", version, "1.0 or 1.1"); } requireWhitespace(); } // Read the encoding. require("encoding"); parseEq(); encodingName = readLiteral(flags); if (!ignoreEncoding) { setupDecoding(encodingName); } skipWhitespace(); require("?>"); return encodingName; } /** * Sets up internal state so that we can decode an entity using the * specified encoding. This is used when we start to read an entity * and we have been given knowledge of its encoding before we start to * read any data (e.g. from a SAX input source or from a MIME type). * * <p> It is also used after autodetection, at which point only very * limited adjustments to the encoding may be used (switching between * related builtin decoders). * * @param encodingName The name of the encoding specified by the user. * @exception IOException if the encoding isn't supported either * internally to this parser, or by the hosting JVM. * @see #parseXMLDecl * @see #parseTextDecl */ private void setupDecoding(String encodingName) throws SAXException, IOException { encodingName = encodingName.toUpperCase(); // ENCODING_EXTERNAL indicates an encoding that wasn't // autodetected ... we can use builtin decoders, or // ones from the JVM (InputStreamReader). // Otherwise we can only tweak what was autodetected, and // only for single byte (ASCII derived) builtin encodings. // ASCII-derived encodings if (encoding == ENCODING_UTF_8 || encoding == ENCODING_EXTERNAL) { if (encodingName.equals("ISO-8859-1") || encodingName.equals("8859_1") || encodingName.equals("ISO8859_1")) { encoding = ENCODING_ISO_8859_1; return; } else if (encodingName.equals("US-ASCII") || encodingName.equals("ASCII")) { encoding = ENCODING_ASCII; return; } else if (encodingName.equals("UTF-8") || encodingName.equals("UTF8")) { encoding = ENCODING_UTF_8; return; } else if (encoding != ENCODING_EXTERNAL) { // used to start with a new reader ... throw new UnsupportedEncodingException(encodingName); } // else fallthrough ... // it's ASCII-ish and something other than a builtin } // Unicode and such if (encoding == ENCODING_UCS_2_12 || encoding == ENCODING_UCS_2_21) { if (!(encodingName.equals("ISO-10646-UCS-2") || encodingName.equals("UTF-16") || encodingName.equals("UTF-16BE") || encodingName.equals("UTF-16LE"))) { error("unsupported Unicode encoding", encodingName, "UTF-16"); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -