📄 xmlparser.java
字号:
} // Check for NDATA declaration. boolean white = tryWhitespace (); if (!peFlag && tryRead ("NDATA")) { if (!white) error ("whitespace required before NDATA"); requireWhitespace (); notationName = readNmtoken (true); setExternalDataEntity (name, ids [0], ids [1], notationName); } else { setExternalTextEntity (name, ids [0], ids [1]); } } // Finish the declaration. skipWhitespace (); require ('>'); } /** * Parse a notation declaration. * <pre> * [82] NotationDecl ::= '<!NOTATION' S Name S * (ExternalID | PublicID) S? '>' * [83] PublicID ::= 'PUBLIC' S PubidLiteral * </pre> * <P>NOTE: the '<!NOTATION' has already been read. */ private void parseNotationDecl () throws Exception { String nname, ids[]; requireWhitespace (); nname = readNmtoken (true); requireWhitespace (); // Read the external identifiers. ids = readExternalIds (true); if (ids [0] == null && ids [1] == null) { error ("external identifer missing", nname, null); } // Register the notation. setNotation (nname, ids [0], ids [1]); skipWhitespace (); require ('>'); } /** * Parse character data. * <pre> * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) * </pre> */ private void parseCharData () throws Exception { char c; // Start with a little cheat -- in most // cases, the entire sequence of // character data will already be in // the readBuffer; if not, fall through to // the normal approach. if (USE_CHEATS) { int lineAugment = 0; int columnAugment = 0;loop: for (int i = readBufferPos; i < readBufferLength; i++) { switch (c = readBuffer [i]) { case '\n': lineAugment++; columnAugment = 0; break; case '&': case '<': int start = readBufferPos; columnAugment++; readBufferPos = i; if (lineAugment > 0) { line += lineAugment; column = columnAugment; } else { column += columnAugment; } dataBufferAppend (readBuffer, start, i - start); return; case ']': // XXX missing two end-of-buffer cases if ((i + 2) < readBufferLength) { if (readBuffer [i + 1] == ']' && readBuffer [i + 2] == '>') { error ("character data may not contain ']]>'"); } } columnAugment++; break; default: if (c < 0x0020 || c > 0xFFFD) error ("illegal XML character U+" + Integer.toHexString (c)); // FALLTHROUGH case '\r': case '\t': columnAugment++; } } } // OK, the cheat didn't work; start over // and do it by the book. while (true) { c = readCh (); switch (c) { case '<': case '&': unread (c); return; // XXX "]]>" precluded ... default: dataBufferAppend (c); break; } } } ////////////////////////////////////////////////////////////////////// // High-level reading and scanning methods. ////////////////////////////////////////////////////////////////////// /** * Require whitespace characters. */ private void requireWhitespace () throws SAXException, IOException { char c = readCh (); if (isWhitespace (c)) { skipWhitespace (); } else { error ("whitespace required", c, null); } } /** * Parse whitespace characters, and leave them in the data buffer. */ private void parseWhitespace () throws Exception { char c = readCh (); while (isWhitespace (c)) { dataBufferAppend (c); c = readCh (); } unread (c); } /** * Skip whitespace characters. * <pre> * [3] S ::= (#x20 | #x9 | #xd | #xa)+ * </pre> */ private void skipWhitespace () throws SAXException, IOException { // Start with a little cheat. Most of // the time, the white space will fall // within the current read buffer; if // not, then fall through. if (USE_CHEATS) { int lineAugment = 0; int columnAugment = 0;loop: for (int i = readBufferPos; i < readBufferLength; i++) { switch (readBuffer [i]) { case ' ': case '\t': case '\r': columnAugment++; break; case '\n': lineAugment++; columnAugment = 0; break; case '%': if (expandPE) break loop; // else fall through... default: readBufferPos = i; if (lineAugment > 0) { line += lineAugment; column = columnAugment; } else { column += columnAugment; } return; } } } // OK, do it by the book. char c = readCh (); while (isWhitespace (c)) { c = readCh (); } unread (c); } /** * Read a name or (when parsing an enumeration) name token. * <pre> * [5] Name ::= (Letter | '_' | ':') (NameChar)* * [7] Nmtoken ::= (NameChar)+ * </pre> */ private String readNmtoken (boolean isName) throws SAXException, IOException { char c; if (USE_CHEATS) {loop: for (int i = readBufferPos; i < readBufferLength; i++) { c = readBuffer [i]; switch (c) { case '%': if (expandPE) break loop; // else fall through... // What may legitimately come AFTER a name/nmtoken? case '<': case '>': case '&': case ',': case '|': case '*': case '+': case '?': case ')': case '=': case '\'': case '"': case '[': case ' ': case '\t': case '\r': case '\n': case ';': case '/': int start = readBufferPos; if (i == start) error ("name expected", readBuffer [i], null); readBufferPos = i; return intern (readBuffer, start, i - start); default: // punt on exact tests from Appendix A; approximate // them using the Unicode ID start/part rules if (i == readBufferPos && isName) { if (!Character.isUnicodeIdentifierStart (c) && c != ':' && c != '_') error ("Not a name start character, U+" + Integer.toHexString (c)); } else if (!Character.isUnicodeIdentifierPart (c) && c != '-' && c != ':' && c != '_' && c != '.' && !isExtender (c)) error ("Not a name character, U+" + Integer.toHexString (c)); } } } nameBufferPos = 0; // Read the first character.loop: while (true) { c = readCh (); switch (c) { case '%': case '<': case '>': case '&': case ',': case '|': case '*': case '+': case '?': case ')': case '=': case '\'': case '"': case '[': case ' ': case '\t': case '\n': case '\r': case ';': case '/': unread (c); if (nameBufferPos == 0) { error ("name expected"); } // punt on exact tests from Appendix A, but approximate them if (isName && !Character.isUnicodeIdentifierStart ( nameBuffer [0]) && ":_".indexOf (nameBuffer [0]) == -1) error ("Not a name start character, U+" + Integer.toHexString (nameBuffer [0])); String s = intern (nameBuffer, 0, nameBufferPos); nameBufferPos = 0; return s; default: // punt on exact tests from Appendix A, but approximate them if ((nameBufferPos != 0 || !isName) && !Character.isUnicodeIdentifierPart (c) && ":-_.".indexOf (c) == -1 && !isExtender (c)) error ("Not a name character, U+" + Integer.toHexString (c)); if (nameBufferPos >= nameBuffer.length) nameBuffer = (char[]) extendArray (nameBuffer, nameBuffer.length, nameBufferPos); nameBuffer [nameBufferPos++] = c; } } } private static boolean isExtender (char c) { // [88] Extender ::= ... return c == 0x00b7 || c == 0x02d0 || c == 0x02d1 || c == 0x0387 || c == 0x0640 || c == 0x0e46 || c == 0x0ec6 || c == 0x3005 || (c >= 0x3031 && c <= 0x3035) || (c >= 0x309d && c <= 0x309e) || (c >= 0x30fc && c <= 0x30fe); } /** * Read a literal. With matching single or double quotes as * delimiters (and not embedded!) this is used to parse: * <pre> * [9] EntityValue ::= ... ([^%&] | PEReference | Reference)* ... * [10] AttValue ::= ... ([^<&] | Reference)* ... * [11] SystemLiteral ::= ... (URLchar - "'")* ... * [12] PubidLiteral ::= ... (PubidChar - "'")* ... * </pre> * as well as the quoted strings in XML and text declarations * (for version, encoding, and standalone) which have their * own constraints. */ private String readLiteral (int flags) throws SAXException, IOException { char delim, c; int startLine = line; boolean saved = expandPE; // Find the first delimiter. delim = readCh (); if (delim != '"' && delim != '\'' && delim != (char) 0) { error ("expected '\"' or \"'\"", delim, null); return null; } inLiteral = true; if ((flags & LIT_DISABLE_PE) != 0) expandPE = false; // Each level of input source has its own buffer; remember // ours, so we won't read the ending delimiter from any // other input source, regardless of entity processing. char ourBuf [] = readBuffer; // Read the literal. try { c = readCh ();loop: while (! (c == delim && readBuffer == ourBuf)) { switch (c) { // Can't escape this normalization for attributes case '\n': case '\r': case '\t': if ((flags & LIT_ATTRIBUTE) != 0) c = ' '; break; case '&': c = readCh (); // Char refs are expanded immediately, except for // all the cases where it's deferred. if (c == '#') { if ((flags & LIT_DISABLE_CREF) != 0) { dataBufferAppend ('&'); dataBufferAppend ('#'); continue; } parseCharRef (); // It looks like an entity ref ... } else { unread (c); // Expand it? if ((flags & LIT_ENTITY_REF) > 0) { parseEntityRef (false); // Is it just data? } else if ((flags & LIT_DISABLE_EREF) != 0) { dataBufferAppend ('&'); // OK, it will be an entity ref -- expanded later. } else { String name = readNmtoken (true); require (';'); if ((flags & LIT_ENTITY_CHECK) != 0 && getEntityType (name) == ENTITY_UNDECLARED) { error ("General entity '" + name + "' must be declared before use"); } dataBufferAppend ('&'); dataBufferAppend (name); dataBufferAppend (';'); } } c = readCh (); continue loop; case '<': // and why? Perhaps so "&foo;" expands the same // inside and outside an attribute? if ((flags & LIT_ATTRIBUTE) != 0) error ("attribute values may not contain '<'"); break; // We don't worry about case '%' and PE refs, readCh does. default: break; } dataBufferAppend (c); c = readCh (); } } catch (EOFException e) { error ("end of input while looking for delimiter (started on line " + startLine + ')', null, new Character (delim).toString ()); } inLiteral = false; expandPE = saved; // Normalise whitespace if necessary. if ((flags & LIT_NORMALIZE) > 0) { dataBufferNormalize (); } // Return the value. return dataBufferToString (); } /** * Try reading external identifiers. * A system identifier is not required for notations. * @param inNotation Are we in a notation? * @return A two-member String array containing the identifiers. */ private String[] readExternalIds (boolean inNotation) throws Exception { char c; String ids[] = new String [2]; int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_D
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -