📄 xmlparser.java
字号:
columnAugment = 0; break; case '%': if (expandPE) break loop; // else fall through... default: readBufferPos = i; if (lineAugment > 0) { line += lineAugment; column = columnAugment; } else { column += columnAugment; } return; } } } // OK, do it by the book. char c = readCh (); while (isWhitespace (c)) { c = readCh (); } unread (c); } /** * Read a name or (when parsing an enumeration) name token. * <pre> * [5] Name ::= (Letter | '_' | ':') (NameChar)* * [7] Nmtoken ::= (NameChar)+ * </pre> */ private String readNmtoken (boolean isName) throws SAXException, IOException { char c; if (USE_CHEATS) {loop: for (int i = readBufferPos; i < readBufferLength; i++) { c = readBuffer [i]; switch (c) { case '%': if (expandPE) break loop; // else fall through... // What may legitimately come AFTER a name/nmtoken? case '<': case '>': case '&': case ',': case '|': case '*': case '+': case '?': case ')': case '=': case '\'': case '"': case '[': case ' ': case '\t': case '\r': case '\n': case ';': case '/': int start = readBufferPos; if (i == start) error ("name expected", readBuffer [i], null); readBufferPos = i; return intern (readBuffer, start, i - start); default: // punt on exact tests from Appendix A; approximate // them using the Unicode ID start/part rules if (i == readBufferPos && isName) { if (!Character.isUnicodeIdentifierStart (c) && c != ':' && c != '_') error ("Not a name start character, U+" + Integer.toHexString (c)); } else if (!Character.isUnicodeIdentifierPart (c) && c != '-' && c != ':' && c != '_' && c != '.' && !isExtender (c)) error ("Not a name character, U+" + Integer.toHexString (c)); } } } nameBufferPos = 0; // Read the first character.loop: while (true) { c = readCh (); switch (c) { case '%': case '<': case '>': case '&': case ',': case '|': case '*': case '+': case '?': case ')': case '=': case '\'': case '"': case '[': case ' ': case '\t': case '\n': case '\r': case ';': case '/': unread (c); if (nameBufferPos == 0) { error ("name expected"); } // punt on exact tests from Appendix A, but approximate them if (isName && !Character.isUnicodeIdentifierStart ( nameBuffer [0]) && ":_".indexOf (nameBuffer [0]) == -1) error ("Not a name start character, U+" + Integer.toHexString (nameBuffer [0])); String s = intern (nameBuffer, 0, nameBufferPos); nameBufferPos = 0; return s; default: // punt on exact tests from Appendix A, but approximate them if ((nameBufferPos != 0 || !isName) && !Character.isUnicodeIdentifierPart (c) && ":-_.".indexOf (c) == -1 && !isExtender (c)) error ("Not a name character, U+" + Integer.toHexString (c)); if (nameBufferPos >= nameBuffer.length) nameBuffer = (char[]) extendArray (nameBuffer, nameBuffer.length, nameBufferPos); nameBuffer [nameBufferPos++] = c; } } } private static boolean isExtender (char c) { // [88] Extender ::= ... return c == 0x00b7 || c == 0x02d0 || c == 0x02d1 || c == 0x0387 || c == 0x0640 || c == 0x0e46 || c == 0x0ec6 || c == 0x3005 || (c >= 0x3031 && c <= 0x3035) || (c >= 0x309d && c <= 0x309e) || (c >= 0x30fc && c <= 0x30fe); } /** * Read a literal. With matching single or double quotes as * delimiters (and not embedded!) this is used to parse: * <pre> * [9] EntityValue ::= ... ([^%&] | PEReference | Reference)* ... * [10] AttValue ::= ... ([^<&] | Reference)* ... * [11] SystemLiteral ::= ... (URLchar - "'")* ... * [12] PubidLiteral ::= ... (PubidChar - "'")* ... * </pre> * as well as the quoted strings in XML and text declarations * (for version, encoding, and standalone) which have their * own constraints. */ private String readLiteral (int flags) throws SAXException, IOException { char delim, c; int startLine = line; boolean saved = expandPE; // Find the first delimiter. delim = readCh (); if (delim != '"' && delim != '\'' && delim != (char) 0) { error ("expected '\"' or \"'\"", delim, null); return null; } inLiteral = true; if ((flags & LIT_DISABLE_PE) != 0) expandPE = false; // Each level of input source has its own buffer; remember // ours, so we won't read the ending delimiter from any // other input source, regardless of entity processing. char ourBuf [] = readBuffer; // Read the literal. try { c = readCh ();loop: while (! (c == delim && readBuffer == ourBuf)) { switch (c) { // Can't escape this normalization for attributes case '\n': case '\r': case '\t': if ((flags & LIT_ATTRIBUTE) != 0) c = ' '; break; case '&': c = readCh (); // Char refs are expanded immediately, except for // all the cases where it's deferred. if (c == '#') { if ((flags & LIT_DISABLE_CREF) != 0) { dataBufferAppend ('&'); dataBufferAppend ('#'); continue; } parseCharRef (); // It looks like an entity ref ... } else { unread (c); // Expand it? if ((flags & LIT_ENTITY_REF) > 0) { parseEntityRef (false); // Is it just data? } else if ((flags & LIT_DISABLE_EREF) != 0) { dataBufferAppend ('&'); // OK, it will be an entity ref -- expanded later. } else { String name = readNmtoken (true); require (';'); if ((flags & LIT_ENTITY_CHECK) != 0 && getEntityType (name) == ENTITY_UNDECLARED) { error ("General entity '" + name + "' must be declared before use"); } dataBufferAppend ('&'); dataBufferAppend (name); dataBufferAppend (';'); } } c = readCh (); continue loop; case '<': // and why? Perhaps so "&foo;" expands the same // inside and outside an attribute? if ((flags & LIT_ATTRIBUTE) != 0) error ("attribute values may not contain '<'"); break; // We don't worry about case '%' and PE refs, readCh does. default: break; } dataBufferAppend (c); c = readCh (); } } catch (EOFException e) { error ("end of input while looking for delimiter (started on line " + startLine + ')', null, new Character (delim).toString ()); } inLiteral = false; expandPE = saved; // Normalise whitespace if necessary. if ((flags & LIT_NORMALIZE) > 0) { dataBufferNormalize (); } // Return the value. return dataBufferToString (); } /** * Try reading external identifiers. * A system identifier is not required for notations. * @param inNotation Are we in a notation? * @return A two-member String array containing the identifiers. */ private String[] readExternalIds (boolean inNotation) throws Exception { char c; String ids[] = new String [2]; int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; if (tryRead ("PUBLIC")) { requireWhitespace (); ids [0] = readLiteral (LIT_NORMALIZE | flags); if (inNotation) { skipWhitespace (); c = readCh (); unread (c); if (c == '"' || c == '\'') { ids [1] = readLiteral (flags); } } else { requireWhitespace (); ids [1] = readLiteral (flags); } for (int i = 0; i < ids [0].length (); i++) { c = ids [0].charAt (i); if (c >= 'a' && c <= 'z') continue; if (c >= 'A' && c <= 'Z') continue; if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf (c) != -1) continue; error ("illegal PUBLIC id character U+" + Integer.toHexString (c)); } } else if (tryRead ("SYSTEM")) { requireWhitespace (); ids [1] = readLiteral (flags); } // XXX should normalize system IDs as follows: // - Convert to UTF-8 // - Map reserved and non-ASCII characters to %HH return ids; } /** * Test if a character is whitespace. * <pre> * [3] S ::= (#x20 | #x9 | #xd | #xa)+ * </pre> * @param c The character to test. * @return true if the character is whitespace. */ private final boolean isWhitespace (char c) { if (c > 0x20) return false; if (c == 0x20 || c == 0x0a || c == 0x09 || c == 0x0d) return true; return false; // illegal ... } ////////////////////////////////////////////////////////////////////// // Utility routines. ////////////////////////////////////////////////////////////////////// /** * Add a character to the data buffer. */ private void dataBufferAppend (char c) { // Expand buffer if necessary. if (dataBufferPos >= dataBuffer.length) dataBuffer = (char[]) extendArray (dataBuffer, dataBuffer.length, dataBufferPos); dataBuffer [dataBufferPos++] = c; } /** * Add a string to the data buffer. */ private void dataBufferAppend (String s) { dataBufferAppend (s.toCharArray (), 0, s.length ()); } /** * Append (part of) a character array to the data buffer. */ private void dataBufferAppend (char ch[], int start, int length) { dataBuffer = (char[]) extendArray (dataBuffer, dataBuffer.length, dataBufferPos + length); System.arraycopy (ch, start, dataBuffer, dataBufferPos, length); dataBufferPos += length; } /** * Normalise whitespace in the data buffer. */ private void dataBufferNormalize () { int i = 0; int j = 0; int end = dataBufferPos; // Skip whitespace at the start. while (j < end && isWhitespace (dataBuffer [j])) { j++; } // Skip whitespace at the end. while (end > j && isWhitespace (dataBuffer [end - 1])) { end --; } // Start copying to the left. while (j < end) { char c = dataBuffer [j++]; // Normalise all other whitespace to // a single space. if (isWhitespace (c)) { while (j < end && isWhitespace (dataBuffer [j++])) {} dataBuffer [i++] = ' '; dataBuffer [i++] = dataBuffer [j - 1]; } else { dataBuffer [i++] = c; } } // The new length is <= the old one. dataBufferPos = i; } /** * Convert the data buffer to a string. */ private String dataBufferToString () { String s = new String (dataBuffer, 0, dataBufferPos); dataBufferPos = 0; return s; } /** * Flush the contents of the data buffer to the handler, as * appropriate, and reset the buffer for new input. */ private void dataBufferFlush () throws SAXException { if (currentElementContent == CONTENT_ELEMENTS && dataBufferPos > 0 && !inCDATA ) { // We can't just trust the buffer to be whitespace, there // are cases when it isn't for (int i = 0; i < dataBufferPos; i++) { if (!isWhitespace (dataBuffer [i])) { handler.charData (dataBuffer, 0, dataBufferPos); dataBufferPos = 0; } } if (dataBufferPos > 0) { handler.ignorableWhitespace (dataBuffer, 0, dataBufferPos); dataBufferPos = 0; } } else if (dataBufferPos > 0) { handler.charData (dataBuffer, 0, dataBufferPos); dataBufferPos = 0; } } /** * Require a string to appear, or throw an exception. * <p><em>Precondition:</em> Entity expansion is not required. * <p><em>Precondition:</em> data buffer has no characters that * will get sent to the application. */ private void require (String delim) throws SAXException, IOException { int length = delim.length (); char ch []; if (length < dataBuffer.length) { ch = dataBuffer; delim.getChars (0, length, ch, 0); } else ch = delim.toCharArray (); if (USE_CHEATS && length <= (readBufferLength - readBufferPos)) { int offset = readBufferPos; for (int i = 0; i < length; i++, offset++) if (ch [i] != readBuffer [offset]) error ("required string", null, delim); readBufferPos = offset; } else { for (int i = 0; i < length; i++) require (ch [i]); } } /** * Require a character to appear, or throw an exception. */ private void require (char delim) throws SAXException, IOException { char c = readCh (); if (c != delim) { error ("required character", c, new Character (delim).toString ()); } } /** * Create an interned string from a character array. * Ælfred uses this method to create an interned version * of all names and name tokens, so that it can test equality * with <code>==</code> instead of <code>String.equals ()</code>. * * <p>This is much more efficient than constructing a non-interned * string first, and then interning it. * * @param ch an array of characters for building the string. * @param start the starting position in the array. * @param length the number of characters to place in the string. * @return an interned string. * @see #intern (String) * @see java.lang.String#i
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -