📄 xmlparser.java
字号:
} } } // Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz // (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz: if (value <= 0x0000ffff) { // no surrogates needed dataBufferAppend((char)value); } else if (value <= 0x000fffff) { // > 16 bits, surrogate needed dataBufferAppend((char)(0xd8 | ((value & 0x000ffc00) >> 10))); dataBufferAppend((char)(0xdc | (value & 0x0003ff))); } else { // too big for surrogate error("character reference " + value + " is too large for UTF-16", new Integer(value).toString(), null); } } /** * Parse a reference. * [69] EntityRef ::= '&' Name ';' * *NOTE: the '&' has already been read. * @param externalAllowed External entities are allowed here. */ void parseEntityRef (boolean externalAllowed) throws java.lang.Exception { String name; name = readNmtoken(true); require(';'); switch (getEntityType(name)) { case ENTITY_UNDECLARED: error("reference to undeclared entity", name, null); break; case ENTITY_INTERNAL: pushString(name, getEntityValue(name)); break; case ENTITY_TEXT: if (externalAllowed) { pushURL(name, getEntityPublicId(name), getEntitySystemId(name), null, null, null); } else { error("reference to external entity in attribute value.", name, null); } break; case ENTITY_NDATA: if (externalAllowed) { error("data entity reference in content", name, null); } else { error("reference to external entity in attribute value.", name, null); } break; } } /** * Parse a parameter entity reference. * [70] PEReference ::= '%' Name ';' * *NOTE: the '%' has already been read. */ void parsePEReference (boolean isEntityValue) throws java.lang.Exception { String name; name = "%" + readNmtoken(true); require(';'); switch (getEntityType(name)) { case ENTITY_UNDECLARED: error("reference to undeclared parameter entity", name, null); break; case ENTITY_INTERNAL: if (isEntityValue) { pushString(name, getEntityValue(name)); } else { pushString(name, " " + getEntityValue(name) + ' '); } break; case ENTITY_TEXT: if (isEntityValue) { pushString(null, " "); } pushURL(name, getEntityPublicId(name), getEntitySystemId(name), null, null, null); if (isEntityValue) { pushString(null, " "); } break; } } /** * Parse an entity declaration. * [71] EntityDecl ::= '<!ENTITY' S %Name S %EntityDef S? '>' * | '<!ENTITY' S '%' S %Name S %EntityDef S? '>' * [72] EntityDef ::= EntityValue | ExternalDef * [73] ExternalDef ::= ExternalID %NDataDecl? * [74] ExternalID ::= 'SYSTEM' S SystemLiteral * | 'PUBLIC' S PubidLiteral S SystemLiteral * [75] NDataDecl ::= S %'NDATA' S %Name * *NOTE: the '<!ENTITY' has already been read. */ void parseEntityDecl () throws java.lang.Exception { char c; boolean peFlag = false; String name, value, notationName, ids[]; // Check for a parameter entity. requireWhitespace(); if (tryRead('%')) { peFlag = true; requireWhitespace(); } // Read the entity name, and prepend // '%' if necessary. name = readNmtoken(true); if (peFlag) { name = "%" + name; } // Read the entity value. requireWhitespace(); c = readCh(); unread(c); if (c == '"' || c == '\'') { // Internal entity. context = CONTEXT_ENTITYVALUE; value = readLiteral(LIT_CHAR_REF|LIT_PE_REF); context = CONTEXT_DTD; setInternalEntity(name,value); } else { // Read the external IDs ids = readExternalIds(false); if (ids[1] == null) { error("system identifer missing", name, null); } // Check for NDATA declaration. skipWhitespace(); if (tryRead("NDATA")) { requireWhitespace(); notationName = readNmtoken(true); setExternalDataEntity(name, ids[0], ids[1], notationName); } else { setExternalTextEntity(name, ids[0], ids[1]); } } // Finish the declaration. skipWhitespace(); require('>'); } /** * Parse a notation declaration. * [81] NotationDecl ::= '<!NOTATION' S %Name S %ExternalID S? '>' * *NOTE: the '<!NOTATION' has already been read. */ void parseNotationDecl () throws java.lang.Exception { String nname, ids[]; requireWhitespace(); nname = readNmtoken(true); requireWhitespace(); // Read the external identifiers. ids = readExternalIds(true); if (ids[0] == null && ids[1] == null) { error("external identifer missing", nname, null); } // Register the notation. setNotation(nname, ids[0], ids[1]); skipWhitespace(); require('>'); } /** * Parse PCDATA. * <pre> * [16] PCData ::= [^<&]* * </pre> * <p>The trick here is that the data stays in the dataBuffer without * necessarily being converted to a string right away. */ void parsePCData () throws java.lang.Exception { char c; // Start with a little cheat -- in most // cases, the entire sequence of // character data will already be in // the readBuffer; if not, fall through to // the normal approach. if (USE_CHEATS) { int lineAugment = 0; int columnAugment = 0; loop: for (int i = readBufferPos; i < readBufferLength; i++) { switch (readBuffer[i]) { case '\n': lineAugment++; columnAugment = 0; break; case '&': case '<': int start = readBufferPos; columnAugment++; readBufferPos = i; if (lineAugment > 0) { line += lineAugment; column = columnAugment; } else { column += columnAugment; } dataBufferAppend(readBuffer, start, i-start); return; default: columnAugment++; } } } // OK, the cheat didn't work; start over // and do it by the book. while (true) { c = readCh(); switch (c) { case '<': case '&': unread(c); return; default: dataBufferAppend(c); break; } } } ////////////////////////////////////////////////////////////////////// // High-level reading and scanning methods. ////////////////////////////////////////////////////////////////////// /** * Require whitespace characters. * [1] S ::= (#x20 | #x9 | #xd | #xa)+ */ void requireWhitespace () throws java.lang.Exception { char c = readCh(); if (isWhitespace(c)) { skipWhitespace(); } else { error("whitespace expected", c, null); } } /** * Parse whitespace characters, and leave them in the data buffer. */ void parseWhitespace () throws java.lang.Exception { char c = readCh(); while (isWhitespace(c)) { dataBufferAppend(c); c = readCh(); } unread(c); } /** * Skip whitespace characters. * [1] S ::= (#x20 | #x9 | #xd | #xa)+ */ void skipWhitespace () throws java.lang.Exception { // Start with a little cheat. Most of // the time, the white space will fall // within the current read buffer; if // not, then fall through. if (USE_CHEATS) { int lineAugment = 0; int columnAugment = 0; loop: for (int i = readBufferPos; i < readBufferLength; i++) { switch (readBuffer[i]) { case ' ': case '\t': case '\r': columnAugment++; break; case '\n': lineAugment++; columnAugment = 0; break; case '%': if (context == CONTEXT_DTD || context == CONTEXT_ENTITYVALUE) { break loop; } // else fall through... default: readBufferPos = i; if (lineAugment > 0) { line += lineAugment; column = columnAugment; } else { column += columnAugment; } return; } } } // OK, do it by the book. char c = readCh(); while (isWhitespace(c)) { c = readCh(); } unread(c); } /** * Read a name or name token. * [5] Name ::= (Letter | '_' | ':') (NameChar)* * [7] Nmtoken ::= (NameChar)+ * *NOTE: [6] is implemented implicitly where required. */ String readNmtoken (boolean isName) throws java.lang.Exception { char c; if (USE_CHEATS) { loop: for (int i = readBufferPos; i < readBufferLength; i++) { switch (readBuffer[i]) { case '%': if (context == CONTEXT_DTD || context == CONTEXT_ENTITYVALUE) { break loop; } // else fall through... case '<': case '>': case '&': case ',': case '|': case '*': case '+': case '?': case ')': case '=': case '\'': case '"': case '[': case ' ': case '\t': case '\r': case '\n': case ';': case '/': case '#': int start = readBufferPos; if (i == start) { error("name expected", readBuffer[i], null); } readBufferPos = i; return intern(readBuffer, start, i - start); } } } nameBufferPos = 0; // Read the first character. loop: while (true) { c = readCh(); switch (c) { case '%': case '<': case '>': case '&': case ',': case '|': case '*': case '+': case '?': case ')': case '=': case '\'': case '"': case '[': case ' ': case '\t': case '\n': case '\r': case ';': case '/': unread(c); if (nameBufferPos == 0) { error("name expected", null, null); } String s = intern(nameBuffer,0,nameBufferPos); nameBufferPos = 0; return s; default: nameBuffer = (char[])extendArray(nameBuffer, nameBuffer.length, nameBufferPos); nameBuffer[nameBufferPos++] = c; } } } /** * Read a literal. * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' * | "'" ([^<&'] | Reference)* "'" * [11] SystemLiteral ::= '"' URLchar* '"' | "'" (URLchar - "'")* "'" * [13] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' * | "'" ([^%&'] | PEReference | Reference)* "'" */ String readLiteral (int flags) throws java.lang.Exception { char delim, c; int startLine = line; // Find the delimiter. delim = readCh(); if (delim != '"' && delim != '\'' && delim != (char)0) { error("expected '\"' or \"'\"", delim, null); return null; } // Read the literal. try { c = readCh(); loop: while (c != delim) { switch (c) { // Literals never have line ends case '\n': case '\r': c = ' '; break; // References may be allowed case '&': if ((flags & LIT_CHAR_REF) > 0) { c = readCh(); if (c == '#') { parseCharRef(); c = readCh(); continue loop; // check the next character } else if ((flags & LIT_ENTITY_REF) > 0) { unread(c); parseEntityRef(false); c = readCh(); continue loop; } else { dataBufferAppend('&'); } } break; default: break; } dataBufferAppend(c); c = readCh(); } } catch (EOFException e) { error("end of input while looking for delimiter (started on line " + startLine + ')', null, new Character(delim).toString()); } // Normalise whitespace if necessary. if ((flags & LIT_NORMALIZE) > 0) { dataBufferNormalize(); } // Return the value. return dataBufferToString(); } /** * Try reading external identifiers. * <p>The system identifier is not required for notations. * @param inNotation Are we in a notation? * @return A two-member String array containing the identifiers. */ String[] readExternalIds (boolean inNotation) throws java.lang.Exception { char c; String ids[] = new String[2]; if (tryRead("PUBLIC")) { requireWhitespace(); ids[0] = readLiteral(LIT_NORMALIZE); // public id if (inNotation) { skipWhitespace(); if (tryRead('"') || tryRead('\'')) { ids[1] = readLiteral(0); } } else { requireWhitespace(); ids[1] = readLiteral(0); // system id } } else if (tryRead("SYSTEM")) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -