📄 parser.java
字号:
return ("CDATA".equals(str) == true)? 'c': '?'; case 'F': return ("FIXED".equals(str) == true)? 'F': '?'; default: break; } break; case 6: // IDREFS, ENTITY switch (mBuff[1]) { case 'I': return ("IDREFS".equals(str) == true)? 'R': '?'; case 'E': return ("ENTITY".equals(str) == true)? 'n': '?'; default: break; } break; case 7: // NMTOKEN, IMPLIED, ATTLIST, ELEMENT switch (mBuff[1]) { case 'I': return ("IMPLIED".equals(str) == true)? 'I': '?'; case 'N': return ("NMTOKEN".equals(str) == true)? 't': '?'; case 'A': return ("ATTLIST".equals(str) == true)? 'a': '?'; case 'E': return ("ELEMENT".equals(str) == true)? 'e': '?'; default: break; } break; case 8: // ENTITIES, NMTOKENS, NOTATION, REQUIRED switch (mBuff[2]) { case 'N': return ("ENTITIES".equals(str) == true)? 'N': '?'; case 'M': return ("NMTOKENS".equals(str) == true)? 'T': '?'; case 'O': return ("NOTATION".equals(str) == true)? 'o': '?'; case 'E': return ("REQUIRED".equals(str) == true)? 'Q': '?'; default: break; } break; default: break; } return '?'; } /** * Reads a single or double quotted string in to the buffer. * * This method resolves entities inside a string unless the parser * parses DTD. * * @param flag 'c' - CDATA, 'i' - non CDATA, ' ' or '-' - no normalization. * @exception SAXException * @exception IOException */ private void bqstr(char flag) throws SAXException, IOException { Input inp = mInp; // remember the original input mBuffIdx = -1; bappend((char)0); // default offset to the colon char char ch; for (short st = 0; st >= 0;) { ch = next(); switch (st) { case 0: // read a single or double quote switch (ch) { case ' ': case '\n': case '\r': case '\t': break; case '\'': st = 2; // read a single quoted string break; case '\"': st = 3; // read a double quoted string break; default: panic(FAULT); break; } break; case 2: // read a single quoted string case 3: // read a double quoted string switch (ch) { case '\'': if ((st == 2) && (mInp == inp)) st = -1; else bappend(ch); break; case '\"': if ((st == 3) && (mInp == inp)) st = -1; else bappend(ch); break; case '&': ent(' '); break; case '%': pent(flag); break; case '\r': // EOL processing [#2.11] if (flag != ' ') { if (next() != '\n') back(); ch = '\n'; } default: // This implements attribute value normalization as // described in the XML specification [#3.3.3]. switch (flag) { case 'i': // non CDATA normalization switch (ch) { case ' ': case '\n': case '\t': if ((mBuffIdx > 0) && (mBuff[mBuffIdx] != ' ')) bappend(' '); break; default: bappend(ch); break; } break; case 'c': // CDATA normalization switch (ch) { case '\n': case '\t': bappend(' '); break; default: bappend(ch); break; } break; default: // no normalization bappend(ch); break; } break; } break; default: panic(FAULT); } } // There is maximum one space at the end of the string in // i-mode (non CDATA normalization) and it has to be removed. if ((flag == 'i') && (mBuff[mBuffIdx] == ' ')) mBuffIdx -= 1; } /** * Reports characters and empties the parser's buffer. */ private void bflash() throws SAXException { if (mBuffIdx >= 0) { // Textual data has been read mHand.characters(mBuff, 0, (mBuffIdx + 1)); mBuffIdx = -1; } } /** * Appends a character to parser's buffer. * * @param ch The character to append to the buffer. */ private void bappend(char ch) { try { mBuffIdx++; mBuff[mBuffIdx] = ch; } catch (Exception exp) { // Double the buffer size char buff[] = new char[mBuff.length << 1]; System.arraycopy(mBuff, 0, buff, 0, mBuff.length); mBuff = buff; mBuff[mBuffIdx] = ch; } } /** * Recognizes the built-in entities <i>lt</i>, <i>gt</i>, <i>amp</i>, * <i>apos</i>, <i>quot</i>. * The initial state is 0x100. Any state belowe 0x100 is a built-in * entity replacement character. * * @param ch the next character of an entity name. */ private void eappend(char ch) { switch (mESt) { case 0x100: // "l" or "g" or "a" or "q" switch (ch) { case 'l': mESt = 0x101; break; case 'g': mESt = 0x102; break; case 'a': mESt = 0x103; break; case 'q': mESt = 0x107; break; default: mESt = 0x200; break; } break; case 0x101: // "lt" mESt = (ch == 't')? '<': (char)0x200; break; case 0x102: // "gt" mESt = (ch == 't')? '>': (char)0x200; break; case 0x103: // "am" or "ap" switch (ch) { case 'm': mESt = 0x104; break; case 'p': mESt = 0x105; break; default: mESt = 0x200; break; } break; case 0x104: // "amp" mESt = (ch == 'p')? '&': (char)0x200; break; case 0x105: // "apo" mESt = (ch == 'o')? (char)0x106: (char)0x200; break; case 0x106: // "apos" mESt = (ch == 's')? '\'': (char)0x200; break; case 0x107: // "qu" mESt = (ch == 'u')? (char)0x108: (char)0x200; break; case 0x108: // "quo" mESt = (ch == 'o')? (char)0x109: (char)0x200; break; case 0x109: // "quot" mESt = (ch == 't')? '\"': (char)0x200; break; case '<': // "lt" case '>': // "gt" case '&': // "amp" case '\'': // "apos" case '\"': // "quot" mESt = 0x200; default: break; } } /** * Sets up a new input source on the top of the input stack. * Noet, the first byte returned by the entity's byte stream has to be the * first byte in the entity. However, the parser does not expect the byte * order mask in both cases when encoding is provided by the input source. * * @param is A new input source to set up. * @exception IOException If any IO errors occur. * @exception SAXException If the input source cannot be read. */ private void setinp(InputSource is) throws SAXException, IOException { Reader reader = null; mChIdx = 0; mChLen = 0; mChars = mInp.chars; mInp.src = null; if (is.getCharacterStream() != null) { // Ignore encoding in the xml text decl. reader = is.getCharacterStream(); xml(reader); } else if (is.getByteStream() != null) { if (is.getEncoding() != null) { // Ignore encoding in the xml text decl. String encoding = is.getEncoding().toUpperCase(); if (encoding.equals("UTF-16")) reader = bom(is.getByteStream(), 'U'); // UTF-16 [#4.3.3] else reader = enc(encoding, is.getByteStream()); xml(reader); } else { // Get encoding from BOM or the xml text decl. reader = bom(is.getByteStream(), ' '); if (reader == null) { // Encoding is defined by the xml text decl. reader = enc("UTF-8", is.getByteStream()); reader = enc(xml(reader), is.getByteStream()); } else { // Encoding is defined by the BOM. xml(reader); } } } else { // There is no support for public/system identifiers. panic(FAULT); } mInp.src = reader; mInp.pubid = is.getPublicId(); mInp.sysid = is.getSystemId(); } /** * Determines the entity encoding. * * This method gets encoding from Byte Order Mask [#4.3.3] if any. * Note, the first byte returned by the entity's byte stream has * to be the first byte in the entity. Also, there is no support * for UCS-4. * * @param is A byte stream of the entity. * @param hint An encoding hint, character U means UTF-16. * @return a reader constructed from the BOM or UTF-8 by default. * @exception SAXException * @exception IOException */ private Reader bom(InputStream is, char hint) throws SAXException, IOException { int val = is.read(); switch (val) { case 0xef: // UTF-8 if (hint == 'U') // must be UTF-16 panic(FAULT); if (is.read() != 0xbb) panic(FAULT); if (is.read() != 0xbf) panic(FAULT); return new ReaderUTF8(is); case 0xfe: // UTF-16, big-endian if (is.read() != 0xff) panic(FAULT); return new ReaderUTF16(is, 'b'); case 0xff: // UTF-16, little-endian if (is.read() != 0xfe) panic(FAULT); return new ReaderUTF16(is, 'l'); case -1: mChars[mChIdx++] = EOS; return new ReaderUTF8(is); default: if (hint == 'U') // must be UTF-16 panic(FAULT); // Read the rest of UTF-8 character switch (val & 0xf0) { case 0xc0: case 0xd0: mChars[mChIdx++] = (char)(((val & 0x1f) << 6) | (is.read() & 0x3f)); break; case 0xe0: mChars[mChIdx++] = (char)(((val & 0x0f) << 12) | ((is.read() & 0x3f) << 6) | (is.read() & 0x3f)); break; case 0xf0: // UCS-4 character throw new UnsupportedEncodingException(); default: mChars[mChIdx++] = (char)val; break; } return null; } } /** * Parses the xml text declaration. * * This method gets encoding from the xml text declaration [#4.3.1] if any. * The method assumes the buffer (mChars) is big enough to accomodate whole * xml text declaration. * * @param reader is entity reader. * @return The xml text declaration encoding or default UTF-8 encoding. * @exception SAXException * @exception IOException */ private String xml(Reader reader) throws SAXException, IOException { String str = null; String enc = "UTF-8"; char ch; int val; short st; // Read the xml text declaration into the buffer if (mChIdx != 0) { // The bom method have read ONE char into the buffer. st = (short)((mChars[0] == '<')? 1: -1); } else { st = 0; } while (st >= 0) { ch = ((val = reader.read()) >= 0)? (char)val: EOS; mChars[mChIdx++] = ch; switch (st) { case 0: // read '<' of xml declaration switch (ch) { case '<': st = 1; break; case 0xfeff: // the byte order mask ch = ((val = reader.read()) >= 0)? (char)val: EOS; mChars[mChIdx - 1] = ch; st = (short)((ch == '<')? 1: -1); break; default: st = -1; break; } break; case 1: // read '?' of xml declaration st = (short)((ch == '?')? 2: -1); break; case 2: // read 'x' of xml declaration st = (short)(((ch == 'x') || (ch == 'X'))? 3: -1); break; case 3: // read 'm' of xml declaration st = (short)(((ch == 'm') || (ch == 'M'))? 4: -1); break; case 4: // read 'l' of xml declaration st = (short)(((ch == 'l') || (ch == 'L'))? 5: -1); break; case 5: // read white space after 'xml' switch (ch) { case ' ': case '\t': case '\r': case '\n': st = 6; break; default: st = -1; break; } break; case 6: // read content of xml declaration if (ch == '?') st = 7; break; case 7: // read '>' after '?' of xml declaration st = (short)((ch != '>')? 6: -2); break; default: panic(FAULT); break; } } mChLen = mChIdx; mChIdx = 0; // If there is no xml text declaration, the encoding is default. if (st == -1) { return enc; } mChIdx = 5; // the first white space after "<?xml" // Parse the xml text declaration for (st = 0; st >= 0;) { ch = next(); switch (st) { case 0: // skip spaces after the xml declaration name if (chtyp(ch) != ' ') { back(); st = 1; } break; case 1: // read xml declaration version case 2: // read xml declaration encoding or standalone case 3: // read xml declaration standalone switch (chtyp(ch)) { case 'a': case 'A': case '_': back(); str = name(false).toLowerCase(); if ("version".equals(str) == true) { if (st != 1) panic(FAULT); if ("1.0".equals(eqstr('=')) != true) panic(FAULT); st = 2; } else if ("encoding".equals(str) == true) { if (st != 2) panic(FAULT); enc = eqstr('=').toUpperCase(); st = 3; } else if ("standalone".equals(str) == true) { if (st == 1) panic(FAULT); str = eqstr('='); // BUG: the value has to be analized by the parser [#5.1] st = 4; } else { panic(FAULT); } break; case ' ': break; case '?': if (st == 1) panic(FAULT); back(); st = 4; break; default: panic(FAULT); } break; case 4: // end of xml declaration switch (chtyp(ch)) { case '?': if (next() != '>') panic(FAULT); if (mSt == 0) // the begining of the document mSt = 1; // misc before DTD st = -1; break; case ' ': break; default: panic(FAULT); } break; default: panic(FAULT); } } return enc; } /** * Sets up the document reader. * * @param name an encoding name. * @param is the document byte input stream. * @return a reader constructed from encoding name and input stream. * @exception UnsupportedEncodingException */ private Reader enc(String name, InputStream is) throws java.io.UnsupportedEncodingException { // DO NOT CLOSE curren
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -