xincludetextreader.java
来自「JAVA 所有包」· Java 代码 · 共 514 行 · 第 1/2 页
JAVA
514 行
} /** * XMLEntityManager cares about endian-ness, since it creates its own optimized * readers. Since we're just using generic Java readers for now, we're not caring * about endian-ness. If this changes, even more code needs to be copied from * XMLEntity manager. -- PJM */ protected String getEncodingName(InputStream stream) throws IOException { final byte[] b4 = new byte[4]; String encoding = null; // this has the potential to throw an exception // it will be fixed when we ensure the stream is rewindable (see note above) stream.mark(4); int count = stream.read(b4, 0, 4); stream.reset(); if (count == 4) { encoding = getEncodingName(b4); } return encoding; } /** * Removes the byte order mark from the stream, if * it exists and returns the encoding name. * * @param stream * @param encoding * @throws IOException */ protected String consumeBOM(InputStream stream, String encoding) throws IOException { byte[] b = new byte[3]; int count = 0; stream.mark(3); if (encoding.equals("UTF-8")) { count = stream.read(b, 0, 3); if (count == 3) { final int b0 = b[0] & 0xFF; final int b1 = b[1] & 0xFF; final int b2 = b[2] & 0xFF; if (b0 != 0xEF || b1 != 0xBB || b2 != 0xBF) { // First three bytes are not BOM, so reset. stream.reset(); } } else { stream.reset(); } } else if (encoding.startsWith("UTF-16")) { count = stream.read(b, 0, 2); if (count == 2) { final int b0 = b[0] & 0xFF; final int b1 = b[1] & 0xFF; if (b0 == 0xFE && b1 == 0xFF) { return "UTF-16BE"; } else if (b0 == 0xFF && b1 == 0xFE) { return "UTF-16LE"; } } // First two bytes are not BOM, so reset. stream.reset(); } // We could do UTF-32, but since the getEncodingName() doesn't support that // we won't support it here. // To implement UTF-32, look for: 00 00 FE FF for big-endian // or FF FE 00 00 for little-endian return encoding; } /** * REVISIT: This code is taken from com.sun.org.apache.xerces.internal.impl.XMLEntityManager. * Is there any way we can share the code, without having it implemented twice? * I think we should make it public and static in XMLEntityManager. --PJM * * Returns the IANA encoding name that is auto-detected from * the bytes specified, with the endian-ness of that encoding where appropriate. * * @param b4 The first four bytes of the input. * @return the encoding name, or null if no encoding could be detected */ protected String getEncodingName(byte[] b4) { // UTF-16, with BOM int b0 = b4[0] & 0xFF; int b1 = b4[1] & 0xFF; if (b0 == 0xFE && b1 == 0xFF) { // UTF-16, big-endian return "UTF-16BE"; } if (b0 == 0xFF && b1 == 0xFE) { // UTF-16, little-endian return "UTF-16LE"; } // UTF-8 with a BOM int b2 = b4[2] & 0xFF; if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { return "UTF-8"; } // other encodings int b3 = b4[3] & 0xFF; if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { // UCS-4, big endian (1234) return "ISO-10646-UCS-4"; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { // UCS-4, little endian (4321) return "ISO-10646-UCS-4"; } if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { // UCS-4, unusual octet order (2143) return "ISO-10646-UCS-4"; } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { // UCS-4, unusual octect order (3412) return "ISO-10646-UCS-4"; } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { // UTF-16, big-endian, no BOM // (or could turn out to be UCS-2... return "UTF-16BE"; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { // UTF-16, little-endian, no BOM // (or could turn out to be UCS-2... return "UTF-16LE"; } if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { // EBCDIC // a la xerces1, return CP037 instead of EBCDIC here return "CP037"; } // this signals us to use the value from the encoding attribute return null; } // getEncodingName(byte[]):Object[] /** * Read the input stream as text, and pass the text on to the XIncludeHandler * using calls to characters(). This will read all of the text it can from the * resource. * * @throws IOException */ public void parse() throws IOException { fReader = getReader(fSource); fSource = null; int readSize = fReader.read(fTempString.ch, 0, fTempString.ch.length - 1); while (readSize != -1) { for (int i = 0; i < readSize; ++i) { char ch = fTempString.ch[i]; if (!isValid(ch)) { if (XMLChar.isHighSurrogate(ch)) { int ch2; // retrieve next character if (++i < readSize) { ch2 = fTempString.ch[i]; } // handle rare boundary case else { ch2 = fReader.read(); if (ch2 != -1) { fTempString.ch[readSize++] = (char) ch2; } } if (XMLChar.isLowSurrogate(ch2)) { // convert surrogates to a supplemental character int sup = XMLChar.supplemental(ch, (char)ch2); if (!isValid(sup)) { fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, "InvalidCharInContent", new Object[] { Integer.toString(sup, 16) }, XMLErrorReporter.SEVERITY_FATAL_ERROR); } } else { fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, "InvalidCharInContent", new Object[] { Integer.toString(ch2, 16) }, XMLErrorReporter.SEVERITY_FATAL_ERROR); } } else { fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, "InvalidCharInContent", new Object[] { Integer.toString(ch, 16) }, XMLErrorReporter.SEVERITY_FATAL_ERROR); } } } if (fHandler != null && readSize > 0) { fTempString.offset = 0; fTempString.length = readSize; fHandler.characters( fTempString, fHandler.modifyAugmentations(null, true)); } readSize = fReader.read(fTempString.ch, 0, fTempString.ch.length - 1); } } /** * Sets the input source on this text reader. * * @param source The XMLInputSource to use. */ public void setInputSource(XMLInputSource source) { fSource = source; } /** * Closes the stream. Call this after parse(), or when there is no longer any need * for this object. * * @throws IOException */ public void close() throws IOException { if (fReader != null) { fReader.close(); fReader = null; } } /** * Returns true if the specified character is a valid XML character * as per the rules of XML 1.0. * * @param ch The character to check. */ protected boolean isValid(int ch) { return XMLChar.isValid(ch); } /** * Sets the buffer size property for the reader which decides the chunk sizes that are parsed * by the reader at a time and passed to the handler * * @param bufferSize The size of the buffer desired */ protected void setBufferSize(int bufferSize) { if (fTempString.ch.length != ++bufferSize) { fTempString.ch = new char[bufferSize]; } } }
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?