xincludetextreader.java

来自「JAVA 所有包」· Java 代码 · 共 514 行 · 第 1/2 页

JAVA
514
字号
    }

    /**
     * Peeks at the first four bytes of the stream and tries to auto-detect
     * the encoding from them. The stream position is restored afterwards.
     * <p>
     * XMLEntityManager cares about endian-ness, since it creates its own optimized
     * readers. Since we're just using generic Java readers for now, we're not caring
     * about endian-ness.  If this changes, even more code needs to be copied from
     * XMLEntity manager. -- PJM
     *
     * @param stream the stream to inspect; it is marked and reset, so it
     *               needs to support mark/reset
     * @return the detected encoding name, or null if fewer than four bytes
     *         could be read or no known signature matched
     * @throws IOException if reading from or resetting the stream fails
     */
    protected String getEncodingName(InputStream stream) throws IOException {
        final byte[] b4 = new byte[4];
        String encoding = null;

        // this has the potential to throw an exception (reset() fails on a
        // stream that does not support mark/reset);
        // it will be fixed when we ensure the stream is rewindable
        stream.mark(4);
        // A single read() may legally return fewer bytes than requested even
        // though more are coming, so keep reading until the buffer is full.
        final int count = readFully(stream, b4, 4);
        stream.reset();
        if (count == 4) {
            encoding = getEncodingName(b4);
        }
        return encoding;
    }

    /**
     * Removes the byte order mark from the stream, if it exists, and returns
     * the encoding name.
     * <p>
     * For "UTF-8" the three bytes EF BB BF are skipped when present. For any
     * encoding whose name starts with "UTF-16" the two-byte mark is skipped
     * when present and the endian-specific name it implies is returned. In
     * every other case nothing is consumed.
     *
     * @param stream   the stream to inspect; it is marked and possibly reset
     * @param encoding the encoding name assumed so far; must not be null
     * @return "UTF-16BE" or "UTF-16LE" when a UTF-16 byte order mark was
     *         consumed, otherwise <code>encoding</code> unchanged
     * @throws IOException if reading from or resetting the stream fails
     */
    protected String consumeBOM(InputStream stream, String encoding)
        throws IOException {

        final byte[] b = new byte[3];
        stream.mark(3);
        if (encoding.equals("UTF-8")) {
            final int count = readFully(stream, b, 3);
            boolean isBOM = false;
            if (count == 3) {
                final int b0 = b[0] & 0xFF;
                final int b1 = b[1] & 0xFF;
                final int b2 = b[2] & 0xFF;
                isBOM = (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF);
            }
            if (!isBOM) {
                // First three bytes are not BOM (or could not be read), so reset.
                stream.reset();
            }
        }
        else if (encoding.startsWith("UTF-16")) {
            final int count = readFully(stream, b, 2);
            if (count == 2) {
                final int b0 = b[0] & 0xFF;
                final int b1 = b[1] & 0xFF;
                if (b0 == 0xFE && b1 == 0xFF) {
                    return "UTF-16BE";
                }
                else if (b0 == 0xFF && b1 == 0xFE) {
                    return "UTF-16LE";
                }
            }
            // First two bytes are not BOM, so reset.
            stream.reset();
        }
        // We could do UTF-32, but since the getEncodingName() doesn't support that
        // we won't support it here.
        // To implement UTF-32, look for:  00 00 FE FF for big-endian
        //                             or  FF FE 00 00 for little-endian
        return encoding;
    }

    /**
     * Reads from the stream until <code>length</code> bytes have been stored
     * in <code>buffer</code> or the stream stops delivering data.
     *
     * @param stream the stream to read from
     * @param buffer the destination; filled starting at index 0
     * @param length the number of bytes wanted
     * @return the number of bytes actually stored, between 0 and
     *         <code>length</code>
     * @throws IOException if the underlying read fails
     */
    private static int readFully(InputStream stream, byte[] buffer, int length)
        throws IOException {
        int total = 0;
        while (total < length) {
            final int n = stream.read(buffer, total, length - total);
            // -1 is end of stream; 0 is treated the same way so that a
            // misbehaving stream cannot keep this loop spinning.
            if (n <= 0) {
                break;
            }
            total += n;
        }
        return total;
    }

    /**
     * REVISIT: This code is taken from com.sun.org.apache.xerces.internal.impl.XMLEntityManager.
     *          Is there any way we can share the code, without having it implemented twice?
     *          I think we should make it public and static in XMLEntityManager. --PJM
     *
     * Returns the IANA encoding name that is auto-detected from
     * the bytes specified, with the endian-ness of that encoding where appropriate.
     *
     * @param b4    The first four bytes of the input; must hold at least
     *              four elements.
     * @return the encoding name, or null if no encoding could be detected
     */
    protected String getEncodingName(byte[] b4) {

        // UTF-16, with BOM
        int b0 = b4[0] & 0xFF;
        int b1 = b4[1] & 0xFF;
        if (b0 == 0xFE && b1 == 0xFF) {
            // UTF-16, big-endian
            return "UTF-16BE";
        }
        if (b0 == 0xFF && b1 == 0xFE) {
            // UTF-16, little-endian
            return "UTF-16LE";
        }

        // UTF-8 with a BOM
        int b2 = b4[2] & 0xFF;
        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
            return "UTF-8";
        }

        // other encodings
        int b3 = b4[3] & 0xFF;
        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
            // UCS-4, big endian (1234)
            return "ISO-10646-UCS-4";
        }
        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
            // UCS-4, little endian (4321)
            return "ISO-10646-UCS-4";
        }
        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
            // UCS-4, unusual octet order (2143)
            return "ISO-10646-UCS-4";
        }
        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
            // UCS-4, unusual octet order (3412)
            return "ISO-10646-UCS-4";
        }
        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
            // UTF-16, big-endian, no BOM
            // (or could turn out to be UCS-2...)
            return "UTF-16BE";
        }
        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
            // UTF-16, little-endian, no BOM
            // (or could turn out to be UCS-2...)
            return "UTF-16LE";
        }
        if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
            // EBCDIC
            // a la xerces1, return CP037 instead of EBCDIC here
            return "CP037";
        }

        // this signals us to use the value from the encoding attribute
        return null;

    } // getEncodingName(byte[]):String

    /**
     * Read the input stream as text, and pass the text on to the XIncludeHandler
     * using calls to characters().  This will read all of the text it can from the
     * resource.
     * <p>
     * Every character is checked with {@link #isValid(int)}; surrogate pairs
     * are combined and checked as one supplemental character. Invalid
     * characters are reported as fatal "InvalidCharInContent" errors.
     *
     * @throws IOException if obtaining the reader or reading from it fails
     */
    public void parse() throws IOException {

        fReader = getReader(fSource);
        fSource = null;
        // Always ask for one char fewer than the buffer holds: the spare slot
        // receives the second half of a surrogate pair that straddles two reads.
        int readSize = fReader.read(fTempString.ch, 0, fTempString.ch.length - 1);
        while (readSize != -1) {
            for (int i = 0; i < readSize; ++i) {
                char ch = fTempString.ch[i];
                if (!isValid(ch)) {
                    if (XMLChar.isHighSurrogate(ch)) {
                        int ch2;
                        // retrieve next character
                        if (++i < readSize) {
                            ch2 = fTempString.ch[i];
                        }
                        // handle rare boundary case
                        else {
                            ch2 = fReader.read();
                            if (ch2 != -1) {
                                fTempString.ch[readSize++] = (char) ch2;
                            }
                        }
                        if (XMLChar.isLowSurrogate(ch2)) {
                            // convert surrogates to a supplemental character
                            int sup = XMLChar.supplemental(ch, (char)ch2);
                            if (!isValid(sup)) {
                                reportInvalidChar(sup);
                            }
                        }
                        else if (ch2 != -1) {
                            reportInvalidChar(ch2);
                        }
                        else {
                            // The input ended right after the high surrogate:
                            // report that unpaired surrogate instead of the
                            // end-of-stream marker (-1).
                            reportInvalidChar(ch);
                        }
                    }
                    else {
                        reportInvalidChar(ch);
                    }
                }
            }
            if (fHandler != null && readSize > 0) {
                fTempString.offset = 0;
                fTempString.length = readSize;
                fHandler.characters(
                    fTempString,
                    fHandler.modifyAugmentations(null, true));
            }
            readSize = fReader.read(fTempString.ch, 0, fTempString.ch.length - 1);
        }

    }

    /**
     * Reports the given code point as a fatal "InvalidCharInContent" error.
     *
     * @param c the offending character or code point; it is passed to the
     *          error reporter as a hexadecimal string
     */
    private void reportInvalidChar(int c) {
        fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                                   "InvalidCharInContent",
                                   new Object[] { Integer.toString(c, 16) },
                                   XMLErrorReporter.SEVERITY_FATAL_ERROR);
    }

    /**
     * Sets the input source on this text reader.
     *
     * @param source The XMLInputSource to use.
     */
    public void setInputSource(XMLInputSource source) {
        fSource = source;
    }

    /**
     * Closes the stream.  Call this after parse(), or when there is no longer any need
     * for this object.
     *
     * @throws IOException if closing the underlying reader fails
     */
    public void close() throws IOException {
        if (fReader != null) {
            try {
                fReader.close();
            }
            finally {
                // Drop the reference even when close() throws, so the reader
                // is not held on to (or closed a second time) afterwards.
                fReader = null;
            }
        }
    }

    /**
     * Returns true if the specified character is a valid XML character
     * as per the rules of XML 1.0.
     *
     * @param ch The character to check.
     * @return the result of XMLChar.isValid for <code>ch</code>
     */
    protected boolean isValid(int ch) {
        return XMLChar.isValid(ch);
    }

    /**
     * Sets the buffer size property for the reader which decides the chunk sizes that are parsed
     * by the reader at a time and passed to the handler
     *
     * @param bufferSize The size of the buffer desired
     */
    protected void setBufferSize(int bufferSize) {
        // parse() fills at most length - 1 chars per read and keeps the last
        // slot free for a trailing low surrogate, so allocate one extra char.
        // NOTE(review): a non-positive bufferSize is not rejected here;
        // presumably callers validate it -- confirm.
        final int capacity = bufferSize + 1;
        if (fTempString.ch.length != capacity) {
            fTempString.ch = new char[capacity];
        }
    }

}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?