📄 utf8reader.java

📁 java1.6众多例子参考
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
                            fOffset = 1;                            return out - offset;                        }                        expectedByte(2, 2);                    }                    count++;                }                if ((b1 & 0xC0) != 0x80) {                    if (out > offset) {                        fBuffer[0] = (byte)b0;                        fBuffer[1] = (byte)b1;                        fOffset = 2;                        return out - offset;                    }                    invalidByte(2, 2, b1);                }                int c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);                ch[out++] = (char)c;                count -= 1;                continue;            }            // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]            // Unicode: [zzzz yyyy] [yyxx xxxx]            if ((b0 & 0xF0) == 0xE0) {                int b1 = -1;                if (++in < total) {                    b1 = fBuffer[in] & 0x00FF;                }                else {                    b1 = fInputStream.read();                    if (b1 == -1) {                        if (out > offset) {                            fBuffer[0] = (byte)b0;                            fOffset = 1;                            return out - offset;                        }                        expectedByte(2, 3);                    }                    count++;                }                if ((b1 & 0xC0) != 0x80                     || (b0 == 0xED && b1 >= 0xA0)                    || ((b0 & 0x0F) == 0 && (b1 & 0x20) == 0)) {                    if (out > offset) {                        fBuffer[0] = (byte)b0;                        fBuffer[1] = (byte)b1;                        fOffset = 2;                        return out - offset;                    }                    invalidByte(2, 3, b1);                }                int b2 = -1;                if (++in < total) {                    b2 = fBuffer[in] & 0x00FF;                }                else {                    b2 = fInputStream.read();                    if (b2 == -1) {                        if (out > offset) {                            fBuffer[0] = (byte)b0;                            fBuffer[1] = (byte)b1;                            fOffset = 2;                            return out - offset;                        }                        expectedByte(3, 3);                    }                    count++;                }                if ((b2 & 0xC0) != 0x80) {                    if (out > offset) {                        fBuffer[0] = (byte)b0;                        fBuffer[1] = (byte)b1;                        fBuffer[2] = (byte)b2;                        fOffset = 3;                        return out - offset;                    }                    invalidByte(3, 3, b2);                }                int c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |                        (b2 & 0x003F);                ch[out++] = (char)c;                count -= 2;                continue;            }            // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*            // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)            //          [1101 11yy] [yyxx xxxx] (low surrogate)            //          * uuuuu = wwww + 1            if ((b0 & 0xF8) == 0xF0) {                int b1 = -1;                if (++in < total) {                    b1 = fBuffer[in] & 0x00FF;                }                else {                    b1 = fInputStream.read();                    if (b1 == -1) {                        if (out > offset) {                            fBuffer[0] = (byte)b0;                            fOffset = 1;                            return out - offset;                        }                        expectedByte(2, 4);                    }                    count++;                }                if ((b1 & 0xC0) != 0x80                    || ((b1 & 0x30) == 0 && (b0 & 0x07) == 0)) {                    if (out > offset) {                        fBuffer[0] = (byte)b0;                        fBuffer[1] = (byte)b1;                        fOffset = 2;                        return out - offset;                    }                    invalidByte(2, 4, b1);                }                int b2 = -1;                if (++in < total) {                    b2 = fBuffer[in] & 0x00FF;                }                else {                    b2 = fInputStream.read();                    if (b2 == -1) {                        if (out > offset) {                            fBuffer[0] = (byte)b0;                            fBuffer[1] = (byte)b1;                            fOffset = 2;                            return out - offset;                        }                        expectedByte(3, 4);                    }                    count++;                }                if ((b2 & 0xC0) != 0x80) {                    if (out > offset) {                        fBuffer[0] = (byte)b0;                        fBuffer[1] = (byte)b1;                        fBuffer[2] = (byte)b2;                        fOffset = 3;                        return out - offset;                    }                    invalidByte(3, 4, b2);                }                int b3 = -1;                if (++in < total) {                    b3 = fBuffer[in] & 0x00FF;                }                else {                    b3 = fInputStream.read();                    if (b3 == -1) {                        if (out > offset) {                            fBuffer[0] = (byte)b0;                            fBuffer[1] = (byte)b1;                            fBuffer[2] = (byte)b2;                            fOffset = 3;                            return out - offset;                        }                        expectedByte(4, 4);                    }                    count++;                }                if ((b3 & 0xC0) != 0x80) {                    if (out > offset) {                        fBuffer[0] = (byte)b0;                        fBuffer[1] = (byte)b1;                        fBuffer[2] = (byte)b2;                        fBuffer[3] = (byte)b3;                        fOffset = 4;                        return out - offset;                    }                    invalidByte(4, 4, b2);                }                // decode bytes into surrogate characters                int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);                if (uuuuu > 0x10) {                    invalidSurrogate(uuuuu);                }                int wwww = uuuuu - 1;                int zzzz = b1 & 0x000F;                int yyyyyy = b2 & 0x003F;                int xxxxxx = b3 & 0x003F;                int hs = 0xD800 | ((wwww << 6) & 0x03C0) | (zzzz << 2) | (yyyyyy >> 4);                int ls = 0xDC00 | ((yyyyyy << 6) & 0x03C0) | xxxxxx;                // set characters                ch[out++] = (char)hs;                ch[out++] = (char)ls;                count -= 2;                continue;            }            // error            if (out > offset) {                fBuffer[0] = (byte)b0;                fOffset = 1;                return out - offset;            }            invalidByte(1, 1, b0);        }        // return number of characters converted        if (DEBUG_READ) {            System.out.println("read(char[],"+offset+','+length+"): count="+count);        }        return count;    } // read(char[],int,int)    /**     * Skip characters.  This method will block until some characters are     * available, an I/O error occurs, or the end of the stream is reached.     *     * @param  n  The number of characters to skip     *     * @return    The number of characters actually skipped     *     * @exception  IOException  If an I/O error occurs     */    public long skip(long n) throws IOException {        long remaining = n;        final char[] ch = new char[fBuffer.length];        do {            int length = ch.length < remaining ? ch.length : (int)remaining;            int count = read(ch, 0, length);            if (count > 0) {                remaining -= count;            }            else {                break;            }        } while (remaining > 0);        long skipped = n - remaining;        return skipped;    } // skip(long):long    /**     * Tell whether this stream is ready to be read.     *     * @return True if the next read() is guaranteed not to block for input,     * false otherwise.  Note that returning false does not guarantee that the     * next read will block.     *     * @exception  IOException  If an I/O error occurs     */    public boolean ready() throws IOException {        return false;    } // ready()    /**     * Tell whether this stream supports the mark() operation.     */    public boolean markSupported() {        return false;    } // markSupported()    /**     * Mark the present position in the stream.  Subsequent calls to reset()     * will attempt to reposition the stream to this point.  Not all     * character-input streams support the mark() operation.     *     * @param  readAheadLimit  Limit on the number of characters that may be     *                         read while still preserving the mark.  After     *                         reading this many characters, attempting to     *                         reset the stream may fail.     *     * @exception  IOException  If the stream does not support mark(),     *                          or if some other I/O error occurs     */    public void mark(int readAheadLimit) throws IOException {        throw new IOException(fFormatter.formatMessage(fLocale, "OperationNotSupported", new Object[]{"mark()", "UTF-8"}));    } // mark(int)    /**     * Reset the stream.  If the stream has been marked, then attempt to     * reposition it at the mark.  If the stream has not been marked, then     * attempt to reset it in some way appropriate to the particular stream,     * for example by repositioning it to its starting point.  Not all     * character-input streams support the reset() operation, and some support     * reset() without supporting mark().     *     * @exception  IOException  If the stream has not been marked,     *                          or if the mark has been invalidated,     *                          or if the stream does not support reset(),     *                          or if some other I/O error occurs     */    public void reset() throws IOException {        fOffset = 0;        fSurrogate = -1;    } // reset()    /**     * Close the stream.  Once a stream has been closed, further read(),     * ready(), mark(), or reset() invocations will throw an IOException.     * Closing a previously-closed stream, however, has no effect.     *     * @exception  IOException  If an I/O error occurs     */    public void close() throws IOException {        fInputStream.close();    } // close()    //    // Private methods    //    /** Throws an exception for expected byte. */    private void expectedByte(int position, int count)        throws MalformedByteSequenceException {        throw new MalformedByteSequenceException(fFormatter,            fLocale,            XMLMessageFormatter.XML_DOMAIN,            "ExpectedByte",            new Object[] {Integer.toString(position), Integer.toString(count)});    } // expectedByte(int,int)    /** Throws an exception for invalid byte. */    private void invalidByte(int position, int count, int c)        throws MalformedByteSequenceException {        throw new MalformedByteSequenceException(fFormatter,            fLocale,            XMLMessageFormatter.XML_DOMAIN,            "InvalidByte",             new Object [] {Integer.toString(position), Integer.toString(count)});    } // invalidByte(int,int,int)    /** Throws an exception for invalid surrogate bits. */    private void invalidSurrogate(int uuuuu) throws MalformedByteSequenceException {        throw new MalformedByteSequenceException(fFormatter,            fLocale,            XMLMessageFormatter.XML_DOMAIN,            "InvalidHighSurrogate",             new Object[] {Integer.toHexString(uuuuu)});    } // invalidSurrogate(int)} // class UTF8Reader
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -