📄 ucsreader.java

📁 电子地图服务器,搭建自己的地图服务
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
                ch[offset + charsRead] = fCharBuf[--fCharCount];
                charsRead++;
            } else {
                break;
            }
        }

        // Reading remaining chars from InputStream.
        if (0 != (length - charsRead)) {
            /*
             * Each output char (two for supplementary characters) will require
             * us to read 4 input bytes. But as we cannot predict how many
             * supplementary chars we will encounter, so we should try to read
             * maximum possible number.
             */
            int byteLength = (length - charsRead) << 2;

            if (byteLength > fBuffer.length) {
                byteLength = fBuffer.length;
            }

            int count = fInputStream.read(fBuffer, 0, byteLength);

            if (-1 == count) {
                return (0 == charsRead) ? (-1) : charsRead;
            } else {
                // try and make count be a multiple of the number of bytes we're
                // looking for (simply reading 1 to 3 bytes from input stream to
                // ensure the last code point is complete)
                // this looks ugly, but it avoids an if at any rate...
                int numToRead = ((4 - (count & 3)) & 3);

                for (int i = 0; i < numToRead; i++) {
                    int charRead = fInputStream.read();

                    if (charRead == -1) {
                        // end of input; something likely went wrong! Pad buffer
                        // with zeros.
                        for (int j = i; j < numToRead; j++)
                            fBuffer[count + j] = 0;

                        break;
                    } else {
                        fBuffer[count + i] = (byte) charRead;
                    }
                }

                count += numToRead;

                // now count is a multiple of the right number of bytes
                int numChars = count >> 2;
                int curPos = 0;

                /*
                 * `i` is index of currently processed char from InputStream.
                 * `charsCount` also counts number of chars that were (possibly)
                 * read from internal char buffer.
                 */
                int charsCount = charsRead;
                int i;

                for (i = 0; (i < numChars) && (length >= charsCount); i++) {
                    int b0 = fBuffer[curPos++] & 0xff;
                    int b1 = fBuffer[curPos++] & 0xff;
                    int b2 = fBuffer[curPos++] & 0xff;
                    int b3 = fBuffer[curPos++] & 0xff;

                    int codepoint;

                    if (UCS4BE == fEncoding) {
                        codepoint = ((b0 << 24) + (b1 << 16) + (b2 << 8) + b3);
                    } else {
                        codepoint = ((b3 << 24) + (b2 << 16) + (b1 << 8) + b0);
                    }

                    // Again, validity of this codepoint is never checked, this
                    // can yield problems sometimes.
                    if (!isSupplementaryCodePoint(codepoint)) {
                        ch[offset + charsCount] = (char) codepoint;
                        charsCount++;
                    } else {
                        // Checking if we can put another 2 chars in buffer.
                        if (2 <= (length - charsCount)) {
                            int cp1 = (codepoint - 0x10000) & 0xFFFFF;
                            ch[offset + charsCount] = (char) (0xD800 + (cp1 >>> 10));
                            ch[offset + charsCount + 1] = (char) (0xDC00 + (cp1 & 0x3FF));
                            charsCount += 2;
                        } else {
                            break; // END for
                        }
                    }
                } // END for

                // Storing data, that possibly remain in `fBuffer` into internal
                // char buffer for future use :)
                curPos = (numChars << 2) - 1;

                for (int k = numChars; k > i; k--) {
                    // Reading bytes in reverse order
                    int b3 = fBuffer[curPos--] & 0xff;
                    int b2 = fBuffer[curPos--] & 0xff;
                    int b1 = fBuffer[curPos--] & 0xff;
                    int b0 = fBuffer[curPos--] & 0xff;

                    int codepoint;

                    if (UCS4BE == fEncoding) {
                        codepoint = ((b0 << 24) + (b1 << 16) + (b2 << 8) + b3);
                    } else {
                        codepoint = ((b3 << 24) + (b2 << 16) + (b1 << 8) + b0);
                    }

                    // Look if we need to increase buffer size
                    if (2 > (fCharBuf.length - k)) {
                        char[] newBuf = new char[fCharBuf.length << 1];
                        System.arraycopy(fCharBuf, 0, newBuf, 0, fCharBuf.length);
                        fCharBuf = newBuf;
                    }

                    if (!isSupplementaryCodePoint(codepoint)) {
                        fCharBuf[fCharCount++] = (char) codepoint;
                    } else {
                        int cp1 = (codepoint - 0x10000) & 0xFFFFF;
                        // In this case store low surrogate code unit first, so that
                        // it can be read back after high one.
                        fCharBuf[fCharCount++] = (char) (0xDC00 + ((char) cp1 & 0x3FF));
                        fCharBuf[fCharCount++] = (char) (0xD800 + (cp1 >>> 10));
                    }
                } // END for

                return charsCount;
            } // END if (-1 == count) ELSE
        } // END if (0 != (length - charsRead))

        return charsRead;
    } // read(char[],int,int)

    /**
     * Read <code>UCS-2</code> characters into a portion of an array.
     * This method will block until some input is available, an I/O
     * error occurs, or the end of the stream is reached.
     * <p>
     * In original <code>UCSReader</code> this code was part of
     * <code>read(char[], int, int)</code> method, but I removed it
     * from there to reduce complexity of the latter.
     * </p>
     *
     * @param      ch      destination buffer
     * @param      offset  offset at which to start storing characters
     * @param      length  maximum number of characters to read
     *
     * @return     The number of characters read, or <code>-1</code>
     *             if the end of the stream has been reached
     *
     * @exception  IOException  If an I/O error occurs
     */
    protected int readUCS2(char[] ch, int offset, int length)
        throws IOException {
        // Two input bytes are needed per output char. Compare against half
        // the buffer size instead of shifting first, so a very large `length`
        // cannot overflow, and keep the cap even so that a trailing half code
        // unit always has room for its second byte.
        int byteLength;

        if (length > (fBuffer.length >> 1)) {
            byteLength = fBuffer.length & ~1;
        } else {
            byteLength = length << 1;
        }

        int count = fInputStream.read(fBuffer, 0, byteLength);

        if (count == -1) {
            return -1;
        }

        // An odd byte count means the last code unit was only half read:
        // fetch its second byte with a single-byte read.
        if ((count & 1) != 0) {
            int charRead = fInputStream.read();

            // The completing byte belongs directly after the bytes already
            // read, i.e. at index `count`; only then is it counted. On end of
            // input the incomplete unit is padded with a zero byte.
            fBuffer[count] = (byte) ((charRead == -1) ? 0 : charRead);
            count++;
        }

        // now count is a multiple of two bytes
        int numChars = count >> 1;
        int curPos = 0;

        for (int i = 0; i < numChars; i++) {
            int b0 = fBuffer[curPos++] & 0xff;
            int b1 = fBuffer[curPos++] & 0xff;

            // Big-endian input carries the high-order byte first; any other
            // encoding is decoded with the second byte as the high-order one.
            if (fEncoding == UCS2BE) {
                ch[offset + i] = (char) ((b0 << 8) + b1);
            } else {
                ch[offset + i] = (char) ((b1 << 8) + b0);
            }
        }

        return numChars;
    } // END readUCS2(char[], int, int)

    /**
     * Skip characters.  This method will block until some characters are
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * @param  n  The number of characters to skip
     *
     * @return    The number of characters actually skipped
     *
     * @exception  IOException  If an I/O error occurs
     */
    public long skip(long n) throws IOException {
        /*
         * charWidth is the shift that converts between chars and bytes:
         * encoding values of 4 and above use 4 bytes per char (shift by 2),
         * lower values use 2 bytes per char (shift by 1). Shifting is used
         * instead of multiplying/dividing, as elsewhere in this code.
         */
        int charWidth = (fEncoding >= 4) ? 2 : 1;

        // Clamp the request so that converting it to a byte count cannot
        // overflow. Without this, skip(Long.MAX_VALUE) would turn into a
        // negative byte count and skip nothing.
        long maxChars = Long.MAX_VALUE >> charWidth;
        long charsRequested = (n > maxChars) ? maxChars : n;

        long bytesSkipped = fInputStream.skip(charsRequested << charWidth);

        // Low-order bits that are set when the skip stopped inside a char.
        long partialCharMask = (1L << charWidth) - 1;
        long charsSkipped = bytesSkipped >>> charWidth;

        if ((bytesSkipped & partialCharMask) != 0) {
            // A partially skipped char is reported as skipped.
            charsSkipped++;
        }

        return charsSkipped;
    } // skip(long):long

    /**
     * Tell whether this stream is ready to be read.
     *
     * @return True if the next read() is guaranteed not to block for input,
     * false otherwise.  Note that returning false does not guarantee that the
     * next read will block.
     *
     * @exception  IOException  If an I/O error occurs
     */
    public boolean ready() throws IOException {
        // This reader never promises a non-blocking read, so the answer is
        // unconditionally "not known to be ready".
        final boolean guaranteedNotToBlock = false;

        return guaranteedNotToBlock;
    } // ready()

    /**
     * Tell whether this stream supports the mark() operation.
     */
    public boolean markSupported() {
        // Marking is delegated entirely to the wrapped byte stream, so this
        // reader supports mark() exactly when that stream does.
        final boolean supportedByByteStream = this.fInputStream.markSupported();

        return supportedByByteStream;
    } // markSupported()

    /**
     * Mark the present position in the stream.  Subsequent calls to
     * <code>reset</code> will attempt to reposition the stream to this point.
     * Not all character-input streams support the <code>mark</code> operation;
     * this one supports it only to the extent that the underlying byte stream
     * does, because it relies entirely on that stream's marking facilities.
     *
     * @param  readAheadLimit  Limit on the number of characters that may be
     *                         read while still preserving the mark.  After
     *                         reading this many characters, attempting to
     *                         reset the stream may fail.
     *
     * @exception  IOException  If the stream does not support
     *                          <code>mark</code>, or if some other I/O error
     *                          occurs
     */
    public void mark(int readAheadLimit) throws IOException {
        // The caller's limit counts characters, while the underlying byte
        // stream's limit counts bytes. One char never takes more than 4 input
        // bytes in the encodings handled here, so scale by 4 (the worst case);
        // passing the char count through unchanged could let the mark expire
        // before the promised number of characters has been read.
        int byteLimit;

        if (readAheadLimit <= 0) {
            // Nothing to scale; hand the value to the byte stream unchanged.
            byteLimit = readAheadLimit;
        } else if (readAheadLimit > (Integer.MAX_VALUE >> 2)) {
            // Scaling would overflow an int; use the largest expressible limit.
            byteLimit = Integer.MAX_VALUE;
        } else {
            byteLimit = readAheadLimit << 2;
        }

        fInputStream.mark(byteLimit);
    } // mark(int)

    /**
     * Reset the stream.  If the stream has been marked, then attempt to
     * reposition it at the mark.  If the stream has not been marked, then
     * attempt to reset it in some way appropriate to the particular stream,
     * for example by repositioning it to its starting point. This stream
     * implementation does not support <code>mark</code>/<code>reset</code>
     * by itself, it relies on underlying byte stream in this matter.
     *
     * @exception  IOException  If the stream has not been marked,
     *                          or if the mark has been invalidated,
     *                          or if the stream does not support reset(),
     *                          or if some other I/O error occurs
     */
    public void reset() throws IOException {
        // Repositioning is performed solely by the wrapped byte stream.
        // NOTE(review): only the byte stream is rewound here; nothing in this
        // method touches the reader's own buffers - confirm that is intended.
        this.fInputStream.reset();
    } // reset()

    /**
     * Close the stream.  Once a stream has been closed, further
     * <code>read</code>, <code>ready</code>, <code>mark</code>,
     * or <code>reset</code> invocations will throw an IOException.
     * Closing a previously-closed stream, however, has no effect.
     *
     * @exception  IOException  If an I/O error occurs
     */
    public void close() throws IOException {
        // Closing an already-closed reader must be a no-op; without this guard
        // a second call would dereference the nulled stream reference.
        if (fInputStream == null) {
            return;
        }

        try {
            fInputStream.close();
        } finally {
            // Drop the stream and both buffers even if the underlying close()
            // throws, so the reader is consistently in its closed state.
            fInputStream = null;
            fCharBuf = null;
            fBuffer = null;
        }
    } // close()

    /**
     * Returns the encoding currently in use by this character stream.
     *
     * @return Encoding of this stream. Either ISO-10646-UCS-2 or
     *         ISO-10646-UCS-4. Problem is that this string doesn't indicate
     *         the byte order of that encoding. What to do, then? Unlike
     *         UTF-16 byte order cannot be made part of the encoding name
     *         in this case and still can be critical. Currently you can
     *         find out the byte order by invoking <code>getByteOrder</code>
     *         method.
     */
    public String getEncoding() {
        // Encoding values below 4 are reported as UCS-2, all others as UCS-4.
        final boolean isUcs2 = (fEncoding < 4);

        return isUcs2 ? "ISO-10646-UCS-2" : "ISO-10646-UCS-4";
    }

    /**
     * Returns byte order ("endianness") of the encoding currently in use by
     * this character stream. This is a string with two possible values:
     * <code>LITTLE_ENDIAN</code> and <code>BIG_ENDIAN</code>. Maybe using
     * a named constant is a better alternative, but I just don't like them.
     * But feel free to change this behavior if you think that would be
     * better.
     *
     * @return <code>LITTLE_ENDIAN</code> or <code>BIG_ENDIAN</code> depending
     *         on byte order of current encoding of this stream.
     */
    public String getByteOrder() {
        // Encoding values 1 and 4 are reported as little-endian; every other
        // value is reported as big-endian.
        final boolean littleEndian = (fEncoding == 1) || (fEncoding == 4);

        return littleEndian ? "LITTLE_ENDIAN" : "BIG_ENDIAN";
    }

    /**
     * Determines whether the specified character (Unicode code point)
     * is in the supplementary character range. The method call is
     * equivalent to the expression:
     * <blockquote><pre>
     * codePoint >= 0x10000 && codePoint <= 0x10ffff
     * </pre></blockquote>
     *
     * Stolen from JDK 1.5 <code>java.lang.Character</code> class in
     * order to provide JDK 1.4 compatibility.
     *
     * @param  codePoint the character (Unicode code point) to be tested
     * @return <code>true</code> if the specified character is in the Unicode
     *         supplementary character range; <code>false</code> otherwise.
     */
    protected boolean isSupplementaryCodePoint(int codePoint) {
        // Anything below the first supplementary code point is outside the range.
        if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
            return false;
        }

        // Otherwise it qualifies as long as it does not exceed the maximum code point.
        return codePoint <= MAX_CODE_POINT;
    }
} // class UCSReader
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -