utf_8_writer.java

来自「This is a resource based on j2me embedde」· Java 代码 · 共 207 行

JAVA
207
字号
/*
 *
 *
 * Copyright  1990-2007 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 only, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is
 * included at /legal/license.txt).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this work; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
 * Clara, CA 95054 or visit www.sun.com if you need additional
 * information or have any questions.
 */

package com.sun.cldc.i18n.j2me;

import java.io.*;

/**
 * Writer for UTF-8 encoded output streams.
 * <p>
 * Characters in the Basic Multilingual Plane are encoded as 1, 2 or 3
 * bytes. A high surrogate (U+D800..U+DBFF) followed by a low surrogate
 * (U+DC00..U+DFFF) is combined into a single supplementary code point and
 * encoded as 4 bytes. The high surrogate is remembered between calls to
 * {@link #write(char[], int, int)}, so a pair may be split across two
 * calls.
 * <p>
 * A surrogate code unit without a matching partner is replaced by the
 * single byte 0x3f ('?').
 */
public class UTF_8_Writer extends com.sun.cldc.i18n.StreamWriter {
    /** pending high surrogate code unit, or zero */
    protected int pendingSurrogate;

    /**
     * This value replaces invalid characters
     * (that is, surrogate code units without a pair).
     */
    private static final int REPLACEMENT_VALUE = 0x3f;

    /**
     * Write a portion of an array of characters.
     * <p>
     * Each character is encoded and handed to the underlying stream
     * individually. A trailing high surrogate is not written; it is kept in
     * {@link #pendingSurrogate} until the next character arrives or the
     * writer is closed.
     *
     * @param  cbuf  Array of characters
     * @param  off   Offset from which to start writing characters
     * @param  len   Number of characters to write
     *
     * @exception  IOException  If an I/O error occurs
     */
    public void write(char[] cbuf, int off, int len) throws IOException {
        byte[] outputByte = new byte[4];     // Never more than 4 encoded bytes
        int inputChar;
        int outputSize;
        int count = 0;

        while (count < len) {
            inputChar = 0xffff & cbuf[off + count];

            if (0 != pendingSurrogate) {
                if (isLowSurrogate(inputChar)) {
                    // Complete the pair; the result is >= 0x10000 and is
                    // encoded as 4 bytes below.
                    inputChar = combineSurrogates(pendingSurrogate, inputChar);
                } else {
                    // write replacement value instead of unpaired surrogate,
                    // then handle the current character as usual
                    outputByte[0] = REPLACEMENT_VALUE;
                    out.write(outputByte, 0, 1);
                }
                pendingSurrogate = 0;
            }

            if (isHighSurrogate(inputChar)) {
                // Nothing can be emitted until the low half is seen.
                pendingSurrogate = inputChar;
                outputSize = 0;
            } else {
                outputSize = encode(inputChar, outputByte);
            }

            out.write(outputByte, 0, outputSize);
            count++;
        }
    }

    /**
     * Get the size in bytes of an array of chars.
     * <p>
     * The count starts from the current {@link #pendingSurrogate} state but
     * does not modify it, so the result matches what a subsequent
     * {@link #write(char[], int, int)} with the same arguments would emit.
     *
     * @param      cbuf   Source buffer
     * @param      offset Offset at which to start counting character sizes
     * @param      length number of characters to use for counting
     *
     * @return     number of bytes that the characters would be converted to
     */
    public int sizeOf(char[] cbuf, int offset, int length) {
        int inputChar;
        int outputCount = 0;
        int count = 0;
        int localPendingSurrogate = pendingSurrogate;

        while (count < length) {
            inputChar = 0xffff & cbuf[offset + count];

            if (0 != localPendingSurrogate) {
                if (isLowSurrogate(inputChar)) {
                    inputChar =
                        combineSurrogates(localPendingSurrogate, inputChar);
                } else {
                    // replacement value is written instead of the
                    // unpaired high surrogate
                    outputCount += 1;
                }
                localPendingSurrogate = 0;
            }

            if (isHighSurrogate(inputChar)) {
                // contributes no bytes until its partner is known
                localPendingSurrogate = inputChar;
            } else {
                outputCount += encodedSize(inputChar);
            }
            count++;
        }

        return outputCount;
    }

    /**
     * Open the writer.
     * <p>
     * Any high surrogate left over from previous use is discarded.
     *
     * @param outputStream stream the encoded bytes are written to
     * @param encoding encoding
     * @return the writer
     * @throws UnsupportedEncodingException as reported by the superclass
     */
    public Writer open(OutputStream outputStream, String encoding)
            throws UnsupportedEncodingException {
        pendingSurrogate = 0;
        return super.open(outputStream, encoding);
    }

    /**
     * Close the writer and the output stream.
     * <p>
     * If a high surrogate is still pending, the replacement value is written
     * in its place first. The pending state is cleared and
     * {@code super.close()} is invoked even when that final write fails, so
     * that a failing stream is not left open.
     *
     * @throws IOException If an I/O error occurs
     */
    public void close() throws IOException {
        try {
            if (0 != pendingSurrogate) {
                // write replacement value instead of the unpaired surrogate
                byte[] outputByte = new byte[1];
                outputByte[0] = REPLACEMENT_VALUE;
                out.write(outputByte, 0, 1);
            }
        } finally {
            pendingSurrogate = 0;
            super.close();
        }
    }

    // flush() can do nothing with pendingSurrogate because the surrogate
    // contains only a portion of the character code, and the second half
    // is still expected to arrive.
    // public void flush() throws IOException { super.flush(); }

    /** Tells whether the value is a high (leading) surrogate code unit. */
    private static boolean isHighSurrogate(int c) {
        return 0xd800 <= c && c <= 0xdbff;
    }

    /** Tells whether the value is a low (trailing) surrogate code unit. */
    private static boolean isLowSurrogate(int c) {
        return 0xdc00 <= c && c <= 0xdfff;
    }

    /**
     * Combines a surrogate pair into one supplementary code point.
     *
     * 000u uuuu xxxx xxxx xxxx xxxx
     * 1101 10ww wwxx xxxx   1101 11xx xxxx xxxx
     * (adding 0x0040 to the high half is the same as adding 0x10000 to the
     * combined value)
     */
    private static int combineSurrogates(int high, int low) {
        final int highHalf = (high & 0x03ff) + 0x0040;
        final int lowHalf = low & 0x03ff;
        return (highHalf << 10) | lowHalf;
    }

    /**
     * Returns the number of bytes emitted for a value that is not a high
     * surrogate. An isolated low surrogate counts as the one-byte
     * replacement value.
     */
    private static int encodedSize(int c) {
        if (c < 0x80) {
            return 1;
        }
        if (c < 0x800) {
            return 2;
        }
        if (isLowSurrogate(c)) {
            return 1;
        }
        if (c < 0x10000) {
            return 3;
        }
        return 4;
    }

    /**
     * Encodes a value that is not a high surrogate into the start of
     * {@code buf} and returns the number of bytes stored. An isolated low
     * surrogate is stored as the replacement value.
     */
    private static int encode(int c, byte[] buf) {
        if (c < 0x80) {
            buf[0] = (byte)c;
            return 1;
        }
        if (c < 0x800) {
            buf[0] = (byte)(0xc0 | ((c >> 6) & 0x1f));
            buf[1] = (byte)(0x80 | (c & 0x3f));
            return 2;
        }
        if (isLowSurrogate(c)) {
            // unpaired surrogate
            buf[0] = REPLACEMENT_VALUE;
            return 1;
        }
        if (c < 0x10000) {
            buf[0] = (byte)(0xe0 | ((c >> 12) & 0x0f));
            buf[1] = (byte)(0x80 | ((c >> 6) & 0x3f));
            buf[2] = (byte)(0x80 | (c & 0x3f));
            return 3;
        }
        /* 21 bits: 1111 0xxx  10xx xxxx  10xx xxxx  10xx xxxx
         * a aabb  bbbb cccc  ccdd dddd
         */
        buf[0] = (byte)(0xf0 | ((c >> 18) & 0x07));
        buf[1] = (byte)(0x80 | ((c >> 12) & 0x3f));
        buf[2] = (byte)(0x80 | ((c >> 6) & 0x3f));
        buf[3] = (byte)(0x80 | (c & 0x3f));
        return 4;
    }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?