chartobyteutf8.java

来自「This is a resource based on j2me embedde」· Java 代码 · 共 162 行

JAVA
162
字号
/* * @(#)CharToByteUTF8.java	1.19 06/10/10 * * Copyright  1990-2008 Sun Microsystems, Inc. All Rights Reserved.   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER   *    * This program is free software; you can redistribute it and/or   * modify it under the terms of the GNU General Public License version   * 2 only, as published by the Free Software Foundation.    *    * This program is distributed in the hope that it will be useful, but   * WITHOUT ANY WARRANTY; without even the implied warranty of   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU   * General Public License version 2 for more details (a copy is   * included at /legal/license.txt).    *    * You should have received a copy of the GNU General Public License   * version 2 along with this work; if not, write to the Free Software   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA   * 02110-1301 USA    *    * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa   * Clara, CA 95054 or visit www.sun.com if you need additional   * information or have any questions.  * */package sun.io;/** * UCS2 (UTF16) -> UCS Transformation Format 8 (UTF-8) converter * It's represented like below. * * # Bits   Bit pattern * 1    7   0xxxxxxx * 2   11   110xxxxx 10xxxxxx * 3   16   1110xxxx 10xxxxxx 10xxxxxx * 4   21   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx * 5   26   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx * 6   31   1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx * *     UCS2 uses 1-3 / UTF16 uses 1-4 / UCS4 uses 1-6 */public class CharToByteUTF8 extends CharToByteConverter {    private char highHalfZoneCode;        public int flush(byte[] output, int outStart, int outEnd)	throws MalformedInputException    {	if (highHalfZoneCode != 0) {	    highHalfZoneCode = 0;	    badInputLength = 0;	    throw new MalformedInputException();	}	byteOff = charOff = 0;	return 0;    }    /**     * Character conversion     */    public int convert(char[] input, int inOff, int inEnd,		       byte[] output, int outOff, int outEnd)	throws ConversionBufferFullException, MalformedInputException    {	char inputChar;	byte[] outputByte = new byte[6];	int inputSize;	int outputSize;	charOff = inOff;	byteOff = outOff;	if (highHalfZoneCode != 0) {	    inputChar = highHalfZoneCode;	    highHalfZoneCode = 0;	    if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) {		// This is legal UTF16 sequence.		int ucs4 = (highHalfZoneCode - 0xd800) * 0x400		    + (input[inOff] - 0xdc00) + 0x10000;		output[0] = (byte)(0xf0 | ((ucs4 >> 18)) & 0x07);		output[1] = (byte)(0x80 | ((ucs4 >> 12) & 0x3f));		output[2] = (byte)(0x80 | ((ucs4 >> 6) & 0x3f));		output[3] = (byte)(0x80 | (ucs4 & 0x3f));		charOff++;		highHalfZoneCode = 0;	    } else {		// This is illegal UTF16 sequence.		badInputLength = 0;		throw new MalformedInputException();	    }	}	while(charOff < inEnd) {	    inputChar = input[charOff];	    if (inputChar < 0x80) {		outputByte[0] = (byte)inputChar;		inputSize = 1;		outputSize = 1;	    } else if (inputChar < 0x800) {		outputByte[0] = (byte)(0xc0 | ((inputChar >> 6) & 0x1f));		outputByte[1] = (byte)(0x80 | (inputChar & 0x3f));		inputSize = 1;		outputSize = 2;	    } else if (inputChar >= 0xd800 && inputChar <= 0xdbff) {		// this is <high-half zone code> in UTF-16		if (charOff + 1 >= inEnd) {		    highHalfZoneCode = inputChar;		    break;		}		// check next char is valid <low-half zone code>		char lowChar = input[charOff + 1];		if (lowChar < 0xdc00 || lowChar > 0xdfff) {		    badInputLength = 1;		    throw new MalformedInputException();		}		int ucs4 = (inputChar - 0xd800) * 0x400 + (lowChar - 0xdc00)		    + 0x10000;		outputByte[0] = (byte)(0xf0 | ((ucs4 >> 18)) & 0x07);		outputByte[1] = (byte)(0x80 | ((ucs4 >> 12) & 0x3f));		outputByte[2] = (byte)(0x80 | ((ucs4 >> 6) & 0x3f));		outputByte[3] = (byte)(0x80 | (ucs4 & 0x3f));		outputSize = 4;		inputSize = 2;	    } else {		outputByte[0] = (byte)(0xe0 | ((inputChar >> 12)) & 0x0f);		outputByte[1] = (byte)(0x80 | ((inputChar >> 6) & 0x3f));		outputByte[2] = (byte)(0x80 | (inputChar & 0x3f));		inputSize = 1;		outputSize = 3;	    } 	    if (byteOff + outputSize > outEnd) {		throw new ConversionBufferFullException();	    }	    for (int i = 0; i < outputSize; i++) {		output[byteOff++] = outputByte[i];	    }	    charOff += inputSize;	}	return byteOff - outOff;    }    public boolean canConvert(char ch) {	return true;    }    public int getMaxBytesPerChar() {	return 3;    }    public void reset() {	byteOff = charOff = 0;	highHalfZoneCode = 0;    }    public String getCharacterEncoding() {	return "UTF8";    }}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?