utfops.java

来自「嵌入式数据库Berkeley DB-4.5.20源代码」· Java 代码 · 共 282 行
JAVA
282 行
/*- * See the file LICENSE for redistribution information. * * Copyright (c) 2000-2006 *      Oracle Corporation.  All rights reserved. * * $Id: UtfOps.java,v 12.3 2006/08/31 18:14:09 bostic Exp $ */package com.sleepycat.util;/** * UTF operations with more flexibility than is provided by DataInput and * DataOutput. * * @author Mark Hayes */public class UtfOps {    private static byte[] EMPTY_BYTES = {};    private static String EMPTY_STRING = "";    /**     * Returns the byte length of a null terminated UTF string, not including     * the terminator.     *     * @param bytes the data containing the UTF string.     *     * @param offset the beginning of the string the measure.     *     * @throws IndexOutOfBoundsException if no zero terminator is found.     *     * @return the number of bytes.     */    public static int getZeroTerminatedByteLength(byte[] bytes, int offset)        throws IndexOutOfBoundsException {        int len = 0;        while (bytes[offset++] != 0) {            len++;        }        return len;    }    /**     * Returns the byte length of the UTF string that would be created by     * converting the given characters to UTF.     *     * @param chars the characters that would be converted.     *     * @return the byte length of the equivalent UTF data.     */    public static int getByteLength(char[] chars) {        return getByteLength(chars, 0, chars.length);    }    /**     * Returns the byte length of the UTF string that would be created by     * converting the given characters to UTF.     *     * @param chars the characters that would be converted.     *     * @param offset the first character to be converted.     *     * @param length the number of characters to be converted.     *     * @return the byte length of the equivalent UTF data.     */    public static int getByteLength(char[] chars, int offset, int length) {        int len = 0;        length += offset;        for (int i = offset; i < length; i++) {            int c = chars[i];            if ((c >= 0x0001) && (c <= 0x007F)) {                len++;            } else if (c > 0x07FF) {                len += 3;            } else {                len += 2;            }        }        return len;    }    /**     * Returns the number of characters represented by the given UTF string.     *     * @param bytes the UTF string.     *     * @return the number of characters.     *     * @throws IndexOutOfBoundsException if a UTF character sequence at the end     * of the data is not complete.     *     * @throws IllegalArgumentException if an illegal UTF sequence is     * encountered.     */    public static int getCharLength(byte[] bytes)        throws IllegalArgumentException, IndexOutOfBoundsException {        return getCharLength(bytes, 0, bytes.length);    }    /**     * Returns the number of characters represented by the given UTF string.     *     * @param bytes the data containing the UTF string.     *     * @param offset the first byte to be converted.     *     * @param length the number of byte to be converted.     *     * @throws IndexOutOfBoundsException if a UTF character sequence at the end     * of the data is not complete.     *     * @throws IllegalArgumentException if an illegal UTF sequence is     * encountered.     */    public static int getCharLength(byte[] bytes, int offset, int length)        throws IllegalArgumentException, IndexOutOfBoundsException {        int charCount = 0;        length += offset;        while (offset < length) {            switch ((bytes[offset] & 0xff) >> 4) {            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:                offset++;                break;            case 12: case 13:                offset += 2;                break;            case 14:                offset += 3;                break;            default:                throw new IllegalArgumentException();            }            charCount++;        }        return charCount;    }    /**     * Converts byte arrays into character arrays.     *     * @param bytes the source byte data to convert     *     * @param byteOffset the offset into the byte array at which     * to start the conversion     *     * @param chars the destination array     *     * @param charOffset the offset into chars at which to begin the copy     *     * @param len the amount of information to copy into chars     *     * @param isByteLen if true then len is a measure of bytes, otherwise     * len is a measure of characters     *     * @throws IndexOutOfBoundsException if a UTF character sequence at the end     * of the data is not complete.     *     * @throws IllegalArgumentException if an illegal UTF sequence is     * encountered.     */    public static int bytesToChars(byte[] bytes, int byteOffset,                                   char[] chars, int charOffset,                                   int len, boolean isByteLen)        throws IllegalArgumentException, IndexOutOfBoundsException {        int char1, char2, char3;        len += isByteLen ? byteOffset : charOffset;        while ((isByteLen ? byteOffset : charOffset) < len) {            char1 = bytes[byteOffset++] & 0xff;            switch ((char1 & 0xff) >> 4) {            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:                chars[charOffset++] = (char) char1;                break;            case 12: case 13:                char2 = bytes[byteOffset++];                if ((char2 & 0xC0) != 0x80) {                    throw new IllegalArgumentException();                }                chars[charOffset++] = (char)(((char1 & 0x1F) << 6) |                                             (char2 & 0x3F));                break;            case 14:                char2 = bytes[byteOffset++];                char3 = bytes[byteOffset++];                if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))                    throw new IllegalArgumentException();                chars[charOffset++] = (char)(((char1 & 0x0F) << 12) |                                             ((char2 & 0x3F) << 6)  |                                             ((char3 & 0x3F) << 0));                break;            default:                throw new IllegalArgumentException();            }        }        return byteOffset;    }    /**     * Converts character arrays into byte arrays.     *     * @param chars the source character data to convert     *     * @param charOffset the offset into the character array at which     * to start the conversion     *     * @param bytes the destination array     *     * @param byteOffset the offset into bytes at which to begin the copy     *     * @param charLength the length of characters to copy into bytes     */    public static void charsToBytes(char[] chars, int charOffset,                                    byte[] bytes, int byteOffset,                                    int charLength) {        charLength += charOffset;        for (int i = charOffset; i < charLength; i++) {            int c = chars[i];            if ((c >= 0x0001) && (c <= 0x007F)) {                bytes[byteOffset++] = (byte) c;            } else if (c > 0x07FF) {                bytes[byteOffset++] = (byte) (0xE0 | ((c >> 12) & 0x0F));                bytes[byteOffset++] = (byte) (0x80 | ((c >>  6) & 0x3F));                bytes[byteOffset++] = (byte) (0x80 | ((c >>  0) & 0x3F));            } else {                bytes[byteOffset++] = (byte) (0xC0 | ((c >>  6) & 0x1F));                bytes[byteOffset++] = (byte) (0x80 | ((c >>  0) & 0x3F));            }        }    }    /**     * Converts byte arrays into strings.     *     * @param bytes the source byte data to convert     *     * @param offset the offset into the byte array at which     * to start the conversion     *     * @param length the number of bytes to be converted.     *     * @return the string.     *     * @throws IndexOutOfBoundsException if a UTF character sequence at the end     * of the data is not complete.     *     * @throws IllegalArgumentException if an illegal UTF sequence is     * encountered.     */    public static String bytesToString(byte[] bytes, int offset, int length)        throws IllegalArgumentException, IndexOutOfBoundsException {        if (length == 0) return EMPTY_STRING;        int charLen = UtfOps.getCharLength(bytes, offset, length);        char[] chars = new char[charLen];        UtfOps.bytesToChars(bytes, offset, chars, 0, length, true);        return new String(chars, 0, charLen);    }    /**     * Converts strings to byte arrays.     *     * @param string the string to convert.     *     * @return the UTF byte array.     */    public static byte[] stringToBytes(String string) {        if (string.length() == 0) return EMPTY_BYTES;        char[] chars = string.toCharArray();        byte[] bytes = new byte[UtfOps.getByteLength(chars)];        UtfOps.charsToBytes(chars, 0, bytes, 0, chars.length);        return bytes;    }}
utfops.java - 源码说明

本页面展示了「嵌入式数据库Berkeley DB-4.5.20源代码」中的 utfops.java 源码文件，采用 Java 编程语言编写，共 282 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Berkeley相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?