⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 utf8utf16.java

📁 手机Wap浏览器源码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
                // lower 4 bits become higher 4 bits
                result[n] = (byte) ((b0 << 4) & 0xff);
            	//#ifdef debug
                //# if (DEBUG) System.out.println("-- 3b : half result[n] = " + result[n]);
                //#endif
                // 10xx xxyy : get the xx xx 4 bits, as lower 4
                result[n] |= (byte) (((b1 & 0x3c) >> 2) & 0xff);    // 3c = 0011 1100
            	//#ifdef debug
                //# if (DEBUG) System.out.println("-- 3b : result[n] = " + result[n]);
                //#endif

                // b1 lower 2 bits become higher 2 bits
                result[n+1] = (byte) (((b1 & 0x3) << 6) & 0xff);
                // b2 lower 6 bits, just take it as lower 6 bits
                result[n+1] |= (byte) ((b2 & 0x3f) & 0xff);    // 0x3f = 0011 1111 (bin)

                n+=2;  // ucs-2 always +2
                i+=2;
                continue;
            }

            // 2 bytes model : 0x80 - 0x7ff (special char)
            if ((utf8[i] & mask1) == mask1) 
            {
                b0 = utf8[i];
                b1 = utf8[i+1];

                // take 3 bits from b0 (110x xxyy , take xxx)
                result[n] = (byte) ( (b0>>2) & 0x7 );  // 0x7 = 111
                result[n+1] = (byte) (((b0 & 0x3) << 6) & 0xff );
                result[n+1] |= (byte) (b1 & 0x3f);

                n+=2;  // ucs-2 always +2
                i+=1;
                continue;
            }

            // sanity check
            if ((utf8[i] & mask0) != 0) {
            	//#ifdef debug
                //# System.err.println("error encoding");
                //#endif
                break; // just skip
            }

            result[n] = 0;  // filled up MSB = 0
            result[n+1] = utf8[i];  // as-is

            n+=2; // always + 2
        }

        byte[] ucs2 = new byte[n];
        System.arraycopy(result, 0, ucs2, 0, n);
        return ucs2;
    }


    // 
    // size=16
    //   61  2e  20  e6  97  a0  e5  80  be  e8  a7  92  e7  9a  84  3b
    // Encoding=UTF-8 : a. 无倾角的;
    //
    // UTF-16 size=16
    // 00  61  00  2e  00  20  65  e0  50  3e  89  d2  76  84  00  3b
    //
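    // 65e0  703e  a9d2  7684   <- WRONG for the 2nd and 3rd chars: the correct units are
    //                             503e and 89d2 (see the UTF-16 dump above). Both wrong
    //                             values carry a stray 0x20 bit in the high byte, which is
    //                             what appears if the "10" prefix of the 2nd UTF-8 byte is
    //                             not masked off before shifting (b1 >> 2 without & 0x3c);
    //                             presumably output of an earlier, buggy run.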
    //
    //
    // \u65e0\u503e\u89d2\u7684
    //
    // \u65e0\u503e\u89d2\u7684
    //#ifdef debug
    //# static boolean test_utf8_to_ucs2()
    //# {
        //# //   61  2e  20  e6  97  a0  e5  80  be  e8  a7  92  e7  9a  84  3b
        //# byte utf8[] = {  
             //# (byte)0x61, (byte)0x2e, (byte)0x20, (byte)0xe6, (byte)0x97
            //# ,(byte)0xa0, (byte)0xe5, (byte)0x80, (byte)0xbe, (byte)0xe8
            //# ,(byte)0xa7, (byte)0x92, (byte)0xe7, (byte)0x9a, (byte)0x84
            //# ,(byte)0x3b
        //# };
//# 
        //# byte ucs2[] = new byte[0];
        //# String str_utf8, str_ucs2;
        //# try {
            //# ucs2 = utf8_to_ucs2(utf8);
        //# } catch (Exception e) {
            //# System.err.println("ex: " + e);
        //# }
//# 
        //#ifdef debug
        //# printArray(utf8, 0, utf8.length, "UTF-8");
        //#endif
//# 
        //# // printArray(ucs2, 0, ucs2.length, "UCS-2"); 
        //# // // UCS-2 : java1.4 UnsupportedEncoding
        //# 
        //#ifdef debug
        //# printArray(ucs2, 0, ucs2.length, "UTF-16");  // UTF-16 = UCS-2
        //#endif
//# 
        //# try {
            //# str_utf8 = new String(utf8, 0, utf8.length, "UTF-8");
            //# str_ucs2 = new String(ucs2, 0, ucs2.length, "UTF-16");
//# 
            //# return str_utf8.equals(str_ucs2);
        //# } catch (Exception e) {
            //# System.err.println("test_utf8_to_ucs2 ex2: " + e);
            //# return false;
        //# }
//# 
        //# // return false;
    //# }
    //#endif

    // size=16 Encoding=UTF-16
    //   fe  ff  4f  60  59  7d  00  61  00  62  00  63  59  1f  4e  86
    // 你好abc够了
    // 
    // 1111 1110   1111 1111  (fe ff = byte order mark U+FEFF; 你好 itself is 4f60 597d)
    //
    // case c:
    // 0x800    to      0xffff  :   1110 xxxx  10xx xxxx  10xx xxxx (16 bits)
    //
    // 1110 xxxx  10xx xxxx  10xx xxxx (16 bits)
    //      1111    11 1011    11 1111  (bits of U+FEFF, UCS-2 or UTF-16)
    // ==
    //
    // 1110 1111  1011 1011  1011 1111  (U+FEFF encoded as UTF-8)
    //   E    F     B    B     B    F 
    //   EF  BB  BF
    //
    //#ifdef debug
    //# static boolean test_ucs2_to_utf8()
    //# {
        //# String str = "你好abc够了";
        //# String utf8str ;
        //# byte ucs2[], utf8[] ;
//# 
        //# try {
            //# ucs2 = str.getBytes("UTF-16");
            //# utf8 = ucs2_to_utf8(ucs2);
//# 
            //#ifdef debug
            //# printArray(ucs2, 0, ucs2.length, "UTF-16");
            //# printArray(utf8, 0, utf8.length, "UTF-8");
            //#endif
//# 
            //# utf8str = new String(utf8, "UTF-8");
        	//#ifdef debug
            //# if (DEBUG) System.out.println(str + " == " + utf8str);
            //#endif
            //# return str.equals(utf8str);
        //# } catch (Exception e) {
            //# System.err.println( "test_ucs2_to_utf8 ex: " + e);
            //# return false;
        //# }
    //# }
    //#endif

    /**
     * do not remove, it's a useful reference.
     *
     * special note: when converting a byte to an int, we want to
     * preserve the "bit pattern" rather than the numeric value.
     * e.g. for byte a = (byte) 0x80 (-128) the bit pattern is 1000 0000,
     * but int -128 has a different (sign-extended) bit pattern, so the
     * int that carries the same low 8 bits is +128, obtained with (a & 0xff)
     * byte(-128)   = 1000 0000
     * int(128)     = 0000 0000  0000 0000  0000 0000  1000 0000
     * int(-128)    = 1111 1111  1111 1111  1111 1111  1000 0000
     * 
     */
    /*public static boolean testByteToInt()
    {
        byte a = (byte)0x80;
        byte b = (byte)0xff;
        int aint = a & 0xff;    // using & 0xff to convert to int 
        int bint = b & 0xff;    // important for bit-wise operation
        int awrong = a ;
        int bwrong = b ;

        if (DEBUG) {
            System.out.println("a=" + a + "  b=" + b);
            System.out.println("aint=" + aint + "  bint=" + bint);
            System.out.println("awrong=" + awrong + "  bwrong=" + bwrong);
        }
        return (a & 0xff) == (aint & 0xff);
    }*/


/*    static void printArray(byte array[], int offset, int size, String encoding)
    {
        String str;
        int last = offset + size;
        int n;
        if (last > array.length) last = array.length;

        System.out.println("size=" + size + " Encoding=" + encoding );
        for(int i=offset; i<last; i++) {
            n = array[i] & 0xff;  // a simple & can avoid negative issue
            // n = (array[i]>=0) ? (array[i]) : (array[i] + 256); // work
            // n = array[i]; // this is buggy
            str = Integer.toHexString(n);
            if (str.length() < 2) {
                str = "0" + str;
            }
            System.out.print("  " + str);
        }
        try {
            System.out.println("\n"
                    + (new String(array, offset, size, encoding)));
        } catch (Exception e)
        {
            System.err.println("printArray new String ex: " + e);
        }

    }
*/

    // turn on DEBUG for more debug messages
    //#ifdef debug
    //# public final static boolean DEBUG = false;
    //#endif

/*    public static void main(String arg[])
    {
        System.out.println("testBytetoInt(): " + testByteToInt());
        System.out.println("");
        System.out.println("test_utf8_to_ucs2(): " + test_utf8_to_ucs2());
        System.out.println("");
        System.out.println("test_ucs2_to_utf8(): " + test_ucs2_to_utf8());
        System.out.println("");
    }*/
    
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -