codesetconversion.java

来自「JAVA 所有包」· Java 代码 · 共 690 行 · 第 1/2 页

JAVA
690
字号
                // CharBuffer returned by the decoder will set its limit                // to byte immediately after the last written byte.                resultingNumChars = charBuf.limit();                // IMPORTANT - It's possible the underlying char[] in the                //             CharBuffer returned by btc.decode(byteBuf)                //             is longer in length than the number of characters                //             decoded. Hence, the check below to ensure the                //             char[] returned contains all the chars that have                //             been decoded and no more.                if (charBuf.limit() == charBuf.capacity()) {                    buffer = charBuf.array();                } else {                    buffer = new char[charBuf.limit()];                    charBuf.get(buffer, 0, charBuf.limit()).position(0);                }                return buffer;            } catch (IllegalStateException ile) {                // There were a decoding operation already in progress		throw wrapper.btcConverterFailure( ile ) ;            } catch (MalformedInputException mie) {                // There were illegal Unicode char pairs		throw wrapper.badUnicodePair( mie ) ;            } catch (UnmappableCharacterException uce) {                // A character doesn't map to the desired code set.                // CORBA formal 00-11-03.		throw omgWrapper.charNotInCodeset( uce ) ;            } catch (CharacterCodingException cce) {                // If this happens, then a character decoding error occured.		throw wrapper.btcConverterFailure( cce ) ;            }        }        /**         * Utility method to find a CharsetDecoder in the         * cache or create a new one if necessary.  Throws an         * INTERNAL if the code set is unknown.         */        protected CharsetDecoder getConverter(String javaCodeSetName) {            CharsetDecoder result = null;            try {                result = cache.getByteToCharConverter(javaCodeSetName);                if (result == null) {                    Charset tmpCharset = Charset.forName(javaCodeSetName);                    result = tmpCharset.newDecoder();                    cache.setConverter(javaCodeSetName, result);                }            } catch(IllegalCharsetNameException icne) {                // This can only happen if one of our charset entries has                // an illegal name.		throw wrapper.invalidBtcConverterName( icne, javaCodeSetName ) ;            }            return result;        }    }    /**     * Special converter for UTF16 since it's required to optionally     * support a byte order marker while the internal Java converters     * either require it or require that it isn't there.     *     * The solution is to check for the byte order marker, and if we     * need to do something differently, switch internal converters.     */    private class UTF16BTCConverter extends JavaBTCConverter    {        private boolean defaultToLittleEndian;        private boolean converterUsesBOM = true;        private static final char UTF16_BE_MARKER = (char) 0xfeff;        private static final char UTF16_LE_MARKER = (char) 0xfffe;        // When there isn't a byte order marker, used the byte        // order specified.        public UTF16BTCConverter(boolean defaultToLittleEndian) {            super(OSFCodeSetRegistry.UTF_16);            this.defaultToLittleEndian = defaultToLittleEndian;        }        public char[] getChars(byte[] bytes, int offset, int numBytes) {            if (hasUTF16ByteOrderMarker(bytes, offset, numBytes)) {                if (!converterUsesBOM)                    switchToConverter(OSFCodeSetRegistry.UTF_16);                converterUsesBOM = true;                return super.getChars(bytes, offset, numBytes);            } else {                if (converterUsesBOM) {                    if (defaultToLittleEndian)                        switchToConverter(OSFCodeSetRegistry.UTF_16LE);                    else                        switchToConverter(OSFCodeSetRegistry.UTF_16BE);                    converterUsesBOM = false;                }                return super.getChars(bytes, offset, numBytes);            }        }        /**         * Utility method for determining if a UTF-16 byte order marker is present.         */        private boolean hasUTF16ByteOrderMarker(byte[] array, int offset, int length) {            // If there aren't enough bytes to represent the marker and data,            // return false.            if (length >= 4) {                int b1 = array[offset] & 0x00FF;                int b2 = array[offset + 1] & 0x00FF;                char marker = (char)((b1 << 8) | (b2 << 0));                                return (marker == UTF16_BE_MARKER || marker == UTF16_LE_MARKER);            } else                return false;        }        /**         * The current solution for dealing with UTF-16 in CORBA         * is that if our sun.io converter requires byte order markers,         * and then we see a CORBA wstring/wchar without them, we          * switch to the sun.io converter that doesn't require them.         */        private void switchToConverter(OSFCodeSetRegistry.Entry newCodeSet) {            // Use the getConverter method from our superclass.            btc = super.getConverter(newCodeSet.getName());        }    }    /**     * CTB converter factory for single byte or variable length encodings.     */    public CTBConverter getCTBConverter(OSFCodeSetRegistry.Entry codeset) {        int alignment = (!codeset.isFixedWidth() ?                         1 :                         codeset.getMaxBytesPerChar());                    return new JavaCTBConverter(codeset, alignment);    }    /**     * CTB converter factory for multibyte (mainly fixed) encodings.     *     * Because of the awkwardness with byte order markers and the possibility of      * using UCS-2, you must specify both the endianness of the stream as well as      * whether or not to use byte order markers if applicable.  UCS-2 has no byte      * order markers.  UTF-16 has optional markers.     *     * If you select useByteOrderMarkers, there is no guarantee that the encoding     * will use the endianness specified.     *     */    public CTBConverter getCTBConverter(OSFCodeSetRegistry.Entry codeset,                                        boolean littleEndian,                                        boolean useByteOrderMarkers) {        // UCS2 doesn't have byte order markers, and we're encoding it        // as UTF-16 since UCS2 isn't available in all Java platforms.        // They should be identical with only minor differences in        // negative cases.        if (codeset == OSFCodeSetRegistry.UCS_2)            return new UTF16CTBConverter(littleEndian);        // We can write UTF-16 with or without a byte order marker.        if (codeset == OSFCodeSetRegistry.UTF_16) {            if (useByteOrderMarkers)                return new UTF16CTBConverter();            else                return new UTF16CTBConverter(littleEndian);        }        // Everything else uses the generic JavaCTBConverter.        //        // Variable width encodings are aligned on 1 byte boundaries.        // A fixed width encoding with a max. of 4 bytes/char should        // align on a 4 byte boundary.  Note that UTF-16 is a special        // case because of the optional byte order marker, so it's        // handled above.        //        // This doesn't matter for GIOP 1.2 wchars and wstrings        // since the encoded bytes are treated as an encapsulation.        int alignment = (!codeset.isFixedWidth() ?                         1 :                         codeset.getMaxBytesPerChar());                return new JavaCTBConverter(codeset, alignment);    }    /**     * BTCConverter factory for single byte or variable width encodings.     */    public BTCConverter getBTCConverter(OSFCodeSetRegistry.Entry codeset) {        return new JavaBTCConverter(codeset);    }    /**     * BTCConverter factory for fixed width multibyte encodings.     */    public BTCConverter getBTCConverter(OSFCodeSetRegistry.Entry codeset,                                        boolean defaultToLittleEndian) {        if (codeset == OSFCodeSetRegistry.UTF_16 ||            codeset == OSFCodeSetRegistry.UCS_2) {            return new UTF16BTCConverter(defaultToLittleEndian);        } else {            return new JavaBTCConverter(codeset);        }    }    /**      * Follows the code set negotiation algorithm in CORBA formal 99-10-07 13.7.2.     *     * Returns the proper negotiated OSF character encoding number or     * CodeSetConversion.FALLBACK_CODESET.     */    private int selectEncoding(CodeSetComponentInfo.CodeSetComponent client,                               CodeSetComponentInfo.CodeSetComponent server) {        // A "null" value for the server's nativeCodeSet means that        // the server desired not to indicate one.  We'll take that        // to mean that it wants the first thing in its conversion list.        // If it's conversion list is empty, too, then use the fallback        // codeset.        int serverNative = server.nativeCodeSet;        if (serverNative == 0) {            if (server.conversionCodeSets.length > 0)                serverNative = server.conversionCodeSets[0];            else                return CodeSetConversion.FALLBACK_CODESET;        }        if (client.nativeCodeSet == serverNative) {            // Best case -- client and server don't have to convert            return serverNative;        }        // Is this client capable of converting to the server's        // native code set?        for (int i = 0; i < client.conversionCodeSets.length; i++) {            if (serverNative == client.conversionCodeSets[i]) {                // The client will convert to the server's                // native code set.                return serverNative;            }        }        // Is the server capable of converting to the client's        // native code set?        for (int i = 0; i < server.conversionCodeSets.length; i++) {            if (client.nativeCodeSet == server.conversionCodeSets[i]) {                // The server will convert to the client's                // native code set.                return client.nativeCodeSet;            }        }        // See if there are any code sets that both the server and client        // support (giving preference to the server).  The order        // of conversion sets is from most to least desired.        for (int i = 0; i < server.conversionCodeSets.length; i++) {            for (int y = 0; y < client.conversionCodeSets.length; y++) {                if (server.conversionCodeSets[i] == client.conversionCodeSets[y]) {                    return server.conversionCodeSets[i];                }            }        }        // Before using the fallback codesets, the spec calls for a        // compatibility check on the native code sets.  It doesn't make        // sense because loss free communication is always possible with        // UTF8 and UTF16, the fall back code sets.  It's also a lot        // of work to implement.  In the case of incompatibility, the        // spec says to throw a CODESET_INCOMPATIBLE exception.                // Use the fallback        return CodeSetConversion.FALLBACK_CODESET;    }    /**     * Perform the code set negotiation algorithm and come up with     * the two encodings to use.     */    public CodeSetComponentInfo.CodeSetContext negotiate(CodeSetComponentInfo client,                                                         CodeSetComponentInfo server) {        int charData            = selectEncoding(client.getCharComponent(),                             server.getCharComponent());        if (charData == CodeSetConversion.FALLBACK_CODESET) {            charData = OSFCodeSetRegistry.UTF_8.getNumber();        }        int wcharData            = selectEncoding(client.getWCharComponent(),                             server.getWCharComponent());        if (wcharData == CodeSetConversion.FALLBACK_CODESET) {            wcharData = OSFCodeSetRegistry.UTF_16.getNumber();        }        return new CodeSetComponentInfo.CodeSetContext(charData,                                                       wcharData);    }    // No one should instantiate a CodeSetConversion but the singleton    // instance method    private CodeSetConversion() {}    // initialize-on-demand holder    private static class CodeSetConversionHolder {	static final CodeSetConversion csc = new CodeSetConversion() ;    }    /**     * CodeSetConversion is a singleton, and this is the access point.     */    public final static CodeSetConversion impl() {	return CodeSetConversionHolder.csc ;    }    // Singleton instance    private static CodeSetConversion implementation;    // Number used internally to indicate the fallback code    // set.    private static final int FALLBACK_CODESET = 0;    // Provides a thread local cache for the sun.io    // converters.    private CodeSetCache cache = new CodeSetCache();}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?