codesetconversion.java
来自「JAVA 所有包」· Java 代码 · 共 690 行 · 第 1/2 页
JAVA
690 行
// CharBuffer returned by the decoder will set its limit // to byte immediately after the last written byte. resultingNumChars = charBuf.limit(); // IMPORTANT - It's possible the underlying char[] in the // CharBuffer returned by btc.decode(byteBuf) // is longer in length than the number of characters // decoded. Hence, the check below to ensure the // char[] returned contains all the chars that have // been decoded and no more. if (charBuf.limit() == charBuf.capacity()) { buffer = charBuf.array(); } else { buffer = new char[charBuf.limit()]; charBuf.get(buffer, 0, charBuf.limit()).position(0); } return buffer; } catch (IllegalStateException ile) { // There were a decoding operation already in progress throw wrapper.btcConverterFailure( ile ) ; } catch (MalformedInputException mie) { // There were illegal Unicode char pairs throw wrapper.badUnicodePair( mie ) ; } catch (UnmappableCharacterException uce) { // A character doesn't map to the desired code set. // CORBA formal 00-11-03. throw omgWrapper.charNotInCodeset( uce ) ; } catch (CharacterCodingException cce) { // If this happens, then a character decoding error occured. throw wrapper.btcConverterFailure( cce ) ; } } /** * Utility method to find a CharsetDecoder in the * cache or create a new one if necessary. Throws an * INTERNAL if the code set is unknown. */ protected CharsetDecoder getConverter(String javaCodeSetName) { CharsetDecoder result = null; try { result = cache.getByteToCharConverter(javaCodeSetName); if (result == null) { Charset tmpCharset = Charset.forName(javaCodeSetName); result = tmpCharset.newDecoder(); cache.setConverter(javaCodeSetName, result); } } catch(IllegalCharsetNameException icne) { // This can only happen if one of our charset entries has // an illegal name. throw wrapper.invalidBtcConverterName( icne, javaCodeSetName ) ; } return result; } } /** * Special converter for UTF16 since it's required to optionally * support a byte order marker while the internal Java converters * either require it or require that it isn't there. * * The solution is to check for the byte order marker, and if we * need to do something differently, switch internal converters. */ private class UTF16BTCConverter extends JavaBTCConverter { private boolean defaultToLittleEndian; private boolean converterUsesBOM = true; private static final char UTF16_BE_MARKER = (char) 0xfeff; private static final char UTF16_LE_MARKER = (char) 0xfffe; // When there isn't a byte order marker, used the byte // order specified. public UTF16BTCConverter(boolean defaultToLittleEndian) { super(OSFCodeSetRegistry.UTF_16); this.defaultToLittleEndian = defaultToLittleEndian; } public char[] getChars(byte[] bytes, int offset, int numBytes) { if (hasUTF16ByteOrderMarker(bytes, offset, numBytes)) { if (!converterUsesBOM) switchToConverter(OSFCodeSetRegistry.UTF_16); converterUsesBOM = true; return super.getChars(bytes, offset, numBytes); } else { if (converterUsesBOM) { if (defaultToLittleEndian) switchToConverter(OSFCodeSetRegistry.UTF_16LE); else switchToConverter(OSFCodeSetRegistry.UTF_16BE); converterUsesBOM = false; } return super.getChars(bytes, offset, numBytes); } } /** * Utility method for determining if a UTF-16 byte order marker is present. */ private boolean hasUTF16ByteOrderMarker(byte[] array, int offset, int length) { // If there aren't enough bytes to represent the marker and data, // return false. if (length >= 4) { int b1 = array[offset] & 0x00FF; int b2 = array[offset + 1] & 0x00FF; char marker = (char)((b1 << 8) | (b2 << 0)); return (marker == UTF16_BE_MARKER || marker == UTF16_LE_MARKER); } else return false; } /** * The current solution for dealing with UTF-16 in CORBA * is that if our sun.io converter requires byte order markers, * and then we see a CORBA wstring/wchar without them, we * switch to the sun.io converter that doesn't require them. */ private void switchToConverter(OSFCodeSetRegistry.Entry newCodeSet) { // Use the getConverter method from our superclass. btc = super.getConverter(newCodeSet.getName()); } } /** * CTB converter factory for single byte or variable length encodings. */ public CTBConverter getCTBConverter(OSFCodeSetRegistry.Entry codeset) { int alignment = (!codeset.isFixedWidth() ? 1 : codeset.getMaxBytesPerChar()); return new JavaCTBConverter(codeset, alignment); } /** * CTB converter factory for multibyte (mainly fixed) encodings. * * Because of the awkwardness with byte order markers and the possibility of * using UCS-2, you must specify both the endianness of the stream as well as * whether or not to use byte order markers if applicable. UCS-2 has no byte * order markers. UTF-16 has optional markers. * * If you select useByteOrderMarkers, there is no guarantee that the encoding * will use the endianness specified. * */ public CTBConverter getCTBConverter(OSFCodeSetRegistry.Entry codeset, boolean littleEndian, boolean useByteOrderMarkers) { // UCS2 doesn't have byte order markers, and we're encoding it // as UTF-16 since UCS2 isn't available in all Java platforms. // They should be identical with only minor differences in // negative cases. if (codeset == OSFCodeSetRegistry.UCS_2) return new UTF16CTBConverter(littleEndian); // We can write UTF-16 with or without a byte order marker. if (codeset == OSFCodeSetRegistry.UTF_16) { if (useByteOrderMarkers) return new UTF16CTBConverter(); else return new UTF16CTBConverter(littleEndian); } // Everything else uses the generic JavaCTBConverter. // // Variable width encodings are aligned on 1 byte boundaries. // A fixed width encoding with a max. of 4 bytes/char should // align on a 4 byte boundary. Note that UTF-16 is a special // case because of the optional byte order marker, so it's // handled above. // // This doesn't matter for GIOP 1.2 wchars and wstrings // since the encoded bytes are treated as an encapsulation. int alignment = (!codeset.isFixedWidth() ? 1 : codeset.getMaxBytesPerChar()); return new JavaCTBConverter(codeset, alignment); } /** * BTCConverter factory for single byte or variable width encodings. */ public BTCConverter getBTCConverter(OSFCodeSetRegistry.Entry codeset) { return new JavaBTCConverter(codeset); } /** * BTCConverter factory for fixed width multibyte encodings. */ public BTCConverter getBTCConverter(OSFCodeSetRegistry.Entry codeset, boolean defaultToLittleEndian) { if (codeset == OSFCodeSetRegistry.UTF_16 || codeset == OSFCodeSetRegistry.UCS_2) { return new UTF16BTCConverter(defaultToLittleEndian); } else { return new JavaBTCConverter(codeset); } } /** * Follows the code set negotiation algorithm in CORBA formal 99-10-07 13.7.2. * * Returns the proper negotiated OSF character encoding number or * CodeSetConversion.FALLBACK_CODESET. */ private int selectEncoding(CodeSetComponentInfo.CodeSetComponent client, CodeSetComponentInfo.CodeSetComponent server) { // A "null" value for the server's nativeCodeSet means that // the server desired not to indicate one. We'll take that // to mean that it wants the first thing in its conversion list. // If it's conversion list is empty, too, then use the fallback // codeset. int serverNative = server.nativeCodeSet; if (serverNative == 0) { if (server.conversionCodeSets.length > 0) serverNative = server.conversionCodeSets[0]; else return CodeSetConversion.FALLBACK_CODESET; } if (client.nativeCodeSet == serverNative) { // Best case -- client and server don't have to convert return serverNative; } // Is this client capable of converting to the server's // native code set? for (int i = 0; i < client.conversionCodeSets.length; i++) { if (serverNative == client.conversionCodeSets[i]) { // The client will convert to the server's // native code set. return serverNative; } } // Is the server capable of converting to the client's // native code set? for (int i = 0; i < server.conversionCodeSets.length; i++) { if (client.nativeCodeSet == server.conversionCodeSets[i]) { // The server will convert to the client's // native code set. return client.nativeCodeSet; } } // See if there are any code sets that both the server and client // support (giving preference to the server). The order // of conversion sets is from most to least desired. for (int i = 0; i < server.conversionCodeSets.length; i++) { for (int y = 0; y < client.conversionCodeSets.length; y++) { if (server.conversionCodeSets[i] == client.conversionCodeSets[y]) { return server.conversionCodeSets[i]; } } } // Before using the fallback codesets, the spec calls for a // compatibility check on the native code sets. It doesn't make // sense because loss free communication is always possible with // UTF8 and UTF16, the fall back code sets. It's also a lot // of work to implement. In the case of incompatibility, the // spec says to throw a CODESET_INCOMPATIBLE exception. // Use the fallback return CodeSetConversion.FALLBACK_CODESET; } /** * Perform the code set negotiation algorithm and come up with * the two encodings to use. */ public CodeSetComponentInfo.CodeSetContext negotiate(CodeSetComponentInfo client, CodeSetComponentInfo server) { int charData = selectEncoding(client.getCharComponent(), server.getCharComponent()); if (charData == CodeSetConversion.FALLBACK_CODESET) { charData = OSFCodeSetRegistry.UTF_8.getNumber(); } int wcharData = selectEncoding(client.getWCharComponent(), server.getWCharComponent()); if (wcharData == CodeSetConversion.FALLBACK_CODESET) { wcharData = OSFCodeSetRegistry.UTF_16.getNumber(); } return new CodeSetComponentInfo.CodeSetContext(charData, wcharData); } // No one should instantiate a CodeSetConversion but the singleton // instance method private CodeSetConversion() {} // initialize-on-demand holder private static class CodeSetConversionHolder { static final CodeSetConversion csc = new CodeSetConversion() ; } /** * CodeSetConversion is a singleton, and this is the access point. */ public final static CodeSetConversion impl() { return CodeSetConversionHolder.csc ; } // Singleton instance private static CodeSetConversion implementation; // Number used internally to indicate the fallback code // set. private static final int FALLBACK_CODESET = 0; // Provides a thread local cache for the sun.io // converters. private CodeSetCache cache = new CodeSetCache();}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?