encodinginfo.java
来自「JAVA 所有包」· Java 代码 · 共 509 行 · 第 1/2 页
JAVA
509 行
/**
 * The encoding.
 */
private final String m_encoding;

/**
 * m_first through m_last is the range of unicode
 * values that this object will return an answer on.
 * It may delegate to a similar object with a different
 * range.
 */
private final int m_first;

/**
 * m_explFirst through m_explLast is the range of unicode
 * values that this object handles explicitly and does not
 * delegate to a similar object.
 */
private final int m_explFirst;
private final int m_explLast;

/** Upper end of the m_first through m_last range (see m_first). */
private final int m_last;

/**
 * The object, of the same type as this one,
 * that handles unicode values in a range before
 * the range explicitly handled by this object, and
 * to which this object may delegate.
 */
private InEncoding m_before;

/**
 * The object, of the same type as this one,
 * that handles unicode values in a range after
 * the range explicitly handled by this object, and
 * to which this object may delegate.
 */
private InEncoding m_after;

/**
 * The number of unicode values explicitly handled
 * by a single EncodingInfo object. This value is
 * tuneable, but is set to 128 because that covers the
 * entire low range of ASCII type chars within a single
 * object.
 */
private static final int RANGE = 128;

/**
 * A flag to record if we already know the answer
 * for the given unicode value.
 */
private final boolean[] m_alreadyKnown = new boolean[RANGE];

/**
 * A table holding the answer on whether the given unicode
 * value is in the encoding.
 */
private final boolean[] m_isInEncoding = new boolean[RANGE];

/**
 * Creates the object that answers for every unicode value,
 * 0 through Integer.MAX_VALUE, explicitly handling the
 * RANGE values starting at 0.
 */
private EncodingImpl() {
    this(javaName, 0, Integer.MAX_VALUE, (char) 0);
}

/**
 * Creates an object that answers for the unicode values
 * <code>first</code> through <code>last</code> and explicitly
 * handles the RANGE values starting at <code>codePoint</code>.
 *
 * @param encoding the encoding name stored in m_encoding.
 * @param first the first unicode value this object answers on.
 * @param last the last unicode value this object answers on.
 * @param codePoint the first unicode value handled explicitly.
 */
private EncodingImpl(String encoding, int first, int last, int codePoint) {
    // Set the range of unicode values that this object manages
    // either explicitly or implicitly.
    m_first = first;
    m_last = last;

    // Set the range of unicode values that this object
    // explicitly manages.
    m_explFirst = codePoint;
    m_explLast = codePoint + (RANGE - 1);

    m_encoding = encoding;

    if (javaName == null) {
        /* A little bit more than optimization.
         *
         * We will say that any character is in the encoding if
         * we don't have an encoding.
         * This is meaningful when the serializer is being used
         * in temporary output state, where we are not writing to
         * the final output tree. It is when writing to the
         * final output tree that we need to worry about the output
         * encoding.
         *
         * This check used to be nested inside the
         * "javaName != null" branch, where it could never run.
         */
        for (int idx = 0; idx < m_alreadyKnown.length; idx++) {
            m_alreadyKnown[idx] = true;
            m_isInEncoding[idx] = true;
        }
    } else if (0 <= m_explFirst && m_explFirst <= 127) {
        // Some optimization: this particular EncodingImpl explicitly
        // handles characters in the low range.
        if ("UTF8".equals(javaName)
            || "UTF-16".equals(javaName)
            || "ASCII".equals(javaName)
            || "US-ASCII".equals(javaName)
            || "Unicode".equals(javaName)
            || "UNICODE".equals(javaName)
            || javaName.startsWith("ISO8859")) {
            // Not only does this EncodingImpl object explicitly
            // handle characters in the low range, it is
            // also one that we know something about, without
            // needing to call inEncoding(char ch, String encoding)
            // for this low range.
            //
            // By initializing the table ahead of time
            // for these low values (1 through 126), we prevent the
            // expensive inEncoding(char ch, String encoding)
            // from being called, at least for these common
            // encodings.
            for (int unicode = 1; unicode < 127; unicode++) {
                final int idx = unicode - m_explFirst;
                if (0 <= idx && idx < RANGE) {
                    m_alreadyKnown[idx] = true;
                    m_isInEncoding[idx] = true;
                }
            }
        }
    }
}
} // end of the EncodingImpl class

/**
 * This is heart of the code that determines if a given character
 * is in the given encoding. This method is probably expensive,
 * and the answer should be cached.
 * <p>
 * This method is not a public API,
 * and should only be used internally within the serializer.
 * @param ch the char in question, that is not a high char of
 * a high/low surrogate pair.
 * @param encoding the Java name of the encoding.
 * @return true if the character is judged to be in the encoding,
 * or if <code>encoding</code> is null.
 *
 * @xsl.usage internal
 */
private static boolean inEncoding(char ch, String encoding) {
    // If for some reason the encoding is null, e.g.
    // for a temporary result tree, we should just
    // say that every character is in the encoding.
    // (Checked up front instead of relying on the exception that
    // getBytes(null) would raise.)
    if (encoding == null) {
        return true;
    }

    try {
        // Encode a one-char String into a sequence of bytes
        // using the given, named charset.
        byte[] encoded = String.valueOf(ch).getBytes(encoding);
        return inEncoding(ch, encoded);
    } catch (Exception e) {
        // Best effort: any failure to encode (for example an
        // unsupported charset name) means "not in the encoding".
        return false;
    }
}

/**
 * This is heart of the code that determines if a given high/low
 * surrogate pair forms a character that is in the given encoding.
 * This method is probably expensive, and the answer should be cached.
 * <p>
 * This method is not a public API,
 * and should only be used internally within the serializer.
 * @param high the high char of
 * a high/low surrogate pair.
 * @param low the low char of a high/low surrogate pair.
 * @param encoding the Java name of the encoding.
 * @return true if the pair is judged to be in the encoding.
 *
 * @xsl.usage internal
 */
private static boolean inEncoding(char high, char low, String encoding) {
    try {
        // Encode the two-char String into a sequence of bytes
        // using the given, named charset.
        String pair = new String(new char[] { high, low });
        byte[] encoded = pair.getBytes(encoding);
        return inEncoding(high, encoded);
    } catch (Exception e) {
        // Best effort: any failure to encode means "not in the encoding".
        // NOTE(review): a null encoding also ends up here and yields
        // false, unlike inEncoding(char, String) which yields true --
        // confirm whether that difference is intended.
        return false;
    }
}

/**
 * This method is the core of determining if character
 * is in the encoding. The method is not foolproof, because
 * s.getBytes(encoding) has specified behavior only if the
 * characters are in the specified encoding. However this
 * method tries its best.
 * @param ch the char that was converted using getBytes, or
 * the first char of a high/low pair that was converted.
 * @param data the bytes written out by the call to s.getBytes(encoding);
 * @return true if the character is in the encoding.
 */
private static boolean inEncoding(char ch, byte[] data) {
    // If the string written out as data is not in the encoding,
    // the output is not specified according to the documentation
    // on the String.getBytes(encoding) method,
    // but we do our best here.
    if (data == null || data.length == 0) {
        return false;
    }

    final byte firstByte = data[0];

    // A leading zero byte, or a '?' that was produced for a char
    // other than '?', is taken to mean the char was not encodable.
    //
    // A workaround for some JRE bugs with Japanese encodings
    // ("EUC-JP", "EUC_JP", "SJIS") used to sit here but is disabled:
    // it reported false when the first byte was 0x21 or when ch was
    // 0xA5, and true otherwise.
    //
    // Otherwise we don't know for sure, but it looks like the char
    // is in the encoding.
    return firstByte != 0 && !(firstByte == '?' && ch != '?');
}
} // closes the enclosing class
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?