encodinginfo.java

来自「JAVA 所有包」· Java 代码 · 共 509 行 · 第 1/2 页

JAVA
509
字号
        /**
         * The encoding.
         */
        private final String m_encoding;

        /**
         * m_first through m_last is the range of unicode
         * values that this object will return an answer on.
         * It may delegate to a similar object with a different
         * range.
         */
        private final int m_first;

        /**
         * m_explFirst through m_explLast is the range of unicode
         * values that this object handles explicitly and does not
         * delegate to a similar object.
         */
        private final int m_explFirst;
        private final int m_explLast;
        private final int m_last;

        /**
         * The object, of the same type as this one,
         * that handles unicode values in a range before
         * the range explicitly handled by this object, and
         * to which this object may delegate.
         */
        private InEncoding m_before;

        /**
         * The object, of the same type as this one,
         * that handles unicode values in a range after
         * the range explicitly handled by this object, and
         * to which this object may delegate.
         */
        private InEncoding m_after;

        /**
         * The number of unicode values explicitly handled
         * by a single EncodingInfo object. This value is
         * tuneable, but is set to 128 because that covers the
         * entire low range of ASCII type chars within a single
         * object.
         */
        private static final int RANGE = 128;

        /**
         * A flag to record if we already know the answer
         * for the given unicode value.
         */
        private final boolean[] m_alreadyKnown = new boolean[RANGE];

        /**
         * A table holding the answer on whether the given unicode
         * value is in the encoding.
         */
        private final boolean[] m_isInEncoding = new boolean[RANGE];

        /**
         * Creates the object that answers for every unicode value,
         * 0 through Integer.MAX_VALUE, using the enclosing object's
         * Java encoding name. Its explicitly handled range starts at 0.
         */
        private EncodingImpl() {
            this(javaName, 0, Integer.MAX_VALUE, (char) 0);
        }

        /**
         * Creates an object answering for the unicode values
         * {@code first} through {@code last}, of which it explicitly
         * handles {@code codePoint} through {@code codePoint + RANGE - 1}.
         *
         * @param encoding the Java name of the encoding, may be null
         * @param first the first unicode value this object answers for
         * @param last the last unicode value this object answers for
         * @param codePoint the first unicode value handled explicitly
         */
        private EncodingImpl(String encoding, int first, int last, int codePoint) {
            // Set the range of unicode values that this object manages
            // either explicitly or implicitly.
            m_first = first;
            m_last = last;

            // Set the range of unicode values that this object
            // explicitly manages.
            m_explFirst = codePoint;
            m_explLast = codePoint + (RANGE - 1);

            m_encoding = encoding;

            if (javaName == null) {
                /* A little bit more than optimization.
                 *
                 * We will say that any character is in the encoding if
                 * we don't have an encoding.
                 * This is meaningful when the serializer is being used
                 * in temporary output state, where we are not writing to
                 * the final output tree.  It is when writing to the
                 * final output tree that we need to worry about the output
                 * encoding.
                 *
                 * This check used to be nested inside a
                 * "javaName != null" guard and therefore never ran.
                 */
                for (int idx = 0; idx < m_alreadyKnown.length; idx++) {
                    m_alreadyKnown[idx] = true;
                    m_isInEncoding[idx] = true;
                }
            } else if (0 <= m_explFirst && m_explFirst <= 127) {
                // Some optimization: this particular EncodingImpl
                // explicitly handles characters in the low range.
                if ("UTF8".equals(javaName)
                        || "UTF-16".equals(javaName)
                        || "ASCII".equals(javaName)
                        || "US-ASCII".equals(javaName)
                        || "Unicode".equals(javaName)
                        || "UNICODE".equals(javaName)
                        || javaName.startsWith("ISO8859")) {

                    // Not only does this EncodingImpl object explicitly
                    // handle characters in the low range, it is
                    // also one that we know something about, without
                    // needing to call inEncoding(char ch, String encoding)
                    // for this low range.
                    //
                    // By initializing the table ahead of time
                    // for these low values, we prevent the expensive
                    // inEncoding(char ch, String encoding)
                    // from being called, at least for these common
                    // encodings.
                    for (int unicode = 1; unicode < 127; unicode++) {
                        final int idx = unicode - m_explFirst;
                        if (0 <= idx && idx < RANGE) {
                            m_alreadyKnown[idx] = true;
                            m_isInEncoding[idx] = true;
                        }
                    }
                }
            }
        }
    }

    /**
     * This is the heart of the code that determines if a given character
     * is in the given encoding. This method is probably expensive,
     * and the answer should be cached.
     * <p>
     * This method is not a public API,
     * and should only be used internally within the serializer.
     * @param ch the char in question, that is not a high char of
     * a high/low surrogate pair.
     * @param encoding the Java name of the encoding; if null, every
     * character is reported as being in the encoding.
     * @return true if the character appears to be in the encoding,
     * false if it does not or if the encoding attempt failed.
     *
     * @xsl.usage internal
     *
     */
    private static boolean inEncoding(char ch, String encoding) {
        // If for some reason the encoding is null, e.g.
        // for a temporary result tree, we should just
        // say that every character is in the encoding.
        if (encoding == null) {
            return true;
        }
        try {
            // Construct a String from the char
            String s = new String(new char[] { ch });
            // Encode the String into a sequence of bytes
            // using the given, named charset.
            byte[] bArray = s.getBytes(encoding);
            return inEncoding(ch, bArray);
        } catch (Exception e) {
            // Best effort: any failure to encode (for example an
            // unsupported encoding name) means "not in the encoding".
            return false;
        }
    }

    /**
     * This is the heart of the code that determines if a given high/low
     * surrogate pair forms a character that is in the given encoding.
     * This method is probably expensive, and the answer should be cached.
     * <p>
     * This method is not a public API,
     * and should only be used internally within the serializer.
     * @param high the high char of
     * a high/low surrogate pair.
     * @param low the low char of a high/low surrogate pair.
     * @param encoding the Java name of the encoding; if null, every
     * character is reported as being in the encoding, consistent with
     * the single-char overload.
     * @return true if the pair appears to be in the encoding,
     * false if it does not or if the encoding attempt failed.
     *
     * @xsl.usage internal
     *
     */
    private static boolean inEncoding(char high, char low, String encoding) {
        // No encoding, e.g. a temporary result tree: every character
        // is considered to be in the encoding.
        if (encoding == null) {
            return true;
        }
        try {
            // Construct a String from the surrogate pair
            String s = new String(new char[] { high, low });
            // Encode the String into a sequence of bytes
            // using the given, named charset.
            byte[] bArray = s.getBytes(encoding);
            // Only the high surrogate is compared against the output bytes.
            return inEncoding(high, bArray);
        } catch (Exception e) {
            // Best effort: any failure to encode means "not in the encoding".
            return false;
        }
    }

    /**
     * This method is the core of determining if a character
     * is in the encoding. The method is not foolproof, because
     * s.getBytes(encoding) has specified behavior only if the
     * characters are in the specified encoding. However this
     * method tries its best.
     * @param ch the char that was converted using getBytes, or
     * the first char of a high/low pair that was converted.
     * @param data the bytes written out by the call to s.getBytes(encoding);
     * @return true if the character is in the encoding.
     */
    private static boolean inEncoding(char ch, byte[] data) {
        // If the string written out as data is not in the encoding,
        // the output is not specified according to the documentation
        // on the String.getBytes(encoding) method,
        // but we do our best here.
        if (data == null || data.length == 0) {
            return false;
        }
        if (data[0] == 0) {
            return false;
        }
        if (data[0] == '?' && ch != '?') {
            // A '?' byte for a char that is not itself '?' is taken
            // to mean the char could not be encoded.
            return false;
        }
        /*
         * Disabled workaround, kept for reference:
         *
         * else if (isJapanese) {
         *   // isJapanese is really
         *   //   (    "EUC-JP".equals(javaName)
         *   //    ||  "EUC_JP".equals(javaName)
         *   //    ||  "SJIS".equals(javaName)   )
         *
         *   // Work around some bugs in JRE for Japanese
         *   if(data[0] == 0x21)
         *     isInEncoding = false;
         *   else if (ch == 0xA5)
         *     isInEncoding = false;
         *   else
         *     isInEncoding = true;
         * }
         */

        // We don't know for sure, but it looks like it is in the encoding
        return true;
    }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?