charinfo.java

来自「JAVA 所有包」· Java 代码 · 共 745 行 · 第 1/2 页

JAVA
745
字号
/* * Copyright 1999-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * *     http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Id: CharInfo.java,v 1.2.4.1 2005/09/15 08:15:14 suresh_emailid Exp $ */package com.sun.org.apache.xml.internal.serializer;import java.io.BufferedReader;import java.io.InputStream;import java.io.InputStreamReader;import java.io.UnsupportedEncodingException;import java.net.URL;import java.util.Enumeration;import java.util.Hashtable;import java.util.PropertyResourceBundle;import java.util.ResourceBundle;import java.security.AccessController;import java.security.PrivilegedAction;import javax.xml.transform.TransformerException;import com.sun.org.apache.xml.internal.serializer.utils.MsgKey;import com.sun.org.apache.xml.internal.serializer.utils.SystemIDResolver;import com.sun.org.apache.xml.internal.serializer.utils.Utils;import com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException;/** * This class provides services that tell if a character should have * special treatement, such as entity reference substitution or normalization * of a newline character.  It also provides character to entity reference * lookup. * * DEVELOPERS: See Known Issue in the constructor. *  * @xsl.usage internal */final class CharInfo{    /** Given a character, lookup a String to output (e.g. a decorated entity reference). */    private Hashtable m_charToString = new Hashtable();    /**     * The name of the HTML entities file.     
* If specified, the file will be resource loaded with the default class loader.
     */
    public static final String HTML_ENTITIES_RESOURCE =
                "com.sun.org.apache.xml.internal.serializer.HTMLEntities";

    /**
     * The name of the XML entities file.
     * If specified, the file will be resource loaded with the default class loader.
     */
    public static final String XML_ENTITIES_RESOURCE =
                "com.sun.org.apache.xml.internal.serializer.XMLEntities";

    /** The horizontal tab character, which the parser should always normalize. */
    public static final char S_HORIZONAL_TAB = 0x09;

    /** The linefeed character, which the parser should always normalize. */
    public static final char S_LINEFEED = 0x0A;

    /** The carriage return character, which the parser should always normalize. */
    public static final char S_CARRIAGERETURN = 0x0D;

    /**
     * This flag is an optimization for HTML entities. It is false if entities
     * other than quot (34), amp (38), lt (60) and gt (62) are defined
     * in the range 0 to 127.
     * It is assigned exactly once, at the end of the constructor.
     * @xsl.usage internal
     */
    final boolean onlyQuotAmpLtGt;

    /**
     * Size of the per-character lookup arrays below: they cache the values
     * for the characters 0, 1, ... ASCII_MAX - 1.
     */
    private static final int ASCII_MAX = 128;

    /**
     * Array of values is faster access than a set of bits
     * to quickly check ASCII characters in attribute values.
     */
    private boolean[] isSpecialAttrASCII = new boolean[ASCII_MAX];

    /**
     * Array of values is faster access than a set of bits
     * to quickly check ASCII characters in text nodes.
     */
    private boolean[] isSpecialTextASCII = new boolean[ASCII_MAX];

    /**
     * Per-character cache for text nodes that the constructor fills with the
     * exact opposite of isSpecialTextASCII.
     */
    private boolean[] isCleanTextASCII = new boolean[ASCII_MAX];

    /**
     * An array of bits to record if the character is in the set.
     * Although information in this array is complete, the
     * isSpecialAttrASCII array is used first because access to its values
     * is common and faster.
     */
    private int array_of_bits[] = createEmptySetOfIntegers(65535);

    // 5 for 32 bit words,  6 for 64 bit words ...
    /*
     * This constant is used to shift an integer to quickly
     * calculate which element its bit is stored in.
     * 5 for 32 bit words (int) ,  6 for 64 bit words (long)
     */
    private static final int SHIFT_PER_WORD = 5;

    /*
     * A mask to get the low order bits which are used to
     * calculate the value of the bit within a given word,
     * that will represent the presence of the integer in the
     * set.
     *
     * 0x1F for 32 bit words (int),
     * or 0x3F for 64 bit words (long)
     */
    private static final int LOW_ORDER_BITMASK = 0x1f;

    /*
     * This is used for optimizing the lookup of bits representing
     * the integers in the set. It is the index of the first element
     * in the array array_of_bits[] that is not used.
     */
    private int firstWordNotUsed;

    /**
     * Constructor that reads in a resource file that describes the mapping of
     * characters to entity references.
     * This constructor is private, just to force the use
     * of the getCharInfo(entitiesResource) factory
     *
     * Resource files must be encoded in UTF-8 and can either be properties
     * files with a .properties extension assumed.  Alternatively, they can
     * have the following form, with no particular extension assumed:
     *
     * <pre>
     * # First char # is a comment
     * Entity numericValue
     * quot 34
     * amp 38
     * </pre>
     *
     * @param entitiesResource Name of properties or resource file that should
     * be loaded, which describes that mapping of characters to entity
     * references.
*/    private CharInfo(String entitiesResource, String method)    {        this(entitiesResource, method, false);    }    private CharInfo(String entitiesResource, String method, boolean internal)    {        ResourceBundle entities = null;        boolean noExtraEntities = true;        // Make various attempts to interpret the parameter as a properties        // file or resource file, as follows:        //        //   1) attempt to load .properties file using ResourceBundle        //   2) try using the class loader to find the specified file a resource        //      file        //   3) try treating the resource a URI        if (internal) {             try {                // Load entity property files by using PropertyResourceBundle,                // cause of security issure for applets                entities = PropertyResourceBundle.getBundle(entitiesResource);            } catch (Exception e) {}        }        if (entities != null) {            Enumeration keys = entities.getKeys();            while (keys.hasMoreElements()){                String name = (String) keys.nextElement();                String value = entities.getString(name);                int code = Integer.parseInt(value);                defineEntity(name, (char) code);                if (extraEntity(code))                    noExtraEntities = false;            }            set(S_LINEFEED);            set(S_CARRIAGERETURN);        } else {            InputStream is = null;            // Load user specified resource file by using URL loading, it            // requires a valid URI as parameter            try {                if (internal) {                    is = CharInfo.class.getResourceAsStream(entitiesResource);                } else {                    ClassLoader cl = ObjectFactory.findClassLoader();                    if (cl == null) {                        is = ClassLoader.getSystemResourceAsStream(entitiesResource);                    } else {                        is = 
cl.getResourceAsStream(entitiesResource);                    }                    if (is == null) {                        try {                            URL url = new URL(entitiesResource);                            is = url.openStream();                        } catch (Exception e) {}                    }                }                if (is == null) {                    throw new RuntimeException(                        Utils.messages.createMessage(                            MsgKey.ER_RESOURCE_COULD_NOT_FIND,                            new Object[] {entitiesResource, entitiesResource}));                }                // Fix Bugzilla#4000: force reading in UTF-8                //  This creates the de facto standard that Xalan's resource                 //  files must be encoded in UTF-8. This should work in all                // JVMs.                //                // %REVIEW% KNOWN ISSUE: IT FAILS IN MICROSOFT VJ++, which                // didn't implement the UTF-8 encoding. Theoretically, we should                // simply let it fail in that case, since the JVM is obviously                // broken if it doesn't support such a basic standard.  But                // since there are still some users attempting to use VJ++ for                // development, we have dropped in a fallback which makes a                // second attempt using the platform's default encoding. In VJ++                // this is apparently ASCII, which is subset of UTF-8... and                // since the strings we'll be reading here are also primarily                // limited to the 7-bit ASCII range (at least, in English                // versions of Xalan), this should work well enough to keep us                // on the air until we're ready to officially decommit from                // VJ++.                
BufferedReader reader;                try {                    reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));                } catch (UnsupportedEncodingException e) {                    reader = new BufferedReader(new InputStreamReader(is));                }                String line = reader.readLine();                while (line != null) {                    if (line.length() == 0 || line.charAt(0) == '#') {                        line = reader.readLine();                        continue;                    }                    int index = line.indexOf(' ');                    if (index > 1) {                        String name = line.substring(0, index);                        ++index;                        if (index < line.length()) {                            String value = line.substring(index);                            index = value.indexOf(' ');                            if (index > 0) {                                value = value.substring(0, index);                            }                            int code = Integer.parseInt(value);                            defineEntity(name, (char) code);                            if (extraEntity(code))                                noExtraEntities = false;                        }                    }                    line = reader.readLine();                }                is.close();                set(S_LINEFEED);                set(S_CARRIAGERETURN);            } catch (Exception e) {                throw new RuntimeException(                    Utils.messages.createMessage(                        MsgKey.ER_RESOURCE_COULD_NOT_LOAD,                        new Object[] { entitiesResource,                                       e.toString(),                                       entitiesResource,                                       e.toString()}));            } finally {                if (is != null) {                    try {                        is.close();                  
  } catch (Exception except) {}                }            }        }                  /* initialize the array isCleanTextASCII[] with a cache of values         * for use by ToStream.character(char[], int , int)         * and the array isSpecialTextASCII[] with the opposite values         * (all in the name of performance!)         */        for (int ch = 0; ch <ASCII_MAX; ch++)        if((((0x20 <= ch || (0x0A == ch || 0x0D == ch || 0x09 == ch)))             && (!get(ch))) || ('"' == ch))        {            isCleanTextASCII[ch] = true;            isSpecialTextASCII[ch] = false;        }        else {            isCleanTextASCII[ch] = false;            isSpecialTextASCII[ch] = true;             }                       onlyQuotAmpLtGt = noExtraEntities;        // initialize the array with a cache of the BitSet values        for (int i=0; i<ASCII_MAX; i++)            isSpecialAttrASCII[i] = get(i);                       /* Now that we've used get(ch) just above to initialize the         * two arrays we will change by adding a tab to the set of          * special chars for XML (but not HTML!).         * We do this because a tab is always a         * special character in an XML attribute,          * but only a special character in XML text          * if it has an entity defined for it.         * This is the reason for this delay.         */        if (Method.XML.equals(method))         {            isSpecialAttrASCII[S_HORIZONAL_TAB] = true;        }    }    /**     * Defines a new character reference. The reference's name and value are     * supplied. Nothing happens if the character reference is already defined.     * <p>Unlike internal entities, character references are a string to single     * character mapping. They are used to map non-ASCII characters both on     * parsing and printing, primarily for HTML documents. 
'&lt;amp;' is an     * example of a character reference.</p>     *     * @param name The entity's name     * @param value The entity's value     */    private void defineEntity(String name, char value)    {        StringBuffer sb = new StringBuffer("&");        sb.append(name);        sb.append(';');        String entityString = sb.toString();                defineChar2StringMapping(entityString, value);    }    private CharKey m_charKey = new CharKey();    /**     * Map a character to a String. For example given     * the character '>' this method would return the fully decorated     * entity name "&lt;".     * Strings for entity references are loaded from a properties file,

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?