charinfo.java
来自「JAVA 所有包」· Java 代码 · 共 745 行 · 第 1/2 页
JAVA
745 行
/*
 * Copyright 1999-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * $Id: CharInfo.java,v 1.2.4.1 2005/09/15 08:15:14 suresh_emailid Exp $
 */
package com.sun.org.apache.xml.internal.serializer;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.PropertyResourceBundle;
import java.util.ResourceBundle;
import java.security.AccessController;
import java.security.PrivilegedAction;

import javax.xml.transform.TransformerException;

import com.sun.org.apache.xml.internal.serializer.utils.MsgKey;
import com.sun.org.apache.xml.internal.serializer.utils.SystemIDResolver;
import com.sun.org.apache.xml.internal.serializer.utils.Utils;
import com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException;

/**
 * This class provides services that tell if a character should have
 * special treatment, such as entity reference substitution or normalization
 * of a newline character.  It also provides character to entity reference
 * lookup.
 *
 * DEVELOPERS: See Known Issue in the constructor.
 *
 * @xsl.usage internal
 */
final class CharInfo
{
    /** Given a character, lookup a String to output (e.g. a decorated entity reference). */
    private Hashtable m_charToString = new Hashtable();

    /**
     * The name of the HTML entities file.
     * If specified, the file will be resource loaded with the default class loader.
     */
    public static final String HTML_ENTITIES_RESOURCE =
                "com.sun.org.apache.xml.internal.serializer.HTMLEntities";

    /**
     * The name of the XML entities file.
     * If specified, the file will be resource loaded with the default class loader.
     */
    public static final String XML_ENTITIES_RESOURCE =
                "com.sun.org.apache.xml.internal.serializer.XMLEntities";

    /** The horizontal tab character, which the parser should always normalize. */
    public static final char S_HORIZONAL_TAB = 0x09;

    /** The linefeed character, which the parser should always normalize. */
    public static final char S_LINEFEED = 0x0A;

    /** The carriage return character, which the parser should always normalize. */
    public static final char S_CARRIAGERETURN = 0x0D;

    /**
     * This flag is an optimization for HTML entities. It is false if entities
     * other than quot (34), amp (38), lt (60) and gt (62) are defined
     * in the range 0 to 127.
     * @xsl.usage internal
     */
    final boolean onlyQuotAmpLtGt;

    /** Copy the first 0,1 ... ASCII_MAX values into an array */
    private static final int ASCII_MAX = 128;

    /**
     * Array of values is faster access than a set of bits
     * to quickly check ASCII characters in attribute values.
     */
    private boolean[] isSpecialAttrASCII = new boolean[ASCII_MAX];

    /**
     * Array of values is faster access than a set of bits
     * to quickly check ASCII characters in text nodes.
     */
    private boolean[] isSpecialTextASCII = new boolean[ASCII_MAX];

    /** Per-character inverse of isSpecialTextASCII; both are filled in by the constructor. */
    private boolean[] isCleanTextASCII = new boolean[ASCII_MAX];

    /**
     * An array of bits to record if the character is in the set.
     * Although information in this array is complete, the
     * isSpecialAttrASCII array is used first because access to its values
     * is common and faster.
     */
    private int array_of_bits[] = createEmptySetOfIntegers(65535);

    // 5 for 32 bit words,  6 for 64 bit words ...
    /*
     * This constant is used to shift an integer to quickly
     * calculate which element its bit is stored in.
     * 5 for 32 bit words (int) ,  6 for 64 bit words (long)
     */
    private static final int SHIFT_PER_WORD = 5;

    /*
     * A mask to get the low order bits which are used to
     * calculate the value of the bit within a given word,
     * that will represent the presence of the integer in the
     * set.
     *
     * 0x1F for 32 bit words (int),
     * or 0x3F for 64 bit words (long)
     */
    private static final int LOW_ORDER_BITMASK = 0x1f;

    /*
     * This is used for optimizing the lookup of bits representing
     * the integers in the set. It is the index of the first element
     * in the array array_of_bits[] that is not used.
     */
    private int firstWordNotUsed;

    /**
     * Constructor that reads in a resource file that describes the mapping of
     * characters to entity references.
     * This constructor is private, just to force the use
     * of the getCharInfo(entitiesResource) factory
     *
     * Resource files must be encoded in UTF-8 and can either be properties
     * files with a .properties extension assumed.  Alternatively, they can
     * have the following form, with no particular extension assumed:
     *
     * <pre>
     * # First char # is a comment
     * Entity numericValue
     * quot 34
     * amp 38
     * </pre>
     *
     * @param entitiesResource Name of properties or resource file that should
     * be loaded, which describes that mapping of characters to entity
     * references.
*/ private CharInfo(String entitiesResource, String method) { this(entitiesResource, method, false); } private CharInfo(String entitiesResource, String method, boolean internal) { ResourceBundle entities = null; boolean noExtraEntities = true; // Make various attempts to interpret the parameter as a properties // file or resource file, as follows: // // 1) attempt to load .properties file using ResourceBundle // 2) try using the class loader to find the specified file a resource // file // 3) try treating the resource a URI if (internal) { try { // Load entity property files by using PropertyResourceBundle, // cause of security issure for applets entities = PropertyResourceBundle.getBundle(entitiesResource); } catch (Exception e) {} } if (entities != null) { Enumeration keys = entities.getKeys(); while (keys.hasMoreElements()){ String name = (String) keys.nextElement(); String value = entities.getString(name); int code = Integer.parseInt(value); defineEntity(name, (char) code); if (extraEntity(code)) noExtraEntities = false; } set(S_LINEFEED); set(S_CARRIAGERETURN); } else { InputStream is = null; // Load user specified resource file by using URL loading, it // requires a valid URI as parameter try { if (internal) { is = CharInfo.class.getResourceAsStream(entitiesResource); } else { ClassLoader cl = ObjectFactory.findClassLoader(); if (cl == null) { is = ClassLoader.getSystemResourceAsStream(entitiesResource); } else { is = cl.getResourceAsStream(entitiesResource); } if (is == null) { try { URL url = new URL(entitiesResource); is = url.openStream(); } catch (Exception e) {} } } if (is == null) { throw new RuntimeException( Utils.messages.createMessage( MsgKey.ER_RESOURCE_COULD_NOT_FIND, new Object[] {entitiesResource, entitiesResource})); } // Fix Bugzilla#4000: force reading in UTF-8 // This creates the de facto standard that Xalan's resource // files must be encoded in UTF-8. This should work in all // JVMs. 
// // %REVIEW% KNOWN ISSUE: IT FAILS IN MICROSOFT VJ++, which // didn't implement the UTF-8 encoding. Theoretically, we should // simply let it fail in that case, since the JVM is obviously // broken if it doesn't support such a basic standard. But // since there are still some users attempting to use VJ++ for // development, we have dropped in a fallback which makes a // second attempt using the platform's default encoding. In VJ++ // this is apparently ASCII, which is subset of UTF-8... and // since the strings we'll be reading here are also primarily // limited to the 7-bit ASCII range (at least, in English // versions of Xalan), this should work well enough to keep us // on the air until we're ready to officially decommit from // VJ++. BufferedReader reader; try { reader = new BufferedReader(new InputStreamReader(is, "UTF-8")); } catch (UnsupportedEncodingException e) { reader = new BufferedReader(new InputStreamReader(is)); } String line = reader.readLine(); while (line != null) { if (line.length() == 0 || line.charAt(0) == '#') { line = reader.readLine(); continue; } int index = line.indexOf(' '); if (index > 1) { String name = line.substring(0, index); ++index; if (index < line.length()) { String value = line.substring(index); index = value.indexOf(' '); if (index > 0) { value = value.substring(0, index); } int code = Integer.parseInt(value); defineEntity(name, (char) code); if (extraEntity(code)) noExtraEntities = false; } } line = reader.readLine(); } is.close(); set(S_LINEFEED); set(S_CARRIAGERETURN); } catch (Exception e) { throw new RuntimeException( Utils.messages.createMessage( MsgKey.ER_RESOURCE_COULD_NOT_LOAD, new Object[] { entitiesResource, e.toString(), entitiesResource, e.toString()})); } finally { if (is != null) { try { is.close(); } catch (Exception except) {} } } } /* initialize the array isCleanTextASCII[] with a cache of values * for use by ToStream.character(char[], int , int) * and the array isSpecialTextASCII[] with the opposite values 
* (all in the name of performance!) */ for (int ch = 0; ch <ASCII_MAX; ch++) if((((0x20 <= ch || (0x0A == ch || 0x0D == ch || 0x09 == ch))) && (!get(ch))) || ('"' == ch)) { isCleanTextASCII[ch] = true; isSpecialTextASCII[ch] = false; } else { isCleanTextASCII[ch] = false; isSpecialTextASCII[ch] = true; } onlyQuotAmpLtGt = noExtraEntities; // initialize the array with a cache of the BitSet values for (int i=0; i<ASCII_MAX; i++) isSpecialAttrASCII[i] = get(i); /* Now that we've used get(ch) just above to initialize the * two arrays we will change by adding a tab to the set of * special chars for XML (but not HTML!). * We do this because a tab is always a * special character in an XML attribute, * but only a special character in XML text * if it has an entity defined for it. * This is the reason for this delay. */ if (Method.XML.equals(method)) { isSpecialAttrASCII[S_HORIZONAL_TAB] = true; } } /** * Defines a new character reference. The reference's name and value are * supplied. Nothing happens if the character reference is already defined. * <p>Unlike internal entities, character references are a string to single * character mapping. They are used to map non-ASCII characters both on * parsing and printing, primarily for HTML documents. '<amp;' is an * example of a character reference.</p> * * @param name The entity's name * @param value The entity's value */ private void defineEntity(String name, char value) { StringBuffer sb = new StringBuffer("&"); sb.append(name); sb.append(';'); String entityString = sb.toString(); defineChar2StringMapping(entityString, value); } private CharKey m_charKey = new CharKey(); /** * Map a character to a String. For example given * the character '>' this method would return the fully decorated * entity name "<". * Strings for entity references are loaded from a properties file,
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?