charinfo.java
来自「JAVA的一些源码 JAVA2 STANDARD EDITION DEVELO」· Java 代码 · 共 610 行 · 第 1/2 页
JAVA
610 行
/* * Copyright 1999-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Id: CharInfo.java,v 1.11 2004/02/23 10:29:37 aruny Exp $ */package com.sun.org.apache.xml.internal.serializer;import java.io.BufferedReader;import java.io.InputStream;import java.io.InputStreamReader;import java.io.UnsupportedEncodingException;import java.net.URL;import java.util.Hashtable;import java.util.PropertyResourceBundle;import java.util.Enumeration;import java.util.ResourceBundle;import javax.xml.transform.TransformerException;import com.sun.org.apache.xml.internal.res.XMLErrorResources;import com.sun.org.apache.xml.internal.res.XMLMessages;import com.sun.org.apache.xml.internal.utils.CharKey;import com.sun.org.apache.xml.internal.utils.SystemIDResolver;import com.sun.org.apache.xml.internal.utils.WrappedRuntimeException;/** * This class provides services that tell if a character should have * special treatement, such as entity reference substitution or normalization * of a newline character. It also provides character to entity reference * lookup. * * DEVELOPERS: See Known Issue in the constructor. * * @xsl.usage internal */class CharInfo{ /** Lookup table for characters to entity references. */ private Hashtable m_charToEntityRef = new Hashtable(); /** * The name of the HTML entities file. * If specified, the file will be resource loaded with the default class loader. */ public static String HTML_ENTITIES_RESOURCE = "com.sun.org.apache.xml.internal.serializer.HTMLEntities"; /** * The name of the XML entities file. * If specified, the file will be resource loaded with the default class loader. */ public static String XML_ENTITIES_RESOURCE = "com.sun.org.apache.xml.internal.serializer.XMLEntities"; /** The horizontal tab character, which the parser should always normalize. */ public static final char S_HORIZONAL_TAB = 0x09; /** The linefeed character, which the parser should always normalize. */ public static final char S_LINEFEED = 0x0A; /** The carriage return character, which the parser should always normalize. */ public static char S_CARRIAGERETURN = 0x0D; /** This flag is an optimization for HTML entities. It false if entities * other than quot (34), amp (38), lt (60) and gt (62) are defined * in the range 0 to 127. * @xsl.usage internal */ final boolean onlyQuotAmpLtGt; /** Copy the first 0,1 ... ASCII_MAX values into an array */ private static final int ASCII_MAX = 128; /** Array of values is faster access than a set of bits * to quickly check ASCII characters in attribute values. */ private boolean[] isSpecialAttrASCII = new boolean[ASCII_MAX]; /** Array of values is faster access than a set of bits * to quickly check ASCII characters in text nodes. */ private boolean[] isSpecialTextASCII = new boolean[ASCII_MAX]; private boolean[] isCleanTextASCII = new boolean[ASCII_MAX]; /** An array of bits to record if the character is in the set. * Although information in this array is complete, the * isSpecialAttrASCII array is used first because access to its values * is common and faster. */ private int array_of_bits[] = createEmptySetOfIntegers(65535); // 5 for 32 bit words, 6 for 64 bit words ... /* * This constant is used to shift an integer to quickly * calculate which element its bit is stored in. * 5 for 32 bit words (int) , 6 for 64 bit words (long) */ private static final int SHIFT_PER_WORD = 5; /* * A mask to get the low order bits which are used to * calculate the value of the bit within a given word, * that will represent the presence of the integer in the * set. * * 0x1F for 32 bit words (int), * or 0x3F for 64 bit words (long) */ private static final int LOW_ORDER_BITMASK = 0x1f; /* * This is used for optimizing the lookup of bits representing * the integers in the set. It is the index of the first element * in the array array_of_bits[] that is not used. */ private int firstWordNotUsed; /** * Constructor that reads in a resource file that describes the mapping of * characters to entity references. * This constructor is private, just to force the use * of the getCharInfo(entitiesResource) factory * * Resource files must be encoded in UTF-8 and can either be properties * files with a .properties extension assumed. Alternatively, they can * have the following form, with no particular extension assumed: * * <pre> * # First char # is a comment * Entity numericValue * quot 34 * amp 38 * </pre> * * @param entitiesResource Name of properties or resource file that should * be loaded, which describes that mapping of characters to entity * references. */ private CharInfo(String entitiesResource, String method) { this(entitiesResource, method, false); } private CharInfo(String entitiesResource, String method, boolean internal) { ResourceBundle entities = null; boolean noExtraEntities = true; // Make various attempts to interpret the parameter as a properties // file or resource file, as follows: // // 1) attempt to load .properties file using ResourceBundle // 2) try using the class loader to find the specified file a resource // file // 3) try treating the resource a URI if (internal) { try { // Load entity property files by using PropertyResourceBundle, // cause of security issure for applets entities = PropertyResourceBundle.getBundle(entitiesResource); } catch (Exception e) {} } if (entities != null) { Enumeration keys = entities.getKeys(); while (keys.hasMoreElements()){ String name = (String) keys.nextElement(); String value = entities.getString(name); int code = Integer.parseInt(value); defineEntity(name, (char) code); if (extraEntity(code)) noExtraEntities = false; } set(S_LINEFEED); set(S_CARRIAGERETURN); } else { InputStream is = null; // Load user specified resource file by using URL loading, it // requires a valid URI as parameter try { if (internal) { is = CharInfo.class.getResourceAsStream(entitiesResource); } else { ClassLoader cl = ObjectFactory.findClassLoader(); if (cl == null) { is = ClassLoader.getSystemResourceAsStream(entitiesResource); } else { is = cl.getResourceAsStream(entitiesResource); } if (is == null) { try { URL url = new URL(entitiesResource); is = url.openStream(); } catch (Exception e) {} } } if (is == null) { throw new RuntimeException( XMLMessages.createXMLMessage( XMLErrorResources.ER_RESOURCE_COULD_NOT_FIND, new Object[] {entitiesResource, entitiesResource})); } // Fix Bugzilla#4000: force reading in UTF-8 // This creates the de facto standard that Xalan's resource // files must be encoded in UTF-8. This should work in all // JVMs. // // %REVIEW% KNOWN ISSUE: IT FAILS IN MICROSOFT VJ++, which // didn't implement the UTF-8 encoding. Theoretically, we should // simply let it fail in that case, since the JVM is obviously // broken if it doesn't support such a basic standard. But // since there are still some users attempting to use VJ++ for // development, we have dropped in a fallback which makes a // second attempt using the platform's default encoding. In VJ++ // this is apparently ASCII, which is subset of UTF-8... and // since the strings we'll be reading here are also primarily // limited to the 7-bit ASCII range (at least, in English // versions of Xalan), this should work well enough to keep us // on the air until we're ready to officially decommit from // VJ++. BufferedReader reader; try { reader = new BufferedReader(new InputStreamReader(is, "UTF-8")); } catch (UnsupportedEncodingException e) { reader = new BufferedReader(new InputStreamReader(is)); } String line = reader.readLine(); while (line != null) { if (line.length() == 0 || line.charAt(0) == '#') { line = reader.readLine(); continue; } int index = line.indexOf(' '); if (index > 1) { String name = line.substring(0, index); ++index; if (index < line.length()) { String value = line.substring(index); index = value.indexOf(' '); if (index > 0) { value = value.substring(0, index); } int code = Integer.parseInt(value); defineEntity(name, (char) code); if (extraEntity(code)) noExtraEntities = false; } } line = reader.readLine(); } is.close(); set(S_LINEFEED); set(S_CARRIAGERETURN); } catch (Exception e) { throw new RuntimeException( XMLMessages.createXMLMessage( XMLErrorResources.ER_RESOURCE_COULD_NOT_LOAD, new Object[] { entitiesResource, e.toString(), entitiesResource, e.toString()})); } finally { if (is != null) { try { is.close(); } catch (Exception except) {} } } } /* initialize the array isCleanTextASCII[] with a cache of values * for use by ToStream.character(char[], int , int) * and the array isSpecialTextASCII[] with the opposite values
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?