xmlentityscanner.java
来自「JAVA的一些源码 JAVA2 STANDARD EDITION DEVELO」· Java 代码 · 共 1,660 行 · 第 1/5 页
JAVA
1,660 行
/* * The Apache Software License, Version 1.1 * * * Copyright (c) 1999-2003 The Apache Software Foundation. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Xerces" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without prior written * permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation and was * originally based on software copyright (c) 1999, International * Business Machines, Inc., http://www.apache.org. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */package com.sun.org.apache.xerces.internal.impl;import java.io.EOFException;import java.io.IOException;import java.util.Locale;import com.sun.org.apache.xerces.internal.impl.io.UCSReader;import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;import com.sun.org.apache.xerces.internal.util.SymbolTable;import com.sun.org.apache.xerces.internal.util.XMLChar;import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;import com.sun.org.apache.xerces.internal.xni.QName;import com.sun.org.apache.xerces.internal.xni.XMLLocator;import com.sun.org.apache.xerces.internal.xni.XMLString;/** * Implements the entity scanner methods. * * @author Andy Clark, IBM * @author Neil Graham, IBM * @version $Id: XMLEntityScanner.java,v 1.19 2003/11/13 18:45:59 mrglavas Exp $ */public class XMLEntityScanner implements XMLLocator { // constants private static final boolean DEBUG_ENCODINGS = false; private static final boolean DEBUG_BUFFER = false; // // Data // private XMLEntityManager fEntityManager = null; protected XMLEntityManager.ScannedEntity fCurrentEntity = null; protected SymbolTable fSymbolTable = null; protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE; /** * Error reporter. This property identifier is: * http://apache.org/xml/properties/internal/error-reporter */ protected XMLErrorReporter fErrorReporter; // // Constructors // /** Default constructor. */ public XMLEntityScanner() { } // <init>() // // XMLEntityScanner methods // /** * Returns the base system identifier of the currently scanned * entity, or null if none is available. */ public String getBaseSystemId() { return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null; } // getBaseSystemId():String /** * Sets the encoding of the scanner. This method is used by the * scanners if the XMLDecl or TextDecl line contains an encoding * pseudo-attribute. * <p> * <strong>Note:</strong> The underlying character reader on the * current entity will be changed to accomodate the new encoding. * However, the new encoding is ignored if the current reader was * not constructed from an input stream (e.g. an external entity * that is resolved directly to the appropriate java.io.Reader * object). * * @param encoding The IANA encoding name of the new encoding. * * @throws IOException Thrown if the new encoding is not supported. * * @see com.sun.org.apache.xerces.internal.util.EncodingMap */ public void setEncoding(String encoding) throws IOException { if (DEBUG_ENCODINGS) { System.out.println("$$$ setEncoding: "+encoding); } if (fCurrentEntity.stream != null) { // if the encoding is the same, don't change the reader and // re-use the original reader used by the OneCharReader // NOTE: Besides saving an object, this overcomes deficiencies // in the UTF-16 reader supplied with the standard Java // distribution (up to and including 1.3). The UTF-16 // decoder buffers 8K blocks even when only asked to read // a single char! -Ac if (fCurrentEntity.encoding == null || !fCurrentEntity.encoding.equals(encoding)) { // UTF-16 is a bit of a special case. If the encoding is UTF-16, // and we know the endian-ness, we shouldn't change readers. // If it's ISO-10646-UCS-(2|4), then we'll have to deduce // the endian-ness from the encoding we presently have. if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) { String ENCODING = encoding.toUpperCase(Locale.ENGLISH); if(ENCODING.equals("UTF-16")) return; if(ENCODING.equals("ISO-10646-UCS-4")) { if(fCurrentEntity.encoding.equals("UTF-16BE")) { fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE); } else { fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE); } return; } if(ENCODING.equals("ISO-10646-UCS-2")) { if(fCurrentEntity.encoding.equals("UTF-16BE")) { fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE); } else { fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE); } return; } } // wrap a new reader around the input stream, changing // the encoding if (DEBUG_ENCODINGS) { System.out.println("$$$ creating new reader from stream: "+ fCurrentEntity.stream); } //fCurrentEntity.stream.reset(); fCurrentEntity.setReader(fCurrentEntity.stream, encoding, null); fCurrentEntity.encoding = encoding; } else { if (DEBUG_ENCODINGS) System.out.println("$$$ reusing old reader on stream"); } } } // setEncoding(String) /** Returns true if the current entity being scanned is external. */ public boolean isExternal() { return fCurrentEntity.isExternal(); } // isExternal():boolean /** * Returns the next character on the input. * <p> * <strong>Note:</strong> The character is <em>not</em> consumed. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ public int peekChar() throws IOException { if (DEBUG_BUFFER) { System.out.print("(peekChar: "); XMLEntityManager.print(fCurrentEntity); System.out.println(); } // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } // peek at character int c = fCurrentEntity.ch[fCurrentEntity.position]; // return peeked character if (DEBUG_BUFFER) { System.out.print(")peekChar: "); XMLEntityManager.print(fCurrentEntity); if (fCurrentEntity.isExternal()) { System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'"); } else { System.out.println(" -> '"+(char)c+"'"); } } if (fCurrentEntity.isExternal()) { return c != '\r' ? c : '\n'; } else { return c; } } // peekChar():int /** * Returns the next character on the input. * <p> * <strong>Note:</strong> The character is consumed. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ public int scanChar() throws IOException { if (DEBUG_BUFFER) { System.out.print("(scanChar: "); XMLEntityManager.print(fCurrentEntity); System.out.println(); } // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } // scan character int c = fCurrentEntity.ch[fCurrentEntity.position++]; boolean external = false; if (c == '\n' || (c == '\r' && (external = fCurrentEntity.isExternal()))) { fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count) { fCurrentEntity.ch[0] = (char)c; load(1, false); } if (c == '\r' && external) { if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') { fCurrentEntity.position--; } c = '\n'; } } // return character that was scanned if (DEBUG_BUFFER) { System.out.print(")scanChar: "); XMLEntityManager.print(fCurrentEntity); System.out.println(" -> '"+(char)c+"'"); } fCurrentEntity.columnNumber++; return c; } // scanChar():int /** * Returns a string matching the NMTOKEN production appearing immediately * on the input as a symbol, or null if NMTOKEN Name string is present. * <p> * <strong>Note:</strong> The NMTOKEN characters are consumed. * <p> * <strong>Note:</strong> The string returned must be a symbol. The * SymbolTable can be used for this purpose. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. * * @see com.sun.org.apache.xerces.internal.util.SymbolTable * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName */ public String scanNmtoken() throws IOException { if (DEBUG_BUFFER) { System.out.print("(scanNmtoken: "); XMLEntityManager.print(fCurrentEntity); System.out.println(); } // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } // scan nmtoken int offset = fCurrentEntity.position; while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) { if (++fCurrentEntity.position == fCurrentEntity.count) { int length = fCurrentEntity.position - offset; if (length == fCurrentEntity.ch.length) { // bad luck we have to resize our buffer char[] tmp = new char[fCurrentEntity.ch.length << 1]; System.arraycopy(fCurrentEntity.ch, offset, tmp, 0, length);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?