📄 saxreader.java
字号:
/*
* Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
*
* This software is open source.
* See the bottom of this file for the licence.
*/
package org.dom4j.io;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.Serializable;
import java.net.URL;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentFactory;
import org.dom4j.ElementHandler;

import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLFilter;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
/**
* <p>
* <code>SAXReader</code> creates a DOM4J tree from SAX parsing events.
* </p>
*
* <p>
* The actual SAX parser that is used by this class is configurable so you can
* use your favourite SAX parser if you wish. DOM4J comes configured with its
* own SAX parser so you do not need to worry about configuring the SAX parser.
* </p>
*
* <p>
* To explicitly configure the SAX parser that is used via Java code you can use
* a constructor or use the {@link #setXMLReader(XMLReader)} or {@link
* #setXMLReaderClassName(String)} methods.
* </p>
*
* <p>
* If the parser is not specified explicitly then the standard SAX policy of
* using the <code>org.xml.sax.driver</code> system property is used to
* determine the implementation class of {@link XMLReader}.
* </p>
*
* <p>
* If the <code>org.xml.sax.driver</code> system property is not defined then
* JAXP is used via reflection (so that DOM4J is not explicitly dependent on the
* JAXP classes) to load the JAXP configured SAXParser. If there is any error
* creating a JAXP SAXParser an informational message is output and then the
* default (Aelfred) SAX parser is used instead.
* </p>
*
* <p>
* If you are trying to use JAXP to explicitly set your SAX parser and are
* experiencing problems, you can turn on verbose error reporting by defining
* the system property <code>org.dom4j.verbose</code> to be "true" which will
* output a more detailed description of why JAXP could not find a SAX parser.
* </p>
*
* <p>
* For more information on JAXP please go to <a
* href="http://java.sun.com/xml/">Sun's Java &amp; XML site</a>
* </p>
*
* @author <a href="mailto:james.strachan@metastuff.com">James Strachan </a>
* @version $Revision: 1.58 $
*/
public class SAXReader {
/** Identifier (URI) of the SAX <code>string-interning</code> feature. */
private static final String SAX_STRING_INTERNING =
"http://xml.org/sax/features/string-interning";
/** Identifier (URI) of the SAX <code>namespace-prefixes</code> feature. */
private static final String SAX_NAMESPACE_PREFIXES =
"http://xml.org/sax/features/namespace-prefixes";
/** Identifier (URI) of the SAX <code>namespaces</code> feature. */
private static final String SAX_NAMESPACES =
"http://xml.org/sax/features/namespaces";
/** Identifier (URI) of the SAX <code>declaration-handler</code> property. */
private static final String SAX_DECL_HANDLER =
"http://xml.org/sax/properties/declaration-handler";
/** Identifier (URI) of the SAX <code>lexical-handler</code> property. */
private static final String SAX_LEXICAL_HANDLER =
"http://xml.org/sax/properties/lexical-handler";
/**
 * Second lexical-handler identifier, under <code>handlers/</code> rather
 * than <code>properties/</code>. NOTE(review): presumably an alternative
 * name tried for parsers that do not accept the one above - confirm where
 * this constant is used.
 */
private static final String SAX_LEXICALHANDLER =
"http://xml.org/sax/handlers/LexicalHandler";
/**
 * <code>DocumentFactory</code> used to create new document objects; left
 * <code>null</code> by the constructors that do not take a factory
 */
private DocumentFactory factory;
/**
 * <code>XMLReader</code> used to parse the SAX events; left
 * <code>null</code> by the constructors that are given neither a reader
 * nor a non-null reader class name
 */
private XMLReader xmlReader;
/** Whether validation should occur; <code>false</code> unless set */
private boolean validating;
/** DispatchHandler to call when each <code>Element</code> is encountered */
private DispatchHandler dispatchHandler;
/** ErrorHandler class to use */
private ErrorHandler errorHandler;
/** The entity resolver */
private EntityResolver entityResolver;
/**
 * Should element &amp; attribute names and namespace URIs be interned?
 * Enabled by default.
 */
private boolean stringInternEnabled = true;
/** Should internal DTD declarations be expanded into a List in the DTD */
private boolean includeInternalDTDDeclarations = false;
/** Should external DTD declarations be expanded into a List in the DTD */
private boolean includeExternalDTDDeclarations = false;
/** Whether adjacent text nodes should be merged */
private boolean mergeAdjacentText = false;
/**
 * Holds value of property stripWhitespaceText. NOTE(review): presumably
 * controls whether whitespace-only text is dropped - confirm against the
 * code that reads this flag.
 */
private boolean stripWhitespaceText = false;
/** Should we ignore comments */
private boolean ignoreComments = false;
/**
 * Encoding set on every <code>InputSource</code> built by the
 * <code>read</code> methods; when <code>null</code> no encoding is set on
 * the source at all
 */
private String encoding = null;
// Disabled options, kept for reference only:
// private boolean includeExternalGeneralEntities = false;
// private boolean includeExternalParameterEntities = false;
/** The SAX filter used to filter SAX events */
private XMLFilter xmlFilter;
/**
 * Creates a non-validating reader without assigning a document factory or
 * an <code>XMLReader</code>.
 */
public SAXReader() {
}

/**
 * Creates a reader without assigning a document factory or an
 * <code>XMLReader</code>.
 *
 * @param validating
 *            whether validation should occur
 */
public SAXReader(boolean validating) {
    this.validating = validating;
}

/**
 * Creates a non-validating reader that uses the given factory.
 *
 * @param factory
 *            the <code>DocumentFactory</code> used to create new document
 *            objects
 */
public SAXReader(DocumentFactory factory) {
    this(factory, false);
}

/**
 * Creates a reader that uses the given factory.
 *
 * @param factory
 *            the <code>DocumentFactory</code> used to create new document
 *            objects
 * @param validating
 *            whether validation should occur
 */
public SAXReader(DocumentFactory factory, boolean validating) {
    this.validating = validating;
    this.factory = factory;
}

/**
 * Creates a non-validating reader on top of the given SAX parser.
 *
 * @param xmlReader
 *            the <code>XMLReader</code> used to parse the SAX events
 */
public SAXReader(XMLReader xmlReader) {
    this(xmlReader, false);
}

/**
 * Creates a reader on top of the given SAX parser.
 *
 * @param xmlReader
 *            the <code>XMLReader</code> used to parse the SAX events
 * @param validating
 *            whether validation should occur
 */
public SAXReader(XMLReader xmlReader, boolean validating) {
    this.validating = validating;
    this.xmlReader = xmlReader;
}

/**
 * Creates a non-validating reader whose SAX parser is instantiated from
 * the given class name.
 *
 * @param xmlReaderClassName
 *            class name handed to {@link XMLReaderFactory}; when
 *            <code>null</code> no parser is created here
 *
 * @throws SAXException
 *             if the <code>XMLReader</code> could not be created
 */
public SAXReader(String xmlReaderClassName) throws SAXException {
    this(xmlReaderClassName, false);
}

/**
 * Creates a reader whose SAX parser is instantiated from the given class
 * name.
 *
 * @param xmlReaderClassName
 *            class name handed to {@link XMLReaderFactory}; when
 *            <code>null</code> no parser is created here
 * @param validating
 *            whether validation should occur
 *
 * @throws SAXException
 *             if the <code>XMLReader</code> could not be created
 */
public SAXReader(String xmlReaderClassName, boolean validating)
        throws SAXException {
    if (xmlReaderClassName != null) {
        XMLReader created = XMLReaderFactory
                .createXMLReader(xmlReaderClassName);
        this.xmlReader = created;
    }
    this.validating = validating;
}
/**
 * Forwards a SAX property to the underlying SAX parser, for example a
 * parser-specific property such as the location of schema or DTD
 * resources. Use with caution, since a property can break the standard
 * behaviour. Alternatively, configure an <code>XMLReader</code> instance
 * yourself and pass it to {@link #setXMLReader(XMLReader)}.
 *
 * @param name
 *            the SAX property name
 * @param value
 *            the value to assign to the SAX property
 *
 * @throws SAXException
 *             if the XMLReader could not be created or the property could
 *             not be changed.
 */
public void setProperty(String name, Object value) throws SAXException {
    XMLReader reader = getXMLReader();
    reader.setProperty(name, value);
}
/**
 * Forwards a SAX feature flag to the underlying SAX parser, for example a
 * parser-specific feature. Use with caution, since a feature can break the
 * standard behaviour. Alternatively, configure an <code>XMLReader</code>
 * instance yourself and pass it to {@link #setXMLReader(XMLReader)}.
 *
 * @param name
 *            the SAX feature name
 * @param value
 *            the state to assign to the SAX feature
 *
 * @throws SAXException
 *             if the XMLReader could not be created or the feature could
 *             not be changed.
 */
public void setFeature(String name, boolean value) throws SAXException {
    XMLReader reader = getXMLReader();
    reader.setFeature(name, value);
}
/**
 * <p>
 * Reads a Document from the given <code>File</code>.
 * </p>
 *
 * <p>
 * The file is opened as a byte stream instead of being converted to a URL,
 * and a <code>file://</code> system ID built from its absolute path is set
 * on the source. The stream opened here is always closed before this
 * method returns, whether or not parsing succeeds.
 * </p>
 *
 * @param file
 *            is the <code>File</code> to read from.
 *
 * @return the newly created Document instance
 *
 * @throws DocumentException
 *             if the file cannot be opened or an error occurs during
 *             parsing.
 */
public Document read(File file) throws DocumentException {
    FileInputStream stream = null;
    try {
        /*
         * We cannot convert the file to an URL because if the filename
         * contains '#' characters, there will be problems with the URL in
         * the InputSource (because a URL like
         * http://myhost.com/index#anchor is treated the same as
         * http://myhost.com/index) Thanks to Christian Oetterli
         */
        stream = new FileInputStream(file);
        InputSource source = new InputSource(stream);
        if (this.encoding != null) {
            source.setEncoding(this.encoding);
        }
        String path = file.getAbsolutePath();
        if (path != null) {
            // Code taken from Ant FileUtils
            StringBuffer sb = new StringBuffer("file://");
            // add an extra slash for filesystems with drive-specifiers
            if (!path.startsWith(File.separator)) {
                sb.append("/");
            }
            path = path.replace('\\', '/');
            sb.append(path);
            source.setSystemId(sb.toString());
        }
        return read(source);
    } catch (FileNotFoundException e) {
        throw new DocumentException(e.getMessage(), e);
    } finally {
        // This method opened the stream, so it must release it. Without
        // this, a failure before the parser takes over the stream (or a
        // parser that does not close its input) leaks the file handle.
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if close fails; the parse
                // result or the original exception takes precedence.
            }
        }
    }
}
/**
 * <p>
 * Reads a Document from the given <code>URL</code> using SAX. The external
 * form of the URL becomes the system ID of the source that is parsed.
 * </p>
 *
 * @param url
 *            <code>URL</code> to read from.
 *
 * @return the newly created Document instance
 *
 * @throws DocumentException
 *             if an error occurs during parsing.
 */
public Document read(URL url) throws DocumentException {
    InputSource input = new InputSource(url.toExternalForm());
    if (null != this.encoding) {
        input.setEncoding(this.encoding);
    }
    return read(input);
}
/**
 * <p>
 * Reads a Document from the given system ID using SAX.
 * </p>
 *
 * <p>
 * The string is passed unchanged to a new {@link InputSource} as its
 * system ID; this method does not itself decide whether it denotes a URL
 * or a file name. If you want finer grained control over this mechanism
 * then please explicitly pass in either a {@link URL} or a {@link File}
 * instance instead of a {@link String} to denote the source of the
 * document.
 * </p>
 *
 * @param systemId
 *            is a URL for a document or a file name.
 *
 * @return the newly created Document instance
 *
 * @throws DocumentException
 *             if an error occurs during parsing.
 */
public Document read(String systemId) throws DocumentException {
    InputSource input = new InputSource(systemId);
    if (null != this.encoding) {
        input.setEncoding(this.encoding);
    }
    return read(input);
}
/**
 * <p>
 * Reads a Document from the given byte stream using SAX. No system ID is
 * set on the source by this method.
 * </p>
 *
 * @param in
 *            <code>InputStream</code> to read from.
 *
 * @return the newly created Document instance
 *
 * @throws DocumentException
 *             if an error occurs during parsing.
 */
public Document read(InputStream in) throws DocumentException {
    InputSource input = new InputSource(in);
    if (null != this.encoding) {
        input.setEncoding(this.encoding);
    }
    return read(input);
}
/**
* <p>
* Reads a Document from the given <code>Reader</code> using SAX
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -