// File: xmlhelper.java
// Import packages dealing with XML parsing and representation
import org.xml.sax.*;
import org.w3c.dom.*;
import org.apache.xerces.dom.*;
import org.apache.xerces.parsers.*;
import org.apache.xml.serialize.*;
// Import packages dealing with XSL transformation
import org.apache.xalan.xslt.*;
import org.apache.xalan.xpath.*;
// Import the Tidy package for HTML to XML transformation
import org.w3c.tidy.*;
// Import a few standard Java packages
import java.io.*;
import java.util.*;
import java.net.*;
/**
* XMLHelper is a class designed to provide some generic utility functions
* for working with HTML, XHTML, XML, and XSL. All methods contained herein
* are static, so no instantiation of this class is ever necessary. The methods
* deal with parsing, input/output, retrieving files from the network, and
* transformation and clean-up of documents.
*
* @author Jared Jackson, Email: <a href="mailto:jjared@almaden.ibm.com">jjared@almaden.ibm.com</a>
* @see XMLHelperException
*/
public class XMLHelper {
/**
* This method creates a default XML document. The document is empty except
* for a single root element, with tag name as specified by the parameter.
*
* @param rootName The name of the root element of the XML document. If <CODE>null</CODE> or empty, no root element is added to the document.
* @return An empty XML document, save possibly a single root node.
*/
public static Document createXml(String rootName) {
    final Document document = new DocumentImpl();
    // A root element is added only when the name holds at least one
    // non-whitespace character. The emptiness test uses the trimmed name, but
    // the name itself is handed to createElement exactly as supplied.
    final boolean hasRootName = rootName != null && !"".equals(rootName.trim());
    if (hasRootName) {
        document.appendChild(document.createElement(rootName));
    }
    return document;
}
/**
* Given a <CODE>URL</CODE> as a <CODE>String</CODE>, this method retrieves
* the file located at that URL, and attempts to parse it as XML.
*
* @param url A URL encoding such as "http://www.ibm.com/someXML.xml" of the target document
* @return A parsed XML document found at the given URL
* @exception XMLHelperException Thrown if the URL is malformed, the file
* at the given URL can not be obtained, or the file found is not valid XML.
*/
public static Document parseXMLFromURLString(String url) throws XMLHelperException {
    // Turn the text into a URL object via convertStringToURL (declared outside
    // this part of the file), then delegate to the URL-based overload.
    final URL location = convertStringToURL(url);
    return parseXMLFromURL(location);
}
/**
* Given a <CODE>URL</CODE>, this method retrieves
* the file located at that URL, and attempts to parse it as XML.
*
* @param url A <CODE>URL</CODE> java class instantiation of the target document
* @return A parsed XML document found at the given URL
* @exception XMLHelperException Thrown if the URL is malformed, the file
* at the given URL can not be obtained, or the file found is not valid XML.
*/
public static Document parseXMLFromURL(URL url) throws XMLHelperException {
    // Held outside the try so the finally clause can release it on every path.
    InputStream in = null;
    try {
        URLConnection inConnection = url.openConnection();
        in = inConnection.getInputStream();
        return parseXMLFromInputSource(new InputSource(in));
    } catch (IOException ioe) {
        throw new XMLHelperException("Unable to read from source string", ioe);
    } finally {
        // The original never closed the connection's stream, leaking the
        // underlying socket/handle (most visibly when parsing failed).
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // Best effort: the parse result (or its failure) has already
                // been decided; a failing close must not mask it.
            }
        }
    }
}
/**
* Given an XML document currently unparsed in the form of a <CODE>String</CODE>,
* this method attempts to parse the content of that <CODE>String</CODE> as XML.
*
* @param source A <CODE>String</CODE> encoding of an XML document.
* @return A parsed XML document
* @exception XMLHelperException Thrown if the string given is not valid XML.
*/
public static Document parseXMLFromString(String source) throws XMLHelperException {
    // Expose the text as a character stream and let the shared parsing
    // routine do the actual work.
    final Reader characters = new StringReader(source);
    return parseXMLFromInputSource(new InputSource(characters));
}
/**
* Given an XML document pointed to by a <CODE>File</CODE> object, this method
* attempts to read the file and parse it as XML.
*
* @param sourceFile A <CODE>File</CODE> object referencing an XML file.
* @return A parsed XML document
* @exception XMLHelperException Thrown if the file is unreadable or the file does not contain a valid XML document
*/
public static Document parseXMLFromFile(File sourceFile) throws XMLHelperException {
    InputStream in;
    try {
        in = new FileInputStream(sourceFile);
    } catch (IOException ioe) {
        throw new XMLHelperException("The XML file could not be retrieved", ioe);
    }
    // Parsing stays outside the catch above, as in the original, so only a
    // failure to open the file is reported with the "could not be retrieved"
    // message.
    try {
        return parseXMLFromInputSource(new InputSource(in));
    } finally {
        // The original never closed the FileInputStream, leaking a file
        // handle on every call.
        try {
            in.close();
        } catch (IOException ignored) {
            // Best effort: a failing close must not mask the parse outcome.
        }
    }
}
/**
* Given an XML document pointed to by a file path expression, this method
* attempts to read the file and parse it as XML.
*
* @param sourceFile An absolute or relative file path expression.
* @return A parsed XML document
* @exception XMLHelperException Thrown if the file is unreadable or the file does not contain a valid XML document
*/
public static Document parseXMLFromFile(String sourceFile) throws XMLHelperException {
    InputStream in;
    try {
        in = new FileInputStream(sourceFile);
    } catch (IOException ioe) {
        throw new XMLHelperException("The XML file could not be retrieved", ioe);
    }
    // Parsing stays outside the catch above, as in the original, so only a
    // failure to open the file is reported with the "could not be retrieved"
    // message.
    try {
        return parseXMLFromInputSource(new InputSource(in));
    } finally {
        // The original never closed the FileInputStream, leaking a file
        // handle on every call.
        try {
            in.close();
        } catch (IOException ignored) {
            // Best effort: a failing close must not mask the parse outcome.
        }
    }
}
// This is the real work horse around XML parsing, the public methods each attempt to
// create InputSource objects, then call this method for parsing
private static Document parseXMLFromInputSource(InputSource is) throws XMLHelperException {
    // Shared back end for all public parse* entry points: they build the
    // InputSource, this method runs the DOM parser over it.
    final DOMParser domParser = new DOMParser();
    try {
        domParser.parse(is);
    } catch (SAXException saxe) {
        throw new XMLHelperException("Unable to parse the given string", saxe);
    } catch (IOException ioe) {
        throw new XMLHelperException("Unable to read from source string", ioe);
    }
    return domParser.getDocument();
}
/**
* Given two XML documents, one the target XML file and one an XSL file, this method
* applies an XSL transform defined by the XSL file on the XML file and returns the
* resulting document.
*
* @param xmlDoc The source XML file
* @param xslDoc An XML file that also follows the XSL transformation language specification
* @return The document resulting from applying xslDoc to xmlDoc.
* @exception XMLHelperException Thrown if the XSL document is either poorly formed as XSL or if it encounters an error during transformation.
*/
public static Document transformXML(Document xmlDoc, Document xslDoc) throws XMLHelperException {
    try {
        XSLTInputSource xmlIn = new XSLTInputSource(xmlDoc);
        XSLTInputSource xslIn = new XSLTInputSource(xslDoc);
        // The transform result is collected in memory as bytes.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        XSLTResultTarget xmlOut = new XSLTResultTarget(baos);
        XSLTProcessor processor = XSLTProcessorFactory.getProcessor();
        processor.process(xmlIn, xslIn, xmlOut);
        baos.close();
        // Re-parse the raw bytes rather than baos.toString(): toString()
        // decodes with the platform default charset, which corrupts non-ASCII
        // characters whenever that charset differs from the encoding the
        // transform output was written in. Given a byte stream, the parser
        // determines the encoding from the document itself.
        InputSource transformed = new InputSource(new ByteArrayInputStream(baos.toByteArray()));
        return parseXMLFromInputSource(transformed);
    } catch (SAXException saxe) {
        throw new XMLHelperException("Unable to perform transform", saxe);
    } catch (IOException ioe) {
        throw new XMLHelperException("Unable to perform transform", ioe);
    }
}
/**
* Given an XML document, a pretty (tab delimited and with line breaks) representation is
* sent to the specified <CODE>PrintStream</CODE> object. This is the most convenient way to
* output an XML document to standard out.
*
* @param doc The XML document to output
* @param stream The stream to send the result to. (e.g. <CODE>System.out</CODE> or <CODE>System.err</CODE>)
* @exception XMLHelperException Thrown in the event of an I/O error.
*/
public static void outputXML(Document doc, PrintStream stream) throws XMLHelperException {
    try {
        // Build the output settings from the document and enable indenting.
        final OutputFormat prettyFormat = new OutputFormat(doc);
        prettyFormat.setIndenting(true);
        // Serialize the whole document straight onto the caller's stream.
        new XMLSerializer(stream, prettyFormat).serialize(doc);
    } catch (IOException ioe) {
        throw new XMLHelperException("Unable to write to the given print stream", ioe);
    }
}
/**
* Given an XML document and a relative or absolute path name for a file, writes
* the XML document to that file location. The format of the written XML document
* will be tab delimited with line breaks. The file name will need to use the system
* dependent separator character(s) for directory navigation.
*
* @param doc The XML document to output.
* @param fileName A file name either relative to the running Java virtual machine, or absolute.
* @exception XMLHelperException Thrown if an I/O error occurs.
*/
public static void outputXMLToFile(Document doc, String fileName) throws XMLHelperException {
try {
OutputFormat of = new OutputFormat(doc);
of.setIndenting(true);
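// --- Code-viewer page text captured with the listing (not part of the program) ---
// Keyboard shortcuts: copy code Ctrl + C; search code Ctrl + F; full screen F11;
// toggle theme Ctrl + Shift + D; show shortcuts ?; increase font size Ctrl + =;
// decrease font size Ctrl + -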