📄 xmlhelper.java
字号:
File f = new File(fileName);
FileOutputStream fos = new FileOutputStream(f);
XMLSerializer serializer = new XMLSerializer(fos, of);
serializer.serialize(doc);
fos.close();
} catch (IOException ioe) {
throw new XMLHelperException("Unable to write to the given file", ioe);
}
}
/**
 * Serializes an XML document into its textual form and hands the text back
 * as a <CODE>String</CODE>. Callers that want to perform their own I/O can use
 * this instead of the file-writing helpers of this class.
 *
 * @param doc The XML document to be encoded as a <CODE>String</CODE>.
 * @return The indented XML text of the document.
 * @exception XMLHelperException Thrown if the serializer reports an I/O failure.
 */
public static String convertXMLToString(Document doc) throws XMLHelperException {
    StringWriter buffer = new StringWriter();
    // The output format is derived from the document itself; indenting is switched on.
    OutputFormat format = new OutputFormat(doc);
    format.setIndenting(true);
    try {
        new XMLSerializer(buffer, format).serialize(doc);
    } catch (IOException ioe) {
        throw new XMLHelperException("Unable to write to the string", ioe);
    }
    return buffer.toString();
}
/**
 * <P>Copies the content of one XML <CODE>Element</CODE> into another. The source
 * element is deep-imported into the owner document of <CODE>mergeToXML</CODE>, so
 * <CODE>mergeFromXML</CODE> itself is never modified. When
 * <CODE>childrenOnly</CODE> is <CODE>false</CODE>, the imported element is appended
 * as a child of <CODE>mergeToXML</CODE>. When it is <CODE>true</CODE>, every child
 * node of the imported element is appended to <CODE>mergeToXML</CODE>, in document
 * order, and the imported parent itself is discarded.</P>
 *
 * <P>For instance, let the <CODE>mergeToXML</CODE> parameter be the XML:</P>
 * <BLOCKQUOTE>
 * <PRE>
 * &lt;Original&gt;
 * &lt;/Original&gt;
 * </PRE>
 * </BLOCKQUOTE>
 * <P>And let the <CODE>mergeFromXML</CODE> parameter be the XML:</P>
 * <BLOCKQUOTE>
 * <PRE>
 * &lt;Target&gt;
 *   &lt;Child&gt;Child 1&lt;/Child&gt;
 *   &lt;Child&gt;Child 2&lt;/Child&gt;
 * &lt;/Target&gt;
 * </PRE>
 * </BLOCKQUOTE>
 * <P>If <CODE>childrenOnly</CODE> is set to <CODE>false</CODE>, the result is:</P>
 * <BLOCKQUOTE>
 * <PRE>
 * &lt;Original&gt;
 *   &lt;Target&gt;
 *     &lt;Child&gt;Child 1&lt;/Child&gt;
 *     &lt;Child&gt;Child 2&lt;/Child&gt;
 *   &lt;/Target&gt;
 * &lt;/Original&gt;
 * </PRE>
 * </BLOCKQUOTE>
 * <P>Otherwise, if <CODE>childrenOnly</CODE> is set to <CODE>true</CODE>, the result is:</P>
 * <BLOCKQUOTE>
 * <PRE>
 * &lt;Original&gt;
 *   &lt;Child&gt;Child 1&lt;/Child&gt;
 *   &lt;Child&gt;Child 2&lt;/Child&gt;
 * &lt;/Original&gt;
 * </PRE>
 * </BLOCKQUOTE>
 *
 * @param mergeToXML The element into which the XML will be inserted as children.
 * @param mergeFromXML The element from which the XML will be copied and imported.
 * @param childrenOnly If <CODE>true</CODE> grab the children and ignore the parent. If <CODE>false</CODE> grab everything.
 */
public static void mergeXML(Element mergeToXML, Element mergeFromXML, boolean childrenOnly) {
    Document toDoc = mergeToXML.getOwnerDocument();
    // Deep copy owned by the destination document; the source tree stays intact.
    Element copyElem = (Element) toDoc.importNode(mergeFromXML, true);
    if (!childrenOnly) {
        mergeToXML.appendChild(copyElem);
        return;
    }
    // appendChild() detaches the node from copyElem, and the list returned by
    // getChildNodes() is live, so walking it by index would skip every second
    // child. Draining from the front moves all children and keeps their order.
    org.w3c.dom.Node child;
    while ((child = copyElem.getFirstChild()) != null) {
        mergeToXML.appendChild(child);
    }
}
/**
 * Convenience overload of {@link #tidyHTML(URL)} that accepts the location of
 * the HTML page as a <CODE>String</CODE>. The text is first turned into a
 * <CODE>URL</CODE>, and the page behind it is then fetched, cleaned up and
 * returned as an XML document.
 *
 * @param url A <CODE>String</CODE> encoding of a URL (e.g. "http://www.ibm.com/index.html").
 * @return an XML document representing the XHTML of the source of the HTML file.
 * @exception XMLHelperException Thrown if the URL is malformed, the HTML source can not be obtained, or the tool is unable to convert the source to XML.
 */
public static Document tidyHTML(String url) throws XMLHelperException {
    URL location = convertStringToURL(url);
    return tidyHTML(location);
}
/**
 * Retrieves an HTML page from a java <CODE>URL</CODE> object and
 * attempts to clean up the source of that HTML to remove author errors. If
 * successful, the resulting document is converted to XML and returned as
 * an XML document.
 *
 * <P>Content reported as <CODE>text/xml</CODE> or <CODE>text/xhtml</CODE> is parsed
 * directly. Content reported as <CODE>text/html</CODE> is read completely, ASCII
 * control bytes other than tab, line feed and carriage return are replaced by
 * spaces, the bytes are run through JTidy, and any <CODE>DOCTYPE</CODE> declaration
 * and <CODE>script</CODE> elements are stripped from the tidied text before it is
 * parsed. Any other (or unknown) content type is rejected.</P>
 *
 * @param url A <CODE>URL</CODE> object hopefully pointing to an HTML file.
 * @return an XML document representing the tidied source of the HTML file.
 * @exception XMLHelperException Thrown if the HTML source can not be obtained, the content type is unknown or unsupported, or the tool is unable to convert the source to XML.
 */
public static Document tidyHTML(URL url) throws XMLHelperException {
    try {
        URLConnection inConnection = url.openConnection();
        // getContentType() returns null when the type is not known; treat that
        // like any other unsupported type instead of failing with a NullPointerException.
        String contentType = inConnection.getContentType();
        if (contentType == null) {
            throw new XMLHelperException("Unable to tidy content type: " + contentType);
        }
        if (contentType.startsWith("text/xml") || contentType.startsWith("text/xhtml")) {
            // Already an XML source
            return parseXMLFromURL(url);
        }
        if (!contentType.startsWith("text/html")) {
            throw new XMLHelperException("Unable to tidy content type: " + contentType);
        }

        // An HTML source: read and clean the raw bytes, always releasing the stream.
        byte[] cleaned;
        InputStream is = inConnection.getInputStream();
        try {
            cleaned = readSanitizedBytes(is);
        } finally {
            is.close();
        }
        InputStream in = new ByteArrayInputStream(cleaned);

        //org.w3c.tidy.TagTable tags = org.w3c.tidy.TagTable.getDefaultTagTable();
        //tags.defineBlockTag("script");
        // NOTE(review): this table is configured but never handed to the Tidy
        // instance below, so it does not appear to influence parsing - confirm
        // before removing.
        org.w3c.tidy.TagTable tags = new org.w3c.tidy.TagTable();
        tags.defineBlockTag("script");

        Tidy tidy = new Tidy();
        tidy.setShowWarnings(false);
        tidy.setXmlOut(true);
        tidy.setXmlPi(false);
        tidy.setDocType("omit");
        tidy.setXHTML(false);
        tidy.setRawOut(true);
        tidy.setNumEntities(true);
        tidy.setQuiet(true);
        tidy.setFixComments(true);
        tidy.setIndentContent(true);
        // NOTE(review): DOCTYPE_AUTO is named like a doctype-mode constant rather
        // than a character-encoding constant - confirm against the JTidy
        // Configuration API that this is the intended value.
        tidy.setCharEncoding(org.w3c.tidy.Configuration.DOCTYPE_AUTO);

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        tidy.parse(in, baos);

        // NOTE(review): baos.toString() decodes with the platform default charset
        // while the prolog below declares gb2312 - confirm the two agree.
        //String result = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\" ?>\n" + baos.toString();
        String result = "<?xml version=\"1.0\" encoding=\"gb2312\" ?>\n" + baos.toString();

        // Strip the DOCTYPE and script elements - This is an optional step
        result = stripDocTypeDeclaration(result);
        result = stripScriptElements(result);
        return parseXMLFromString(result);
    } catch (IOException ioe) {
        throw new XMLHelperException("Unable to perform input/output", ioe);
    }
}

// Reads the stream to its end, replacing ASCII control bytes (except tab, LF, CR) with spaces.
private static byte[] readSanitizedBytes(InputStream is) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    byte[] buffer = new byte[20000];
    int bytesRead;
    while ((bytesRead = is.read(buffer, 0, buffer.length)) >= 0) {
        for (int i = 0; i < bytesRead; i++) {
            byte b = buffer[i];
            // Java bytes are signed: values 0x80-0xFF are negative. Only 0..31 are
            // control characters; negative values are the high bytes of non-ASCII
            // text and must be left alone.
            if (b >= 0 && b < 32 && b != 10 && b != 13 && b != 9) {
                buffer[i] = 32;
            }
        }
        out.write(buffer, 0, bytesRead);
    }
    return out.toByteArray();
}

// Removes the first "<!DOCTYPE ...>" declaration; text without a complete declaration is returned unchanged.
private static String stripDocTypeDeclaration(String xml) {
    int startIndex = xml.indexOf("<!DOCTYPE");
    if (startIndex < 0) {
        return xml;
    }
    int endIndex = xml.indexOf(">", startIndex);
    if (endIndex < 0) {
        // Unterminated declaration: leave the text as it is rather than corrupt it.
        return xml;
    }
    return xml.substring(0, startIndex) + xml.substring(endIndex + 1);
}

// Removes every "<script ...>...</script>" element as well as self-closing "<script .../>" tags.
private static String stripScriptElements(String xml) {
    final String closeTag = "</script>";
    int startIndex;
    while ((startIndex = xml.indexOf("<script")) >= 0) {
        int tagEnd = xml.indexOf(">", startIndex);
        if (tagEnd < 0) {
            // Unterminated start tag: nothing sensible to cut, and looping on
            // would never finish.
            break;
        }
        int cutEnd;
        if (xml.charAt(tagEnd - 1) == '/') {
            // Self-closing tag: there is no matching close tag to look for.
            cutEnd = tagEnd + 1;
        } else {
            // Search only after the start tag so an earlier "</script>" is not picked up.
            int closeIndex = xml.indexOf(closeTag, tagEnd);
            // With no close tag at all, drop just the start tag so the loop still advances.
            cutEnd = (closeIndex < 0) ? tagEnd + 1 : closeIndex + closeTag.length();
        }
        xml = xml.substring(0, startIndex) + xml.substring(cutEnd);
    }
    return xml;
}
// Turns the textual form of a URL into a URL object, reporting a malformed
// address as an XMLHelperException that carries the original cause.
private static URL convertStringToURL(String url) throws XMLHelperException {
    URL parsed;
    try {
        parsed = new URL(url);
    } catch (MalformedURLException murle) {
        throw new XMLHelperException(url + " is not a well formed URL", murle);
    }
    return parsed;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -