📄 stockhelper.java

📁 本程序用JAVA编制
💻 JAVA
字号:
// Import packages dealing with XML parsing and representationimport org.xml.sax.*;import org.w3c.dom.*;import org.apache.xerces.dom.*;import org.apache.xerces.parsers.*;import org.apache.xml.serialize.*;import org.apache.xalan.xslt.*; // Import packages dealing with XSL transformationimport org.apache.xalan.xpath.*;import org.w3c.tidy.*;  // Import the Tidy package for HTML to XML transformationimport java.io.*;      // Import a few standard Java packagesimport java.util.*;import java.net.*;/** * XMLHelper is a class designed to provide some generic utility functions * for working with HTML, XHTML, XML, and XSL. All methods contained herein * are static, so no instantiation of this class is ever necessary. The methods * deal with parsing, input/output, retrieving files from the network, and * transformation and clean-up of documents. * * @author Jared Jackson, Email: <a mailto="jjared@almaden.ibm.com">jjared@almaden.ibm.com</a> * @see XMLHelperException */public class StockHelper {  /**   * This method creates a default XML document. The document is empty except   * for a single root element, with tag name as specified by the parameter.   *   * @param rootName The name of the root element of the XML document. If <CODE>null</CODE> or empty, no root element is added to the document.   * @return An empty XML document, save possibly a single root node.   */  public static Document createXml(String rootName) {    Document doc = new DocumentImpl();    if (rootName == null || rootName.trim().equals("")) return doc;    doc.appendChild(doc.createElement(rootName));    return doc;  }  /**   * Given an <CODE>URL</CODE> as a <CODE>String</CODE>, this method retrieves   * the file located at that URL, and attempts to parse it as XML.   *   * @param url A URL encoding such as "http://www.ibm.com/someXML.xml" of the target document   * @return A parsed XML document found at the given URL   * @exception XMLHelperException Thrown if the URL is malformed, the file   * at the given URL can not be obtained, or the file found is not valid XML.   */  public static Document parseXMLFromURLString(String url) throws XMLHelperException {    return parseXMLFromURL(convertStringToURL(url));  }  /**   * Given an <CODE>URL</CODE>, this method retrieves   * the file located at that URL, and attempts to parse it as XML.   *   * @param url A <CODE>URL</CODE> java class instantiation of the target document   * @return A parsed XML document found at the given URL   * @exception XMLHelperException Thrown if the URL is malformed, the file   * at the given URL can not be obtained, or the file found is not valid XML.   */  public static Document parseXMLFromURL(URL url) throws XMLHelperException {    try {      URLConnection inConnection = url.openConnection();      InputSource is = new InputSource(inConnection.getInputStream());      return parseXMLFromInputSource(is);    } catch (IOException ioe) {      throw new XMLHelperException("Unable to read from source string", ioe);    }  }  /**   * Given an XML document currently unparsed in the form of a <CODE>String</CODE>,   * this method attempts to parse the content of that <CODE>String</CODE> as XML.   *   * @param source A <CODE>String</CODE> encoding of a XML document.   * @return A parsed XML document   * @exception XMLHelperException Thrown if the string given is not valid XML.   */  public static Document parseXMLFromString(String source) throws XMLHelperException {    InputSource is = new InputSource(new StringReader(source));    return parseXMLFromInputSource(is);  }  /**   * Given an XML document pointed to by a <CODE>File</CODE> object, this method   * attemps to read the file and parse it as XML.   *   * @param sourceFile A <CODE>File</CODE> object referencing an XML file.   * @return A parsed XML document   * @exception XMLHelperException Thrown if the file is unreadable or the file does not contain a valid XML document   */  public static Document parseXMLFromFile(File sourceFile) throws XMLHelperException {    InputSource is = null;    try {      is = new InputSource(new FileInputStream(sourceFile));    } catch (IOException ioe) {      throw new XMLHelperException("The XML file could not be retrieved", ioe);    }    return parseXMLFromInputSource(is);  }  /**   * Given an XML document pointed to by a file path expression, this method   * attemps to read the file and parse it as XML.   *   * @param sourceFile An absolute or relative file path expression.   * @return A parsed XML document   * @exception XMLHelperException Thrown if the file is unreadable or the file does not contain a valid XML document   */  public static Document parseXMLFromFile(String sourceFile) throws XMLHelperException {    InputSource is = null;    try {      is = new InputSource(new FileInputStream(sourceFile));    } catch (IOException ioe) {      throw new XMLHelperException("The XML file could not be retrieved", ioe);    }    return parseXMLFromInputSource(is);  }  // This is the real work horse around XML parsing, the public methods each attempt to  // create InputSource objects, then call this method for parsing  private static Document parseXMLFromInputSource(InputSource is) throws XMLHelperException {    Document doc = null;    try {      DOMParser parser = new DOMParser();      parser.parse(is);      doc = parser.getDocument();    } catch (IOException ioe) {      throw new XMLHelperException("Unable to read from source string", ioe);    } catch (SAXException saxe) {      throw new XMLHelperException("Unable to parse the given string", saxe);    }    return doc;  }  /**   * Given two XML documents, one the target XML file and one an XSL file, this method   * applies an XSL transform defined by the XSL file on the XML file and returns the   * resulting document.   *   * @param xmlDoc The source XML file   * @param xslDoc An XML file that also follows the XSL transformation language specification   * @return The document resulting from applying xslDoc to xmlDoc.   * @exception XMLHelperException Thrown if the XSL document is either poorly formed as XSL or if it encounters an error during transformation.   */  public static Document transformXML(Document xmlDoc, Document xslDoc) throws XMLHelperException {    try {      XSLTInputSource xmlIn = new XSLTInputSource(xmlDoc);      XSLTInputSource xslIn = new XSLTInputSource(xslDoc);      ByteArrayOutputStream baos = new ByteArrayOutputStream();      XSLTResultTarget xmlOut = new XSLTResultTarget(baos);      XSLTProcessor processor = XSLTProcessorFactory.getProcessor();      processor.process(xmlIn, xslIn, xmlOut);      baos.close();      String result = baos.toString();      //System.out.print(result);      return parseXMLFromString(result);    } catch (SAXException saxe) {      throw new XMLHelperException("Unable to perform transform", saxe);    } catch (IOException ioe) {      throw new XMLHelperException("Unable to perform transform", ioe);    }  }  /**   * Given an XML document, a pretty (tab delimited and with line breaks) representation is   * sent to the specified <CODE>PrintStream</CODE> object. This is the most convenient way to   * output an XML document to standard out.   *   * @param doc The XML document to output   * @param stream The stream to send the result to. (e.g. <CODE>System.out</CODE> or <CODE>System.err</CODE>)   * @exception XMLHelperException Thrown in the event of an I/O error.   */  public static void outputXML(Document doc, PrintStream stream) throws XMLHelperException {    try {      OutputFormat of = new OutputFormat(doc);      of.setIndenting(true);      XMLSerializer serializer = new XMLSerializer(stream, of);      serializer.serialize(doc);    } catch (IOException ioe) {      throw new XMLHelperException("Unable to write to the given print stream", ioe);    }  }  /**   * Given an XML document and a relative or absolute path name for a file, writes   * the XML document to that file location. The format of the written XML document   * will be tab delimited and line breaked. The file name will need to use the system   * dependent separator character(s) for directory navigation.   *   * @param doc The XML document to output.   * @param fileName A file name either relative to the running Java virtual machine, or absolute.   * @exception XMLHelperException Thrown if an I/O error occurs.   */  public static void outputXMLToFile(Document doc, String fileName) throws XMLHelperException {    try {      OutputFormat of = new OutputFormat(doc);      of.setIndenting(true);      File f = new File(fileName);      FileOutputStream fos = new FileOutputStream(f);      XMLSerializer serializer = new XMLSerializer(fos, of);      serializer.serialize(doc);      fos.close();    } catch (IOException ioe) {      throw new XMLHelperException("Unable to write to the given file", ioe);    }  }  /**   * A utility method for converting an XML document to a <CODE>String</CODE> object.   * This method is included in case the user would like to do their own I/O in a way   * not specified in this class.   *   * @param doc The XML document to be encoded as a <CODE>String</CODE>.   * @return The XML document as text in a <CODE>String</CODE>.   */  public static String convertXMLToString(Document doc) throws XMLHelperException {    try {      OutputFormat of = new OutputFormat(doc);      of.setIndenting(true);      StringWriter sw = new StringWriter();      XMLSerializer serializer = new XMLSerializer(sw, of);      serializer.serialize(doc);      return sw.toString();    } catch (IOException ioe) {      throw new XMLHelperException("Unable to write to the string", ioe);    }  }  public static void mergeXML(Element mergeToXML, Element mergeFromXML, boolean childrenOnly) {    Document toDoc = mergeToXML.getOwnerDocument();    Element copyElem = (Element)(toDoc.importNode(mergeFromXML,true));    if (childrenOnly) {      NodeList nlist = copyElem.getChildNodes();      for (int i=0; i < nlist.getLength(); i++) {        org.w3c.dom.Node n = nlist.item(i);        mergeToXML.appendChild(n);      }      return;    } else {      mergeToXML.appendChild(copyElem);    }  }  public static Document tidyHTML(String url) throws XMLHelperException {    return tidyHTML(convertStringToURL(url));  }  /**   * Retrieves an HTML page from a java <CODE>URL</CODE> object and   * attempts to clean up the source of that HTML to remove author errors. If   * successful, the resulting document is converted to XHTML and returned as   * an XML document.   *   * @param url A <CODE>URL</CODE> object hopefully pointing to an HTML file.   * @return an XML document representing the XHTML of the source of the HTML file.   * @exception XMLHelperException Thrown if the HTML source can not be obtained or the tool is unable to convert the source to XML.   */  public static Document tidyHTML(URL url) throws XMLHelperException {    try {      URLConnection inConnection = url.openConnection();      if (inConnection.getContentType().startsWith("text/xml") ||          inConnection.getContentType().startsWith("text/xhtml")) {        // All ready an XML source        return parseXMLFromURL(url);      } else if (inConnection.getContentType().startsWith("text/html")) {        // An HTML source        InputStream is = inConnection.getInputStream();        // Clean the input stream        ByteArrayOutputStream out = new ByteArrayOutputStream();        int totalBytes = 0;        byte[] buffer = new byte[36384];        while (true) {          int bytesRead = is.read(buffer, 0, buffer.length);          if (bytesRead < 0) break;          // Remove binary below space except tab and newline          for (int i=0; i < bytesRead; i++) {            byte b = buffer[i];            //if (b < 32 && b!= 10 && b != 13 && b != 9) b = 32;            buffer[i] = b;          }          out.write(buffer, 0, bytesRead);          totalBytes += bytesRead;        }        is.close();        out.close();        String outContent=out.toString();        InputStream in = new ByteArrayInputStream(out.toByteArray());        //org.w3c.tidy.TagTable tags = org.w3c.tidy.TagTable.getDefaultTagTable();        //tags.defineBlockTag("script");        Tidy tidy = new Tidy();        tidy.setShowWarnings(false);        tidy.setXmlOut(true);        tidy.setXmlPi(false);        tidy.setDocType("omit");        tidy.setXHTML(false);        tidy.setRawOut(true);        tidy.setNumEntities(true);        tidy.setQuiet(true);        tidy.setFixComments(true);        tidy.setIndentContent(true);        tidy.setCharEncoding(org.w3c.tidy.Configuration.DOCTYPE_USER);        ByteArrayOutputStream baos = new ByteArrayOutputStream();        org.w3c.tidy.Node tNode = tidy.parse(in, baos);        String result = baos.toString();        //result=new String(result.getBytes("UTF-8"), "GB2312");        //System.out.print(o);/*        String result="";        getUTF8 gu=new getUTF8();        try{          //result=gu.change(outContent);          result=gu.change(o);          }catch(Exception e){}        */        //System.out.print(result);        // Strip the DOCTYPE and script elements - This is an optional step        int startIndex = 0;        int endIndex = 0;        if ((startIndex = result.indexOf("<!DOCTYPE")) >= 0) {          endIndex = result.indexOf(">",startIndex);          result = result.substring(0,startIndex) +                   result.substring(endIndex + 1, result.length());        }        while ((startIndex = result.indexOf("<script")) >= 0) {          endIndex = result.indexOf("</script>");          result = result.substring(0,startIndex) +                   result.substring(endIndex + 9, result.length());        }        in.close();        baos.close();        return parseXMLFromString(result);      } else {        throw new XMLHelperException("Unable to tidy content type: " +                                     inConnection.getContentType());      }    } catch (IOException ioe) {      throw new XMLHelperException("Unable to perform input/output", ioe);    }  }  // A utility method for converting a String encoding of a URL to a URL  private static URL convertStringToURL(String url) throws XMLHelperException {    try {      return new URL(url);    } catch (MalformedURLException murle) {      throw new XMLHelperException(url + " is not a well formed URL", murle);    }  }}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -