📄 saxspider.java

📁 随书的代码
💻 JAVA
字号:
import org.xml.sax.*;import org.xml.sax.helpers.*;import java.io.*;import java.util.*;public class SAXSpider extends DefaultHandler {  // Need to keep track of where we've been   // so we don't get stuck in an infinite loop  private List spideredURIs = new Vector();  // This linked list keeps track of where we're going.  // Although the LinkedList class does not guarantee queue like  // access, I always access it in a first-in/first-out fashion.  private LinkedList queue = new LinkedList();    private String    currentURI;  private XMLReader parser;    public SAXSpider(XMLReader parser, String uri) {    this.parser = parser;    this.currentURI = uri;  }    public void endDocument() {         spideredURIs.add(currentURI);    System.out.println("Visited " + currentURI);    String uri;    try {      uri = (String) queue.removeLast();    }    catch (NoSuchElementException e) {      // The queue is empty; we're finished.      return;    }    this.currentURI = uri;    try {      parser.parse(uri);    }    catch (Exception e) {       // just skip this one and move on to the next      this.endDocument();    }      }  public void startElement(String namespaceURI, String localName,    String qualifiedName, Attributes atts) {        String type      = atts.getValue("http://www.w3.org/1999/xlink", "type");    if (type != null) {      String href        = atts.getValue("http://www.w3.org/1999/xlink", "href");      if (href != null) {        if (!spideredURIs.contains(href)) {          queue.addFirst(href);        }      }    }      }    public static void main(String[] args) {        if (args.length == 0) {      System.out.println("Usage: java SAXSpider URL1");     }     String uri = args[0];        try {      XMLReader parser = XMLReaderFactory.createXMLReader(        "org.apache.xerces.parsers.SAXParser"      );          // Install the ContentHandler       ContentHandler spider = new SAXSpider(parser, uri);       parser.setContentHandler(spider);    parser.parse(uri);    }    catch (Exception e) {      System.err.println(e);    }          } // end main} // end SAXSpider
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -