📄 saxspider.java
字号:
import org.xml.sax.*;
import org.xml.sax.helpers.*;
import java.io.*;
import java.util.*;
/**
 * SAX content handler that spiders XML documents by following XLinks.
 *
 * While a document is parsed, every element that carries an
 * <code>xlink:type</code> attribute and an <code>xlink:href</code> attribute
 * contributes its href to a FIFO queue. When the document ends, the next
 * queued URL is handed to the same parser. Each distinct href string is
 * queued at most once.
 *
 * Hrefs are used verbatim: relative references are NOT resolved against the
 * URL of the document in which they were found.
 *
 * NOTE(review): the next parse is started from inside endDocument(), i.e.
 * while the parser is still inside the previous parse() call. The SAX
 * documentation for XMLReader.parse says it must not be invoked while a
 * parse is in progress; whether this works depends on the parser in use.
 * The handler-driven design is kept so that existing callers, which simply
 * register the spider and call parse() once, keep working.
 */
public class SAXSpider extends DefaultHandler
{
    /** Namespace URI shared by the xlink:type and xlink:href attributes. */
    private static final String XLINK_NAMESPACE = "http://www.w3.org/1999/xlink";

    /** Parser class tried first by main(), before the platform default. */
    private static final String PREFERRED_PARSER = "org.apache.xerces.parsers.SAXParser";

    /** Every URL seen so far: already parsed, being parsed, or still queued. */
    private final Set knownURLs = new HashSet();

    /** URLs waiting to be parsed; added at the head, taken from the tail. */
    private final LinkedList queue = new LinkedList();

    /** URL of the document currently being (or most recently) parsed. */
    private String currentURL;

    /** Parser that is reused for every document of the crawl. */
    private final XMLReader parser;

    /**
     * Creates a spider.
     *
     * @param parser the reader used to parse each queued document; the caller
     *               is expected to register this spider as its content handler
     * @param url    the URL of the first document, which the caller parses
     */
    public SAXSpider(XMLReader parser, String url)
    {
        this.parser = parser;
        this.currentURL = url;
        // The start document counts as known so a link back to it is not re-queued.
        knownURLs.add(url);
    }

    /**
     * Reports the document that just ended and starts parsing the next queued
     * URL. A URL whose parse throws is reported on System.err and skipped, so
     * one bad link does not stop the crawl.
     */
    public void endDocument()
    {
        System.out.println("Visited"+currentURL);
        // Iterate instead of recursing on failure, so a run of bad links
        // cannot deepen the call stack.
        while (!queue.isEmpty())
        {
            String next = (String) queue.removeLast();
            this.currentURL = next;
            try
            {
                // If this spider is the registered content handler, a
                // successful parse calls endDocument() again, which keeps
                // draining the queue before control returns here.
                parser.parse(next);
            }
            catch (Exception ex)
            {
                System.err.println("Could not parse " + next + ": " + ex);
            }
        }
    }

    /**
     * Queues the element's xlink:href when the element also has an xlink:type
     * attribute and the href has not been seen before.
     *
     * @param namespaceURI  namespace URI of the element (unused)
     * @param localName     local name of the element (unused)
     * @param qualifiedName qualified name of the element (unused)
     * @param atts          the element's attributes
     */
    public void startElement(String namespaceURI, String localName,
        String qualifiedName, Attributes atts)
    {
        if (atts.getValue(XLINK_NAMESPACE, "type") == null)
        {
            return; // not an XLink element
        }
        String href = atts.getValue(XLINK_NAMESPACE, "href");
        // Set.add returns false for an href that is already known.
        if (href != null && knownURLs.add(href))
        {
            queue.addFirst(href);
        }
    }

    /**
     * Command-line entry point: spiders the document named by the first
     * argument. Prints a usage message and returns when no argument is given.
     * Any exception from creating the parser or from the first parse is
     * printed to System.err.
     *
     * @param args command-line arguments; args[0] is the start URL
     */
    public static void main(String[] args)
    {
        if (args.length == 0)
        {
            System.out.println("Usage:java SAXSpider URL1");
            return; // without this, args[0] below would throw
        }
        String url = args[0];
        try
        {
            XMLReader parser = createParser();
            ContentHandler spider = new SAXSpider(parser, url);
            parser.setContentHandler(spider);
            parser.parse(url);
        }
        catch (Exception e)
        {
            System.err.println(e);
        }
    }

    /**
     * Returns the preferred Xerces reader, or the platform default reader
     * when the Xerces class cannot be instantiated.
     */
    private static XMLReader createParser() throws SAXException
    {
        try
        {
            return XMLReaderFactory.createXMLReader(PREFERRED_PARSER);
        }
        catch (SAXException unavailable)
        {
            return XMLReaderFactory.createXMLReader();
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -