📄 simplespider.java
字号:
/*
 * Tutorial 04: "Web Robots"
 * Compile and run with MyHtmlHrefHandler together with this file.
 */

import java.util.*;
import java.io.*;
import java.net.*;

/**
 * Demonstrates writing a simple spider using LinkExtractor.
 * It only works with a limited number of external (global) links:
 * each run polls at most a fixed number of links from the queue.
 *
 * @version jdk 1.5.0
 */
public class SimpleSpider {

    /** Number of queue polls performed by {@link #work(String)}. */
    private static final int DEFAULT_MAX_LINKS = 20;

    /** Links that have already been handed to {@link #processLink(String)}. */
    private final Set<String> visitedLinks = new HashSet<String>();

    /** Links waiting to be processed, in first-in first-out order. */
    private final Queue<String> linkQueue = new LinkedList<String>();

    private final LinkExtractor extractor = new LinkExtractor();

    /**
     * Processes one link: prints it, feeds the page to the extractor and
     * appends every external link the extractor reports to the queue.
     *
     * @param link the URL to fetch
     * @return {@code false} if the link was already visited (nothing is
     *         fetched in that case), {@code true} otherwise
     * @throws IOException if the URL is malformed, cannot be opened, or the
     *         extractor fails while reading it
     */
    protected boolean processLink(String link) throws IOException {
        if (!visitedLinks.add(link)) {
            return false;
        }
        System.out.println(link);

        InputStream in = new URL(link).openStream();
        try {
            extractor.parse(in);
        } finally {
            // The stream was previously never closed, leaking one connection
            // per crawled page.
            closeQuietly(in);
        }

        Iterator<?> it = extractor.getExtLinkIterator();
        while (it.hasNext()) {
            linkQueue.add((String) it.next());
        }
        return true;
    }

    /**
     * Closes the stream, ignoring any failure: a close error must neither
     * mask an exception thrown while parsing nor discard a successful parse.
     */
    private static void closeQuietly(InputStream in) {
        try {
            in.close();
        } catch (IOException ignored) {
            // Best-effort cleanup only.
        }
    }

    /** Forgets all visited links and empties the pending queue. */
    public void reset() {
        visitedLinks.clear();
        linkQueue.clear();
    }

    /**
     * Crawls starting from the given link using the default budget of
     * {@value #DEFAULT_MAX_LINKS} queue polls.
     *
     * @param iLink the initial link
     */
    public void work(String iLink) {
        work(iLink, DEFAULT_MAX_LINKS);
    }

    /**
     * Crawls starting from the given link. State from any previous run is
     * cleared first. Links that fail with an {@link IOException} are
     * reported on standard output and skipped.
     *
     * @param iLink    the initial link
     * @param maxLinks maximum number of links polled from the queue; links
     *                 that turn out to be already visited or bad still count
     *                 towards this budget
     */
    public void work(String iLink, int maxLinks) {
        reset();
        linkQueue.add(iLink);
        for (int i = 0; i < maxLinks; ++i) {
            String link = linkQueue.poll();
            if (link == null) {
                System.out.println("!!! NO MORE LINK AVAILABLE");
                break;
            }
            try {
                processLink(link);
            } catch (IOException ex) {
                System.out.println("!!! A BAD LINK: " + link);
            }
        }
    }

    /** Runs two demo crawls, separated by a line of hash marks. */
    public static void main(String[] args) {
        SimpleSpider ss = new SimpleSpider();
        ss.work("http://www.cs.cityu.edu.hk");
        System.out.println("#######################");
        ss.work("http://www.cs.cityu.edu.hk/~cs5286/2006a");
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -