📄 linkproducerconsumer.java
字号:
package WebCrawler;import java.io.IOException;import java.util.Vector;import log.*;import html.HtmlPage;import io.*;public class LinkProducerConsumer extends Thread { private PgDBLogger logger; @SuppressWarnings("unused") private int number; // identification number of this class. public LinkProducerConsumer(PgDBLogger logger, int number) { this.logger = logger; this.number = number; //this.setPriority(Thread.MAX_PRIORITY);
this.start(); } public void run() { LogItem value = null; try { // Get the next unvisited link. // the visited column of value has been automatically set to the current stamp. while( logger.isAvailable() ) { value = logger.get(); if( null==value ) continue; MyPrintStream.out.println("[LinkProducerConsumer.run] #" + this.number + " got: " + value.toString()); // Get the target value of this unvisited link. Link dest = value.target(); MyPrintStream.out.println("[LinkProducerConsumer.run] START TO PARSE: " + dest.getURL()); // 1. Parse the target URL: HtmlPage hp = null; try { hp = new HtmlPage( dest.getURL(), MyPrintStream.out ); } catch (IOException e) { MyPrintStream.out.println("[LinkProducerConsumer.run] HtmlPage error: " + dest.getURL()); } Vector<Link> links = hp.links(); if( null!=links && null!=dest.getURL() ) { // 2. Store the links found in the target URL into the database: for( int i=0; i<links.size(); i++ ) { Link target = links.elementAt(i); // Do not process non HTML URLs nor non HTTP URLs: if( null!=target && (!target.isHTML() || !target.isHTTP() || !dest.getDomain().equals(target.getDomain())) ) continue; if( null!=target.getURL() ) { LogItem item = new LogItem( dest, target, "null" ); try { logger.put( item ); MyPrintStream.out.println("[LinkProducerConsumer.run] put: " + item.toString()); } catch (InterruptedException e) { MyPrintStream.out.println("[LinkProducerConsumer.run] put error: " + item.toString()); } } // if null } // for i //sleep((int)(Math.random() * 500)); } // if else { MyPrintStream.out.println("[LinkProducerConsumer.run] No links found in "+dest.getURL() ); } MyPrintStream.out.println("[LinkProducerConsumer.run] END TO PARSE: " + dest.getURL()); } // while } catch (InterruptedException e) { } } // run}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -