⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 webcrawler.java

📁 java concurrency in practice 源码. JAVA并发设计
💻 JAVA
字号:
package net.jcip.examples;

import java.net.URL;
import java.util.*;
import java.util.concurrent.*;

import net.jcip.annotations.*;

import static java.util.concurrent.TimeUnit.MILLISECONDS;

/**
 * WebCrawler
 * <p/>
 * Uses a TrackingExecutor to save unfinished crawl tasks at shutdown so a
 * later {@link #start()} can resume where the previous run left off.
 * <p/>
 * Thread-safety: {@code urlsToCrawl} is guarded by the object's intrinsic
 * lock; {@code seen} is a concurrent map safe for access from crawl tasks.
 *
 * @author Brian Goetz and Tim Peierls
 */
public abstract class WebCrawler {
    private volatile TrackingExecutor exec;
    @GuardedBy("this") private final Set<URL> urlsToCrawl = new HashSet<URL>();

    // URLs already submitted for crawling; consulted in CrawlTask.run so the
    // same page is not processed twice (see BUG FIX note there).
    private final ConcurrentMap<URL, Boolean> seen = new ConcurrentHashMap<URL, Boolean>();

    private static final long TIMEOUT = 500;
    private static final TimeUnit UNIT = MILLISECONDS;

    public WebCrawler(URL startUrl) {
        urlsToCrawl.add(startUrl);
    }

    /**
     * Starts (or restarts) the crawl, submitting one task per pending URL.
     * NOTE(review): calling start() twice without an intervening stop()
     * silently abandons the previous executor without shutting it down —
     * confirm callers never do this.
     */
    public synchronized void start() {
        exec = new TrackingExecutor(Executors.newCachedThreadPool());
        for (URL url : urlsToCrawl)
            submitCrawlTask(url);
        urlsToCrawl.clear();
    }

    /**
     * Shuts the crawler down, saving every URL that was not fully crawled
     * so a subsequent start() can resume it.
     *
     * @throws InterruptedException if interrupted while awaiting termination
     */
    public synchronized void stop() throws InterruptedException {
        try {
            // Tasks returned by shutdownNow never began executing.
            saveUncrawled(exec.shutdownNow());
            // Cancelled (interrupted-while-running) tasks can only be queried
            // after the executor has terminated, hence the guard.
            if (exec.awaitTermination(TIMEOUT, UNIT))
                saveUncrawled(exec.getCancelledTasks());
        } finally {
            exec = null;
        }
    }

    /**
     * Processes one page and returns the outgoing links discovered on it.
     *
     * @param url the page to process
     * @return links to submit for further crawling
     */
    protected abstract List<URL> processPage(URL url);

    /** Records unfinished tasks for resumption; callers hold the lock. */
    @GuardedBy("this")
    private void saveUncrawled(List<Runnable> uncrawled) {
        for (Runnable task : uncrawled) {
            CrawlTask crawlTask = (CrawlTask) task;
            // BUG FIX: clear the 'seen' mark, otherwise a cancelled task's URL
            // would be skipped as a duplicate when the crawl is restarted.
            crawlTask.markUncrawled();
            urlsToCrawl.add(crawlTask.getPage());
        }
    }

    private void submitCrawlTask(URL u) {
        exec.execute(new CrawlTask(u));
    }

    private class CrawlTask implements Runnable {
        private final URL url;
        // NOTE(review): removed unused field 'private int count = 1;' — it was
        // never read or written anywhere in this class.

        CrawlTask(URL url) {
            this.url = url;
        }

        /**
         * Atomically marks this URL as seen.
         *
         * @return true if the URL had already been marked by another task
         */
        boolean alreadyCrawled() {
            return seen.putIfAbsent(url, true) != null;
        }

        /** Makes this URL eligible for crawling again after a restart. */
        void markUncrawled() {
            seen.remove(url);
            System.out.printf("marking %s uncrawled%n", url);
        }

        public void run() {
            // BUG FIX: the original never consulted 'seen' — alreadyCrawled()
            // was dead code — so pages reachable through multiple links (or a
            // cyclic link graph) were re-crawled without bound. A duplicate
            // task may still occasionally slip through between submission and
            // this check; that is benign (one extra no-op task).
            if (alreadyCrawled())
                return;
            for (URL link : processPage(url)) {
                if (Thread.currentThread().isInterrupted())
                    return;
                submitCrawlTask(link);
            }
        }

        public URL getPage() {
            return url;
        }
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -