⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 crawlcontroller.java

📁 最强的爬虫工程
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
            throw new IllegalArgumentException("Message cannot be null.");
        }
        this.sExit = message;
        beginCrawlStop();
    }

    /**
     * Start the process of stopping the crawl.
     *
     * <p>Broadcasts the STOPPING state together with the current exit
     * message and then, if a frontier exists, terminates and unpauses it.
     */
    public void beginCrawlStop() {
        LOGGER.fine("Started.");
        sendCrawlStateChangeEvent(STOPPING, this.sExit);
        // The frontier is null-checked here, so a stop may be requested
        // while no frontier is set.
        if (this.frontier != null) {
            this.frontier.terminate();
            // NOTE(review): presumably the unpause lets anything blocked on a
            // paused frontier wake up and observe the terminate -- confirm.
            this.frontier.unpause();
        }
        LOGGER.fine("Finished.");
    }

    /**
     * Stop the crawl temporarily.
     *
     * <p>Does nothing if the controller is already PAUSING or PAUSED.
     * Otherwise records the waiting-for-pause exit status, pauses the
     * frontier, broadcasts the PAUSING state, and completes the pause
     * immediately when the toe pool reports no active threads.
     */
    public synchronized void requestCrawlPause() {
        if (state == PAUSING || state == PAUSED) {
            // Already about to pause
            return;
        }
        sExit = CrawlJob.STATUS_WAITING_FOR_PAUSE;
        frontier.pause();
        sendCrawlStateChangeEvent(PAUSING, this.sExit);
        if (toePool.getActiveToeCount() == 0) {
            // if all threads already held, complete pause now
            // (no chance to trigger off later held thread)
            completePause();
        }
    }

    /**
     * Tell if the controller is paused.
     *
     * @return true if the current state is PAUSED
     */
    public boolean isPaused() {
        return state == PAUSED;
    }

    /**
     * Tell if the controller is in the process of pausing.
     *
     * @return true if the current state is PAUSING
     */
    public boolean isPausing() {
        return state == PAUSING;
    }

    /**
     * Tell if the controller is running.
     *
     * @return true if the current state is RUNNING
     */
    public boolean isRunning() {
        return state == RUNNING;
    }

    /**
     * Resume crawl from paused state.
     *
     * <p>Returns without effect unless the state is PAUSING, PAUSED or
     * CHECKPOINTING. Otherwise switches back to multi-thread mode, unpauses
     * the frontier and broadcasts the RUNNING state.
     */
    public synchronized void requestCrawlResume() {
        if (state != PAUSING && state != PAUSED && state != CHECKPOINTING) {
            // Can't resume if not been told to pause or if we're in middle of
            // a checkpoint.
            // NOTE(review): as written, the guard lets a CHECKPOINTING state
            // through to the resume below, which does not match the
            // "in middle of a checkpoint" wording above -- confirm intent.
            return;
        }
        multiThreadMode();
        frontier.unpause();
        LOGGER.fine("Crawl resumed.");
        sendCrawlStateChangeEvent(RUNNING, CrawlJob.STATUS_RUNNING);
    }

    /**
     * @return Active toe thread count.
     
*/    public int getActiveToeCount() {        if (toePool == null) {            return 0;        }        return toePool.getActiveToeCount();    }    private void setupToePool() {        toePool = new ToePool(this);        // TODO: make # of toes self-optimizing        toePool.setSize(order.getMaxToes());    }    /**     * @return The order file instance.     */    public CrawlOrder getOrder() {        return order;    }    /**     * @return The server cache instance.     */    public ServerCache getServerCache() {        return serverCache;    }    /**     * @param o     */    public void setOrder(CrawlOrder o) {        order = o;    }    /**     * @return The frontier.     */    public Frontier getFrontier() {        return frontier;    }    /**     * @return This crawl scope.     */    public CrawlScope getScope() {        return scope;    }    /** Get the list of processor chains.     *     * @return the list of processor chains.     */    public ProcessorChainList getProcessorChainList() {        return processorChains;    }    /** Get the first processor chain.     *     * @return the first processor chain.     */    public ProcessorChain getFirstProcessorChain() {        return processorChains.getFirstChain();    }    /** Get the postprocessor chain.     *     * @return the postprocessor chain.     */    public ProcessorChain getPostprocessorChain() {        return processorChains.getLastChain();    }    /**     * Get the 'working' directory of the current crawl.     * @return the 'working' directory of the current crawl.     */    public File getDisk() {        return disk;    }    /**     * @return Scratch disk location.     */    public File getScratchDisk() {        return scratchDisk;    }    /**     * @return State disk location.     */    public File getStateDisk() {        return stateDisk;    }    /**     * @return The number of ToeThreads     *     * @see ToePool#getToeCount()     */    public int getToeCount() {        return this.toePool == null? 
0: this.toePool.getToeCount();    }    /**     * @return The ToePool     */    public ToePool getToePool() {        return toePool;    }    	/**	 * @return toepool one-line report	 */	public String oneLineReportThreads() {		// TODO Auto-generated method stub		return toePool.singleLineReport();	}    /**     * While many settings will update automatically when the SettingsHandler is     * modified, some settings need to be explicitly changed to reflect new     * settings. This includes, number of toe threads and seeds.     */    public void kickUpdate() {        toePool.setSize(order.getMaxToes());                this.scope.kickUpdate();        this.frontier.kickUpdate();        this.processorChains.kickUpdate();                // TODO: continue to generalize this, so that any major         // component can get a kick when it may need to refresh its data        setThresholds();    }	/**     * @return The settings handler.     */    public SettingsHandler getSettingsHandler() {        return settingsHandler;    }    /**     * This method iterates through processor chains to run processors' initial     * tasks.     *     */    private void runProcessorInitialTasks(){        for (Iterator ic = processorChains.iterator(); ic.hasNext(); ) {            for (Iterator ip = ((ProcessorChain) ic.next()).iterator();                    ip.hasNext(); ) {                ((Processor) ip.next()).initialTasks();            }        }    }    /**     * This method iterates through processor chains to run processors' final     * tasks.     *     */    private void runProcessorFinalTasks(){        for (Iterator ic = processorChains.iterator(); ic.hasNext(); ) {            for (Iterator ip = ((ProcessorChain) ic.next()).iterator();                    ip.hasNext(); ) {                ((Processor) ip.next()).finalTasks();            }        }    }    /**     * Kills a thread. 
For details see     * {@link org.archive.crawler.framework.ToePool#killThread(int, boolean)     * ToePool.killThread(int, boolean)}.     * @param threadNumber Thread to kill.     * @param replace Should thread be replaced.     * @see org.archive.crawler.framework.ToePool#killThread(int, boolean)     */    public void killThread(int threadNumber, boolean replace){        toePool.killThread(threadNumber, replace);    }    /**     * Add a file to the manifest of files used/generated by the current     * crawl.     *      * TODO: Its possible for a file to be added twice if reports are     * force generated midcrawl.  Fix.     *     * @param file The filename (with absolute path) of the file to add     * @param type The type of the file     * @param bundle Should the file be included in a typical bundling of     *           crawler files.     *     * @see #MANIFEST_CONFIG_FILE     * @see #MANIFEST_LOG_FILE     * @see #MANIFEST_REPORT_FILE     */    public void addToManifest(String file, char type, boolean bundle) {        manifest.append(type + (bundle? "+": "-") + " " + file + "\n");    }    /**     * Evaluate if the crawl should stop because it is finished.     */    public void checkFinish() {        if(atFinish()) {            beginCrawlStop();        }    }    /**     * Evaluate if the crawl should stop because it is finished,     * without actually stopping the crawl.     
*
     * @return true if crawl is at a finish-possible state, i.e. the state is
     * RUNNING and shouldContinueCrawling() reports false
     */
    public boolean atFinish() {
        return state == RUNNING && !shouldContinueCrawling();
    }

    /**
     * Deserialization hook: restores the default-serialized fields, then
     * re-creates state that is rebuilt rather than restored.
     *
     * @param stream Stream the controller is being read from.
     * @throws IOException Declared for the stream read.
     * @throws ClassNotFoundException Declared for the stream read.
     */
    private void readObject(ObjectInputStream stream)
    throws IOException, ClassNotFoundException {
        stream.defaultReadObject();
        // Setup status listeners
        // (starts from a fresh, empty synchronized list)
        this.registeredCrawlStatusListeners =
            Collections.synchronizedList(new ArrayList());
        // Ensure no holdover singleThreadMode
        singleThreadMode = false;
    }

    /**
     * Go to single thread mode, where only one ToeThread may
     * proceed at a time. Also acquires the single lock, so
     * no further threads will proceed past an
     * acquireContinuePermission. Caller must be sure to release
     * lock to allow other threads to proceed one at a time.
     */
    public void singleThreadMode() {
        // The lock is deliberately left held on return; see the method comment.
        this.singleThreadLock.lock();
        singleThreadMode = true;
    }

    /**
     * Go back to regular multi thread mode, where all
     * ToeThreads may proceed at once.
     *
     * <p>The flag is cleared while holding the single-thread lock, after
     * which every hold the calling thread has on that lock is released.
     */
    public void multiThreadMode() {
        this.singleThreadLock.lock();
        singleThreadMode = false;
        // Loop rather than a single unlock: keeps unlocking until the current
        // thread no longer holds the lock at all (this also drops any hold
        // left over from an earlier singleThreadMode() call by this thread).
        while(this.singleThreadLock.isHeldByCurrentThread()) {
            this.singleThreadLock.unlock();
        }
    }

    /**
     * Proceed only if allowed, giving CrawlController a chance
     * to enforce single-thread mode.
     *
     * <p>In single-thread mode the caller takes the single-thread lock and
     * keeps it on return. Outside single-thread mode the call returns
     * immediately without touching the lock.
     */
    public void acquireContinuePermission() {
        if (singleThreadMode) {
            this.singleThreadLock.lock();
            // Re-check after acquiring: the mode may have been switched off
            // while this thread was waiting for the lock.
            if(!singleThreadMode) {
                // If changed while waiting, ignore
                while(this.singleThreadLock.isHeldByCurrentThread()) {
                    this.singleThreadLock.unlock();
                }
            }
        } // else, permission is automatic
    }

    /**
     * Relinquish continue permission at end of processing (allowing
     * another thread to proceed if in single-thread mode).
     
*/    public void releaseContinuePermission() {        if (singleThreadMode) {            while(this.singleThreadLock.isHeldByCurrentThread()) {                this.singleThreadLock.unlock();            }        } // else do nothing; 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -