📄 crawlcontroller.java
字号:
throw new IllegalArgumentException("Message cannot be null.");
        }
        this.sExit = message;
        beginCrawlStop();
    }

    /**
     * Start the process of stopping the crawl.
     *
     * Sends a STOPPING state-change event carrying the current exit
     * message and then, if a frontier is present, terminates and
     * unpauses it.
     */
    public void beginCrawlStop() {
        LOGGER.fine("Started.");
        sendCrawlStateChangeEvent(STOPPING, this.sExit);
        if (this.frontier != null) {
            this.frontier.terminate();
            this.frontier.unpause();
        }
        LOGGER.fine("Finished.");
    }

    /**
     * Stop the crawl temporarily.
     *
     * Does nothing if the state is already PAUSING or PAUSED. Otherwise
     * records the waiting-for-pause status as the exit message, pauses the
     * frontier and sends a PAUSING state-change event. If the toe pool
     * reports no active toes, the pause is completed immediately.
     */
    public synchronized void requestCrawlPause() {
        if (state == PAUSING || state == PAUSED) {
            // Already about to pause
            return;
        }
        sExit = CrawlJob.STATUS_WAITING_FOR_PAUSE;
        frontier.pause();
        sendCrawlStateChangeEvent(PAUSING, this.sExit);
        if (toePool.getActiveToeCount() == 0) {
            // if all threads already held, complete pause now
            // (no chance to trigger off later held thread)
            completePause();
        }
    }

    /**
     * Tell if the controller is paused.
     *
     * @return true if the state is PAUSED
     */
    public boolean isPaused() {
        return state == PAUSED;
    }

    /**
     * Tell if the controller is in the process of pausing.
     *
     * @return true if the state is PAUSING
     */
    public boolean isPausing() {
        return state == PAUSING;
    }

    /**
     * Tell if the controller is running.
     *
     * @return true if the state is RUNNING
     */
    public boolean isRunning() {
        return state == RUNNING;
    }

    /**
     * Resume crawl from paused state.
     *
     * Only acts when the state is PAUSING, PAUSED or CHECKPOINTING; in any
     * other state the call returns without doing anything. When it acts it
     * switches back to multi thread mode, unpauses the frontier and sends a
     * RUNNING state-change event.
     */
    public synchronized void requestCrawlResume() {
        if (state != PAUSING && state != PAUSED && state != CHECKPOINTING) {
            // Nothing to resume: the state is none of PAUSING, PAUSED or
            // CHECKPOINTING.
            // NOTE(review): an earlier wording of this comment said a resume
            // is not possible in the middle of a checkpoint, but this guard
            // lets CHECKPOINTING through -- confirm which is intended.
            return;
        }
        multiThreadMode();
        frontier.unpause();
        LOGGER.fine("Crawl resumed.");
        sendCrawlStateChangeEvent(RUNNING, CrawlJob.STATUS_RUNNING);
    }

    /**
     * @return Active toe thread count, or 0 if there is no toe pool.
     */
    public int getActiveToeCount() {
        if (toePool == null) {
            return 0;
        }
        return toePool.getActiveToeCount();
    }

    /**
     * Create the toe pool for this controller and size it to the maximum
     * toe count given by the crawl order.
     */
    private void setupToePool() {
        toePool = new ToePool(this);
        // TODO: make # of toes self-optimizing
        toePool.setSize(order.getMaxToes());
    }

    /**
     * @return The order file instance.
     */
    public CrawlOrder getOrder() {
        return order;
    }

    /**
     * @return The server cache instance.
     */
    public ServerCache getServerCache() {
        return serverCache;
    }

    /**
     * Replace the crawl order held by this controller.
     *
     * @param o The crawl order to hold from now on.
     */
    public void setOrder(CrawlOrder o) {
        order = o;
    }

    /**
     * @return The frontier.
     */
    public Frontier getFrontier() {
        return frontier;
    }

    /**
     * @return This crawl scope.
     */
    public CrawlScope getScope() {
        return scope;
    }

    /**
     * Get the list of processor chains.
     *
     * @return the list of processor chains.
     */
    public ProcessorChainList getProcessorChainList() {
        return processorChains;
    }

    /**
     * Get the first processor chain.
     *
     * @return the first processor chain.
     */
    public ProcessorChain getFirstProcessorChain() {
        return processorChains.getFirstChain();
    }

    /**
     * Get the postprocessor chain, which is the last chain of the
     * processor chain list.
     *
     * @return the postprocessor chain.
     */
    public ProcessorChain getPostprocessorChain() {
        return processorChains.getLastChain();
    }

    /**
     * Get the 'working' directory of the current crawl.
     *
     * @return the 'working' directory of the current crawl.
     */
    public File getDisk() {
        return disk;
    }

    /**
     * @return Scratch disk location.
     */
    public File getScratchDisk() {
        return scratchDisk;
    }

    /**
     * @return State disk location.
     */
    public File getStateDisk() {
        return stateDisk;
    }

    /**
     * @return The number of ToeThreads, or 0 if there is no toe pool.
     *
     * @see ToePool#getToeCount()
     */
    public int getToeCount() {
        return this.toePool == null? 0: this.toePool.getToeCount();
    }

    /**
     * @return The ToePool
     */
    public ToePool getToePool() {
        return toePool;
    }

    /**
     * @return toepool one-line report
     */
    public String oneLineReportThreads() {
        // NOTE(review): unlike getToeCount() and getActiveToeCount(), there
        // is no null check on toePool here -- confirm callers only invoke
        // this once the pool exists.
        return toePool.singleLineReport();
    }

    /**
     * While many settings will update automatically when the SettingsHandler is
     * modified, some settings need to be explicitly changed to reflect new
     * settings. This includes the number of toe threads and seeds.
     *
     * Resizes the toe pool from the crawl order, forwards the kick to the
     * scope, the frontier and the processor chains, and finally calls
     * setThresholds().
     */
    public void kickUpdate() {
        // NOTE(review): toePool is dereferenced without a null check here.
        toePool.setSize(order.getMaxToes());

        this.scope.kickUpdate();
        this.frontier.kickUpdate();
        this.processorChains.kickUpdate();

        // TODO: continue to generalize this, so that any major
        // component can get a kick when it may need to refresh its data

        setThresholds();
    }

    /**
     * @return The settings handler.
     */
    public SettingsHandler getSettingsHandler() {
        return settingsHandler;
    }

    /**
     * This method iterates through processor chains to run processors' initial
     * tasks.
     *
     * The outer loop walks the chains; the inner loop walks the processors
     * of each chain and calls initialTasks() on each one.
     */
    private void runProcessorInitialTasks(){
        for (Iterator ic = processorChains.iterator(); ic.hasNext(); ) {
            for (Iterator ip = ((ProcessorChain) ic.next()).iterator();
                    ip.hasNext(); ) {
                ((Processor) ip.next()).initialTasks();
            }
        }
    }

    /**
     * This method iterates through processor chains to run processors' final
     * tasks.
     *
     * The outer loop walks the chains; the inner loop walks the processors
     * of each chain and calls finalTasks() on each one.
     */
    private void runProcessorFinalTasks(){
        for (Iterator ic = processorChains.iterator(); ic.hasNext(); ) {
            for (Iterator ip = ((ProcessorChain) ic.next()).iterator();
                    ip.hasNext(); ) {
                ((Processor) ip.next()).finalTasks();
            }
        }
    }

    /**
     * Kills a thread. For details see
     * {@link org.archive.crawler.framework.ToePool#killThread(int, boolean)
     * ToePool.killThread(int, boolean)}.
     *
     * @param threadNumber Thread to kill.
     * @param replace Should thread be replaced.
     * @see org.archive.crawler.framework.ToePool#killThread(int, boolean)
     */
    public void killThread(int threadNumber, boolean replace){
        toePool.killThread(threadNumber, replace);
    }

    /**
     * Add a file to the manifest of files used/generated by the current
     * crawl.
     *
     * Each entry is appended as one line: the type character, then "+" if
     * the file is to be bundled or "-" if not, a space, the file name and
     * a newline.
     *
     * TODO: It's possible for a file to be added twice if reports are
     * force generated midcrawl. Fix.
     *
     * @param file The filename (with absolute path) of the file to add
     * @param type The type of the file
     * @param bundle Should the file be included in a typical bundling of
     *           crawler files.
     *
     * @see #MANIFEST_CONFIG_FILE
     * @see #MANIFEST_LOG_FILE
     * @see #MANIFEST_REPORT_FILE
     */
    public void addToManifest(String file, char type, boolean bundle) {
        manifest.append(type + (bundle? "+": "-") + " " + file + "\n");
    }

    /**
     * Evaluate if the crawl should stop because it is finished, and begin
     * stopping it if so.
     */
    public void checkFinish() {
        if(atFinish()) {
            beginCrawlStop();
        }
    }

    /**
     * Evaluate if the crawl should stop because it is finished,
     * without actually stopping the crawl.
     *
     * @return true if crawl is at a finish-possible state, i.e. the state
     * is RUNNING and shouldContinueCrawling() reports false
     */
    public boolean atFinish() {
        return state == RUNNING && !shouldContinueCrawling();
    }

    /**
     * Deserialization hook. After the default field read it installs a
     * fresh, empty synchronized list of crawl status listeners and clears
     * the single-thread-mode flag.
     *
     * @param stream Stream to read this object from.
     * @throws IOException declared for the default read of the stream.
     * @throws ClassNotFoundException declared for the default read of the
     * stream.
     */
    private void readObject(ObjectInputStream stream)
    throws IOException, ClassNotFoundException {
        stream.defaultReadObject();
        // Setup status listeners
        this.registeredCrawlStatusListeners =
            Collections.synchronizedList(new ArrayList());
        // Ensure no holdover singleThreadMode
        singleThreadMode = false;
    }

    /**
     * Go to single thread mode, where only one ToeThread may
     * proceed at a time. Also acquires the single lock, so
     * no further threads will proceed past an
     * acquireContinuePermission. Caller must be sure to release
     * lock to allow other threads to proceed one at a time.
     */
    public void singleThreadMode() {
        // The lock is deliberately left held on return; see the note above.
        this.singleThreadLock.lock();
        singleThreadMode = true;
    }

    /**
     * Go back to regular multi thread mode, where all
     * ToeThreads may proceed at once.
     *
     * Acquires the single lock, clears the single-thread-mode flag, then
     * unlocks repeatedly until the calling thread no longer holds the lock.
     */
    public void multiThreadMode() {
        this.singleThreadLock.lock();
        singleThreadMode = false;
        // Loop rather than unlock once, so that every hold this thread has
        // on the lock is given up.
        while(this.singleThreadLock.isHeldByCurrentThread()) {
            this.singleThreadLock.unlock();
        }
    }

    /**
     * Proceed only if allowed, giving CrawlController a chance
     * to enforce single-thread mode.
     *
     * When the single-thread-mode flag is set, the caller acquires the
     * single lock and returns still holding it, unless the flag was cleared
     * in the meantime, in which case the lock is released again at once.
     */
    public void acquireContinuePermission() {
        if (singleThreadMode) {
            this.singleThreadLock.lock();
            if(!singleThreadMode) {
                // If changed while waiting, ignore
                while(this.singleThreadLock.isHeldByCurrentThread()) {
                    this.singleThreadLock.unlock();
                }
            }
        } // else, permission is automatic
    }

    /**
     * Relinquish continue permission at end of processing (allowing
     * another thread to proceed if in single-thread mode).
     */
    public void releaseContinuePermission() {
        if (singleThreadMode) {
            while(this.singleThreadLock.isHeldByCurrentThread()) {
                this.singleThreadLock.unlock();
            }
        } // else do nothing;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -