📄 crawlcontroller.java
字号:
*/ public File getLogsDir() { File f = null; try { f = getSettingsDir(CrawlOrder.ATTR_LOGS_PATH); } catch (AttributeNotFoundException e) { LOGGER.severe("Failed get of logs directory: " + e.getMessage()); } return f; } /** * Return fullpath to the directory named by <code>key</code> * in settings. * If directory does not exist, it and all intermediary dirs * will be created. * @param key Key to use going to settings. * @return Full path to directory named by <code>key</code>. * @throws AttributeNotFoundException */ public File getSettingsDir(String key) throws AttributeNotFoundException { String path = (String)order.getAttribute(null, key); File f = new File(path); if (!f.isAbsolute()) { f = new File(disk.getPath(), path); } if (!f.exists()) { f.mkdirs(); } return f; } /** * Setup the statistics tracker. * The statistics object must be created before modules can use it. * Do it here now so that when modules retrieve the object from the * controller during initialization (which some do), its in place. * @throws InvalidAttributeValueException * @throws FatalConfigurationException */ private void setupStatTracking() throws InvalidAttributeValueException, FatalConfigurationException { MapType loggers = order.getLoggers(); final String cstName = "crawl-statistics"; if (loggers.isEmpty(null)) { if (!isCheckpointRecover() && this.statistics == null) { this.statistics = new StatisticsTracker(cstName); } loggers.addElement(null, (StatisticsTracker)this.statistics); } if (isCheckpointRecover()) { restoreStatisticsTracker(loggers, cstName); } for (Iterator it = loggers.iterator(null); it.hasNext();) { StatisticsTracking tracker = (StatisticsTracking)it.next(); tracker.initialize(this); if (this.statistics == null) { this.statistics = tracker; } } } protected void restoreStatisticsTracker(MapType loggers, String replaceName) throws FatalConfigurationException { try { // Add the deserialized statstracker to the settings system. 
loggers.removeElement(loggers.globalSettings(), replaceName);
            loggers.addElement(loggers.globalSettings(),
                (StatisticsTracker)this.statistics);
        } catch (Exception e) {
            throw convertToFatalConfigurationException(e);
        }
    }

    /**
     * Wrap an arbitrary exception in a FatalConfigurationException.
     * The new exception carries the original's message (prefixed with
     * "Converted exception: "), its stack trace, and the original itself
     * as cause.
     * @param e Exception to convert.
     * @return FatalConfigurationException describing <code>e</code>.
     */
    protected FatalConfigurationException
            convertToFatalConfigurationException(Exception e) {
        FatalConfigurationException fce =
            new FatalConfigurationException("Converted exception: " +
               e.getMessage());
        fce.setStackTrace(e.getStackTrace());
        try {
            // Chain the original so its type and any nested causes survive.
            fce.initCause(e);
        } catch (IllegalStateException ignored) {
            // The constructor already fixed a cause; the message and stack
            // trace set above still describe the original failure.
        }
        return fce;
    }

    /**
     * Obtain the five crawl loggers (crawl, runtime errors, local errors,
     * URI errors, progress statistics) and attach a log file to each.
     * Logger names are the log name, a dot, and the logs directory path.
     * Also (re)creates the <code>fileHandlers</code> map.
     * @throws IOException
     */
    private void setupLogs() throws IOException {
        String logsPath = logsDisk.getAbsolutePath() + File.separatorChar;
        uriProcessing = Logger.getLogger(LOGNAME_CRAWL + "." + logsPath);
        runtimeErrors = Logger.getLogger(LOGNAME_RUNTIME_ERRORS + "." +
            logsPath);
        localErrors = Logger.getLogger(LOGNAME_LOCAL_ERRORS + "." + logsPath);
        uriErrors = Logger.getLogger(LOGNAME_URI_ERRORS + "." + logsPath);
        progressStats = Logger.getLogger(LOGNAME_PROGRESS_STATISTICS + "." +
            logsPath);

        this.fileHandlers = new HashMap<Logger,FileHandler>();

        setupLogFile(uriProcessing,
            logsPath + LOGNAME_CRAWL + CURRENT_LOG_SUFFIX,
            new UriProcessingFormatter(), true);

        setupLogFile(runtimeErrors,
            logsPath + LOGNAME_RUNTIME_ERRORS + CURRENT_LOG_SUFFIX,
            new RuntimeErrorFormatter(), true);

        setupLogFile(localErrors,
            logsPath + LOGNAME_LOCAL_ERRORS + CURRENT_LOG_SUFFIX,
            new LocalErrorFormatter(), true);

        setupLogFile(uriErrors,
            logsPath + LOGNAME_URI_ERRORS + CURRENT_LOG_SUFFIX,
            new UriErrorFormatter(), true);

        setupLogFile(progressStats,
            logsPath + LOGNAME_PROGRESS_STATISTICS + CURRENT_LOG_SUFFIX,
            new StatisticsLogFormatter(), true);
    }

    /**
     * Attach a GenerationFileHandler writing to <code>filename</code> to the
     * given logger and remember it in <code>fileHandlers</code>.
     * The logger stops forwarding records to its parent handlers.
     * @param logger Logger to configure.
     * @param filename File the handler writes to.
     * @param f Formatter for the handler.
     * @param shouldManifest Passed to the handler and to addToManifest.
     * @throws IOException
     * @throws SecurityException
     */
    private void setupLogFile(Logger logger, String filename, Formatter f,
            boolean shouldManifest)
    throws IOException, SecurityException {
        // NOTE(review): the literal 'true' is presumably the handler's
        // append flag -- confirm against GenerationFileHandler.
        GenerationFileHandler fh = new GenerationFileHandler(filename, true,
            shouldManifest);
        fh.setFormatter(f);
        logger.addHandler(fh);
        addToManifest(filename, MANIFEST_LOG_FILE, shouldManifest);
        logger.setUseParentHandlers(false);
        this.fileHandlers.put(logger, fh);
    }

    protected void
rotateLogFiles(String generationSuffix) throws IOException { if (this.state != PAUSED && this.state != CHECKPOINTING) { throw new IllegalStateException("Pause crawl before requesting " + "log rotation."); } for (Iterator i = fileHandlers.keySet().iterator(); i.hasNext();) { Logger l = (Logger)i.next(); GenerationFileHandler gfh = (GenerationFileHandler)fileHandlers.get(l); GenerationFileHandler newGfh = gfh.rotate(generationSuffix, CURRENT_LOG_SUFFIX); if (gfh.shouldManifest()) { addToManifest((String) newGfh.getFilenameSeries().get(1), MANIFEST_LOG_FILE, newGfh.shouldManifest()); } l.removeHandler(gfh); l.addHandler(newGfh); fileHandlers.put(l, newGfh); } } /** * Close all log files and remove handlers from loggers. */ public void closeLogFiles() { for (Iterator i = fileHandlers.keySet().iterator(); i.hasNext();) { Logger l = (Logger)i.next(); GenerationFileHandler gfh = (GenerationFileHandler)fileHandlers.get(l); gfh.close(); l.removeHandler(gfh); } } /** * Sets the values for max bytes, docs and time based on crawl order. */ private void setThresholds() { try { maxBytes = ((Long) order.getAttribute(CrawlOrder.ATTR_MAX_BYTES_DOWNLOAD)) .longValue(); } catch (Exception e) { maxBytes = 0; } try { maxDocument = ((Long) order .getAttribute(CrawlOrder.ATTR_MAX_DOCUMENT_DOWNLOAD)) .longValue(); } catch (Exception e) { maxDocument = 0; } try { maxTime = ((Long) order.getAttribute(CrawlOrder.ATTR_MAX_TIME_SEC)) .longValue(); } catch (Exception e) { maxTime = 0; } } /** * @return Object this controller is using to track crawl statistics */ public StatisticsTracking getStatistics() { return statistics==null ? new StatisticsTracker("crawl-statistics"): this.statistics; } /** * Send crawl change event to all listeners. * @param newState State change we're to tell listeners' about. * @param message Message on state change. * @see #sendCheckpointEvent(File) for special case event sending * telling listeners to checkpoint. 
*/ protected void sendCrawlStateChangeEvent(Object newState, String message) { synchronized (this.registeredCrawlStatusListeners) { this.state = newState; for (Iterator i = this.registeredCrawlStatusListeners.iterator(); i.hasNext();) { CrawlStatusListener l = (CrawlStatusListener)i.next(); if (newState.equals(PAUSED)) { l.crawlPaused(message); } else if (newState.equals(RUNNING)) { l.crawlResuming(message); } else if (newState.equals(PAUSING)) { l.crawlPausing(message); } else if (newState.equals(STARTED)) { l.crawlStarted(message); } else if (newState.equals(STOPPING)) { l.crawlEnding(message); } else if (newState.equals(FINISHED)) { l.crawlEnded(message); } else if (newState.equals(PREPARING)) { l.crawlResuming(message); } else { throw new RuntimeException("Unknown state: " + newState); } if (LOGGER.isLoggable(Level.FINE)) { LOGGER.fine("Sent " + newState + " to " + l); } } LOGGER.fine("Sent " + newState); } } /** * Send the checkpoint event. * Has its own method apart from * {@link #sendCrawlStateChangeEvent(Object, String)} because checkpointing * throws an Exception (Didn't want to have to wrap all of the * sendCrawlStateChangeEvent in try/catches). * @param checkpointDir Where to write checkpoint state to. 
* @throws Exception */ protected void sendCheckpointEvent(File checkpointDir) throws Exception { synchronized (this.registeredCrawlStatusListeners) { if (this.state != PAUSED) { throw new IllegalStateException("Crawler must be completly " + "paused before checkpointing can start"); } this.state = CHECKPOINTING; for (Iterator i = this.registeredCrawlStatusListeners.iterator(); i.hasNext();) { CrawlStatusListener l = (CrawlStatusListener)i.next(); l.crawlCheckpoint(checkpointDir); if (LOGGER.isLoggable(Level.FINE)) { LOGGER.fine("Sent " + CHECKPOINTING + " to " + l); } } LOGGER.fine("Sent " + CHECKPOINTING); } } /** * Operator requested crawl begin */ public void requestCrawlStart() { runProcessorInitialTasks(); sendCrawlStateChangeEvent(STARTED, CrawlJob.STATUS_PENDING); String jobState; state = RUNNING; jobState = CrawlJob.STATUS_RUNNING; sendCrawlStateChangeEvent(this.state, jobState); // A proper exit will change this value. this.sExit = CrawlJob.STATUS_FINISHED_ABNORMAL; Thread statLogger = new Thread(statistics); statLogger.setName("StatLogger"); statLogger.start(); frontier.start(); } /** * Called when the last toethread exits. */ protected void completeStop() { LOGGER.fine("Entered complete stop."); // Run processors' final tasks runProcessorFinalTasks(); // Ok, now we are ready to exit. sendCrawlStateChangeEvent(FINISHED, this.sExit); synchronized (this.registeredCrawlStatusListeners) { // Remove all listeners now we're done with them. this.registeredCrawlStatusListeners. removeAll(this.registeredCrawlStatusListeners); this.registeredCrawlStatusListeners = null; } closeLogFiles(); // Release reference to logger file handler instances. this.fileHandlers = null; this.uriErrors = null; this.uriProcessing = null; this.localErrors = null; this.runtimeErrors = null; this.progressStats = null; this.reports = null; this.manifest = null; // Do cleanup. 
this.statistics = null; this.frontier = null; this.disk = null; this.scratchDisk = null; this.order = null; this.scope = null; if (this.settingsHandler != null) { this.settingsHandler.cleanup(); } this.settingsHandler = null; this.reserveMemory = null; this.processorChains = null; if (this.serverCache != null) { this.serverCache.cleanup(); this.serverCache = null; } if (this.checkpointer != null) { this.checkpointer.cleanup(); this.checkpointer = null; } if (this.bdbEnvironment != null) { try { this.bdbEnvironment.sync(); this.bdbEnvironment.close(); } catch (DatabaseException e) { e.printStackTrace(); } this.bdbEnvironment = null; } this.bigmaps = null; if (this.toePool != null) { this.toePool.cleanup(); // I played with launching a thread here to do cleanup of the // ToePool ThreadGroup (making sure the cleanup thread was not // in the ToePool ThreadGroup). Did this because ToePools seemed // to be sticking around holding references to CrawlController at // least. Need to spend more time looking to see that this is // still the case even after adding the above toePool#cleanup call.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -