📄 crawljob.java
字号:
this.controller.getState().toString(): "Illegal State"; } /** * Get the current status of this CrawlJob * * @return The current status of this CrawlJob * (see constants defined here beginning with STATUS) */ public String getStatus() { return this.status; } /** * Returns the settings handler for this job. It will have been initialized. * @return the settings handler for this job. */ public XMLSettingsHandler getSettingsHandler() { return this.settingsHandler; } /** * Is this a new job? * @return True if is new. */ public boolean isNew() { return isNew; } /** * Set if the job is considered to be a profile * @return True if is a profile. */ public boolean isProfile() { return isProfile; } /** * Set if the job is considered a new job or not. * @param b Is the job considered to be new. */ public void setNew(boolean b) { isNew = b; writeJobFile(); //Save changes } /** * Returns true if the job is being crawled. * @return true if the job is being crawled */ public boolean isRunning() { return isRunning; } /** * Set if job is being crawled. * @param b Is job being crawled. */ protected void setRunning(boolean b) { isRunning = b; writeJobFile(); // Save changes //TODO: Job ending -> Save statistics tracker. //TODO: This is likely to happen as the CrawlEnding event occurs, // need to ensure that the StatisticsTracker is saved to disk on // CrawlEnded. Maybe move responsibility for this into the // StatisticsTracker? } protected void unregisterMBean() { // Unregister current job from JMX agent, if there one. if (this.mbeanServer == null) { return; } try { this.mbeanServer.unregisterMBean(this.mbeanName); this.mbeanServer = null; } catch (Exception e) { logger.log(Level.SEVERE, "Failed with " + this.mbeanName, e); } } /** * Subclass of crawlcontroller that unregisters beans when stopped. * Done as subclass so CrawlController doesn't get any JMX (or 'CrawlJob') * pollution, so for sure CrawlJob is unregistered with JMX and so any * listeners on the CrawlJob get a chance to get crawl ended message * (These latter notifications may not actually be getting through -- TBD). * <p>TODO: This override dirtys the data model since CC knows about CJs. * The facility provided by this class emitting events and statistics so * they can be read by JMX needs to go back into CC. Probably best to * registering in JMX the CC, rather than CJ. Lets do this in Heritrix 2.0 * since means changing the JMX API some. */ public class MBeanCrawlController extends CrawlController implements Serializable { private static final long serialVersionUID = -4608537998168407222L; private CrawlJob cj = null; private CompositeType ct = null; public CrawlJob getCrawlJob() { return this.cj; } public void setCrawlJob(CrawlJob cj) { this.cj = cj; } public void progressStatisticsEvent(final EventObject e) { super.progressStatisticsEvent(e); if (this.cj.getMbeanName() == null) { // Can be null around job startup. Return w/o doing anything. return; } Map s = ((StatisticsTracking)e.getSource()).getProgressStatistics(); // Convert the statistics to OpenType CompositeData and add as // user data to Notification. CompositeData cd = null; try { if (this.ct == null) { this.ct = JmxUtils.createCompositeType(s, PROG_STATS, PROG_STATS + " for " + this.cj.getMbeanName()); } cd = new CompositeDataSupport(this.ct, s); } catch (OpenDataException ode) { ode.printStackTrace(); } if (cd != null) { Notification n = new Notification(PROG_STATS, this.cj.getMbeanName(), getNotificationsSequenceNumber(), ((StatisticsTracking)e.getSource()). getProgressStatisticsLine()); n.setUserData(cd); this.cj.sendNotification(n); } } protected void completeStop() { try { super.completeStop(); } finally { if (this.cj != null) { this.cj.unregisterMBean(); } this.cj = null; } } } protected CrawlController setupCrawlController() throws InitializationException { CrawlController controller = null; // Check if we're to do a checkpoint recover. If so, deserialize // the checkpoint's CrawlController and use that in place of a new // CrawlController instance. Checkpoint cp = CrawlController. getCheckpointRecover(getSettingsHandler().getOrder()); if (cp != null) { try { controller = (MBeanCrawlController)CheckpointUtils. readObjectFromFile(MBeanCrawlController.class, cp.getDirectory()); } catch (FileNotFoundException e) { throw new InitializationException(e); } catch (IOException e) { throw new InitializationException(e); } catch (ClassNotFoundException e) { throw new InitializationException(e); } } else { controller = new MBeanCrawlController(); } return controller; } protected CrawlController createCrawlController() { return new MBeanCrawlController(); } public void setupForCrawlStart() throws InitializationException { try { this.controller = setupCrawlController(); // Register as listener to get job finished notice. this.controller.addCrawlStatusListener(this); this.controller.initialize(getSettingsHandler()); // Set the crawl job this MBeanCrawlController needs to worry about. ((MBeanCrawlController)this.controller).setCrawlJob(this); // Create our mbean description and register our crawljob. this.openMBeanInfo = buildMBeanInfo(); try { Heritrix.registerMBean(this, getJmxJobName(), CRAWLJOB_JMXMBEAN_TYPE); } catch (InstanceAlreadyExistsException e) { throw new InitializationException(e); } catch (MBeanRegistrationException e) { throw new InitializationException(e); } catch (NotCompliantMBeanException e) { throw new InitializationException(e); } } catch (InitializationException e) { // Can't load current job since it is misconfigured. setStatus(CrawlJob.STATUS_MISCONFIGURED); setErrorMessage("A fatal InitializationException occured when " + "loading job:\n" + e.getMessage()); // Log to stdout so its seen in logs as well as in UI. e.printStackTrace(); this.controller = null; throw e; } setStatus(CrawlJob.STATUS_RUNNING); setRunning(true); } public void stopCrawling() { if(this.controller != null) { this.controller.requestCrawlStop(); } } /** * @return One-line Frontier report. */ public String getFrontierOneLine() { if (this.controller == null || this.controller.getFrontier() == null) { return "Crawler not running"; } return this.controller.getFrontier().singleLineReport(); } /** * @param reportName Name of report to write. * @return A report of the frontier's status. */ public String getFrontierReport(final String reportName) { if (this.controller == null || this.controller.getFrontier() == null) { return "Crawler not running"; } return ArchiveUtils.writeReportToString(this.controller.getFrontier(), reportName); } /** * Write the requested frontier report to the given PrintWriter * @param reportName Name of report to write. * @param writer Where to write to. */ public void writeFrontierReport(String reportName, PrintWriter writer) { if (this.controller == null || this.controller.getFrontier() == null) { writer.println("Crawler not running."); return; } this.controller.getFrontier().reportTo(reportName,writer); } /** * @return One-line threads report. */ public String getThreadOneLine() { if (this.controller == null) { return "Crawler not running"; } return this.controller.oneLineReportThreads(); } /** * Get the CrawlControllers ToeThreads report for the running crawl. * @return The CrawlControllers ToeThreads report */ public String getThreadsReport() { if (this.controller == null) { return "Crawler not running"; } return ArchiveUtils.writeReportToString(this.controller.getToePool(), null); } /** * Write the requested threads report to the given PrintWriter * @param reportName Name of report to write. * @param writer Where to write to. */ public void writeThreadsReport(String reportName, PrintWriter writer) { if (this.controller == null || this.controller.getFrontier() == null) { writer.println("Crawler not running."); return; } this.controller.getToePool().reportTo(reportName, writer); } /** * Kills a thread. For details see * {@link org.archive.crawler.framework.ToePool#killThread(int, boolean) * ToePool.killThread(int, boolean)}. * @param threadNumber Thread to kill. * @param replace Should thread be replaced. * @see org.archive.crawler.framework.ToePool#killThread(int, boolean) */ public void killThread(int threadNumber, boolean replace) { if (this.controller == null) { return; } this.controller.killThread(threadNumber, replace); } /** * Get the Processors report for the running crawl. * @return The Processors report for the running crawl. */ public String getProcessorsReport() { if (this.controller == null) { return "Crawler not running"; } return ArchiveUtils.writeReportToString(this.controller, CrawlController.PROCESSORS_REPORT); } /** * Returns the directory where the configuration files for this job are * located. * * @return the directory where the configuration files for this job are * located */ public String getSettingsDirectory() { return settingsHandler.getOrderFile().getPath(); } /** * Returns the path of the job's base directory. For profiles this is always
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -