📄 crawljobhandler.java
字号:
throws FatalConfigurationException { // See what the recover story is. File recover = null; try { if (recovery != null && recovery.length() > 0 && recovery.equals(RECOVER_LOG)) { // Then we're to do a recovery based off the RecoveryJournal // recover.gz log. File dir = baseOn.getSettingsHandler().getOrder() .getSettingsDir(CrawlOrder.ATTR_LOGS_PATH); // Add name of recover file. We're hardcoding it as // 'recover.gz'. recover = new File(dir, FrontierJournal.LOGNAME_RECOVER); } else if (recovery != null && recovery.length() > 0) { // Must be name of a checkpoint to use. recover = new File(baseOn.getSettingsHandler(). getOrder().getSettingsDir(CrawlOrder.ATTR_CHECKPOINTS_PATH), recovery); } } catch (AttributeNotFoundException e1) { throw new FatalConfigurationException( "AttributeNotFoundException occured while setting up" + "new job/profile " + name + " \n" + e1.getMessage()); } CrawlJob cj = createNewJob(baseOn.getSettingsHandler().getOrderFile(), name, description, seeds, priority); updateRecoveryPaths(recover, cj.getSettingsHandler(), name); return cj; } /** * Creates a new job. The new job will be returned and also registered as * the handler's 'new job'. The new job will be based on the settings * provided but created in a new location on disk. * @param orderFile Order file to use as the template for the new job. * @param name The name of the new job. * @param description Descriptions of the job. * @param seeds The contents of the new settings' seed file. * * @return The new crawl job. * @throws FatalConfigurationException If a problem occurs creating the * settings. 
*/ public CrawlJob newJob(final File orderFile, final String name, final String description, final String seeds) throws FatalConfigurationException { return createNewJob(orderFile, name, description, seeds, CrawlJob.PRIORITY_AVERAGE); } protected void checkDirectory(File dir) throws FatalConfigurationException { if (dir == null) { return; } if (!dir.exists() && !dir.canRead()) { throw new FatalConfigurationException(dir.getAbsolutePath() + " does not exist or is unreadable"); } } protected CrawlJob createNewJob(final File orderFile, final String name, final String description, final String seeds, final int priority) throws FatalConfigurationException { if (newJob != null) { //There already is a new job. Discard it. discardNewJob(); } String UID = getNextJobUID(); File jobDir; jobDir = new File(this.jobsDir, name + "-" + UID); CrawlJobErrorHandler errorHandler = new CrawlJobErrorHandler(); XMLSettingsHandler handler = createSettingsHandler(orderFile, name, description, seeds, jobDir, errorHandler, "order.xml", "seeds.txt"); this.newJob = new CrawlJob(UID, name, handler, errorHandler, priority, jobDir); return this.newJob; } /** * Creates a new profile. The new profile will be returned and also * registered as the handler's 'new job'. The new profile will be based on * the settings provided but created in a new location on disk. * * @param baseOn * A CrawlJob (with a valid settingshandler) to use as the * template for the new profile. * @param name * The name of the new profile. * @param description * Description of the new profile * @param seeds * The contents of the new profiles' seed file * @return The new profile. 
* @throws FatalConfigurationException
     *             If the settings handler for the new profile cannot be
     *             created.
     * @throws IOException
     *             Declared by this method; presumably raised by helper
     *             methods that are not shown here (to be confirmed).
     */
    public CrawlJob newProfile(CrawlJob baseOn, String name,
            String description, String seeds)
    throws FatalConfigurationException, IOException {
        // The profile lives in a directory named after it, directly under
        // the profiles directory.
        final String profilePath = getProfilesDirectory().getAbsoluteFile()
            + File.separator + name;
        final File targetDir = new File(profilePath);

        // Profiles only report problems of SEVERE level.
        final CrawlJobErrorHandler errors =
            new CrawlJobErrorHandler(Level.SEVERE);

        final File templateOrder = baseOn.getSettingsHandler().getOrderFile();
        final XMLSettingsHandler settings = createSettingsHandler(
            templateOrder, name, description, seeds, targetDir, errors,
            "order.xml", "seeds.txt");

        final CrawlJob profile = new CrawlJob(name, settings, errors);
        addProfile(profile);
        return profile;
    }

    /**
     * Creates a new settings handler based on an existing job. Basically all
     * the settings file for the 'based on' will be copied to the specified
     * directory.
     *
     * @param orderFile Order file to base new order file on. Cannot be null.
     * @param name Name for the new settings
     * @param description Description of the new settings.
     * @param seeds The contents of the new settings' seed file.
     * @param newSettingsDir
     * @param errorHandler
     * @param filename Name of new order file.
     * @param seedfile Name of new seeds file.
     *
     * @return The new settings handler.
     * @throws FatalConfigurationException
     *             If there are problems with reading the 'base on'
     *             configuration, with writing the new configuration or it's
     *             seed file.
*/ protected XMLSettingsHandler createSettingsHandler( final File orderFile, final String name, final String description, final String seeds, final File newSettingsDir, final CrawlJobErrorHandler errorHandler, final String filename, final String seedfile) throws FatalConfigurationException { XMLSettingsHandler newHandler = null; try { newHandler = new XMLSettingsHandler(orderFile); if(errorHandler != null){ newHandler.registerValueErrorHandler(errorHandler); } newHandler.setErrorReportingLevel(errorHandler.getLevel()); newHandler.initialize(); } catch (InvalidAttributeValueException e2) { throw new FatalConfigurationException( "InvalidAttributeValueException occured while creating" + " new settings handler for new job/profile\n" + e2.getMessage()); } // Make sure the directory exists. newSettingsDir.mkdirs(); try { // Set the seed file ((ComplexType)newHandler.getOrder().getAttribute("scope")) .setAttribute(new Attribute("seedsfile", seedfile)); } catch (AttributeNotFoundException e1) { throw new FatalConfigurationException( "AttributeNotFoundException occured while setting up" + "new job/profile\n" + e1.getMessage()); } catch (InvalidAttributeValueException e1) { throw new FatalConfigurationException( "InvalidAttributeValueException occured while setting" + "up new job/profile\n" + e1.getMessage()); } catch (MBeanException e1) { throw new FatalConfigurationException( "MBeanException occured while setting up new" + " job/profile\n" + e1.getMessage()); } catch (ReflectionException e1) { throw new FatalConfigurationException( "ReflectionException occured while setting up" + " new job/profile\n" + e1.getMessage()); } File newFile = new File(newSettingsDir.getAbsolutePath(), filename); try { newHandler.copySettings(newFile, (String)newHandler.getOrder() .getAttribute(CrawlOrder.ATTR_SETTINGS_DIRECTORY)); } catch (IOException e3) { // Print stack trace to help debug issue where cannot create // new job from an old that has overrides. 
e3.printStackTrace(); throw new FatalConfigurationException( "IOException occured while writing new settings files" + " for new job/profile\n" + e3.getMessage()); } catch (AttributeNotFoundException e) { throw new FatalConfigurationException( "AttributeNotFoundException occured while writing new" + " settings files for new job/profile\n" + e.getMessage()); } catch (MBeanException e) { throw new FatalConfigurationException( "MBeanException occured while writing new settings files" + " for new job/profile\n" + e.getMessage()); } catch (ReflectionException e) { throw new FatalConfigurationException( "ReflectionException occured while writing new settings" + " files for new job/profile\n" + e.getMessage()); } CrawlerSettings orderfile = newHandler.getSettingsObject(null); orderfile.setName(name); orderfile.setDescription(description); if (seeds != null) { BufferedWriter writer = null; try { writer = new BufferedWriter(new FileWriter(newHandler .getPathRelativeToWorkingDirectory(seedfile))); try { writer.write(seeds); } finally { writer.close(); } } catch (IOException e) { throw new FatalConfigurationException( "IOException occured while writing seed file for new" + " job/profile\n" + e.getMessage()); } } return newHandler; } /** * @param recover * Source to use recovering. Can be full path to a recovery log * or full path to a checkpoint src dir. * @param sh * Settings Handler to update. * @param jobName * Name of this job. 
* @throws FatalConfigurationException */ protected void updateRecoveryPaths(final File recover, final SettingsHandler sh, final String jobName) throws FatalConfigurationException { if (recover == null) { return; } checkDirectory(recover); try { // Set 'recover-path' to be old job's recovery log path updateRecoveryPaths(recover, sh); } catch (AttributeNotFoundException e1) { throw new FatalConfigurationException( "AttributeNotFoundException occured while setting up" + "new job/profile " + jobName + " \n" + e1.getMessage()); } catch (InvalidAttributeValueException e1) { throw new FatalConfigurationException( "InvalidAttributeValueException occured while setting" + "new job/profile " + jobName + " \n" + e1.getMessage()); } catch (MBeanException e1) { throw new FatalConfigurationException( "MBeanException occured while setting up new" + "new job/profile " + jobName + " \n" + e1.getMessage()); } catch (ReflectionException e1) { throw new FatalConfigurationException( "ReflectionException occured while setting up" + "new job/profile " + jobName + " \n" + e1.getMessage()); } catch (IOException e) { throw new FatalConfigurationException( "IOException occured while setting up" + "new job/profile " + jobName + " \n" + e.getMessage()); } } /** * @param recover * Source to use recovering. Can be full path to a recovery log * or full path to a checkpoint src dir. 
* @param newHandler * @throws ReflectionException * @throws MBeanException * @throws InvalidAttributeValueException * @throws AttributeNotFoundException * @throws IOException */ private void updateRecoveryPaths(final File recover, SettingsHandler newHandler) throws AttributeNotFoundException, InvalidAttributeValueException, MBeanException, ReflectionException, IOException { if (recover == null || !recover.exists()) { throw new IOException("Recovery src does not exist: " + recover); } newHandler.getOrder().setAttribute( new Attribute(CrawlOrder.ATTR_RECOVER_PATH, recover.getAbsolutePath())); // Now, ensure that 'logs' and 'state' don't overlap with // previous job's files (ok for 'arcs' and 'scratch' to overlap) File newLogsDisk = null; final String RECOVERY_SUFFIX = "-R"; while(true) { try { newLogsDisk = newHandler.getOrder(). getSettingsDir(CrawlOrder.ATTR_LOGS_PATH); } catch (AttributeNotFoundException e) { logger.log(Level.SEVERE, "Failed to get logs directory", e); } if (newLogsDisk.list().length > 0) { // 'new' directory is nonempty; rename with trailing '-R' String logsPath = (String) newHandler.getOrder(). getAttribute(CrawlOrder.ATTR_LOGS_PATH); if(logsPath.endsWith("/")) { logsPath = logsPath.substring(0,logsPath.length()-1); } newHandler.getOrder().setAttribute( new Attribute(CrawlOrder.ATTR_LOGS_PATH, logsPath + RECOVERY_SUFFIX)); } else { // directory is suitably empty; exit loop break; } } File newStateDisk = null; while (true) { try { newStateDisk = newHandler.getOrder().getSettingsDir( CrawlOrder.ATTR_STATE_PATH); } catch (AttributeNotFoundException e) { logger.log(Level.SEVERE, "Failed to get state directory", e); } if (newStateDisk.list().length>0) { // 'new' directory is nonempty; rename with trailing '-R' String statePath = (String) newHandler.getOrder(). 
getAttribute(CrawlOrder.ATTR_STATE_PATH); if(statePath.endsWith("/")) { statePath = statePath.substring(0,statePath.length()-1); } newHandler.getOrder().setAttribute( new Attribute(CrawlOrder.ATTR_STATE_PATH, statePath + RECOVERY_SUFFIX)); } else { // directory is suitably empty; exit loop break; } } } /** * Discard the handler's 'new job'. This will remove any files/directories
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -