📄 settingshandler.java
字号:
if (settings == null) { String tmpHost = host; settings = getSettingsObject(tmpHost); while (settings == null && tmpHost != null) { tmpHost = getParentScope(tmpHost); settings = getSettingsObject(tmpHost); } settingsCache.putSettings(host, settings); } return settings; } private CrawlerSettings getRefinementsForSettings(CrawlerSettings settings, UURI uri) { if (settings.hasRefinements()) { for(Iterator it = settings.refinementsIterator(); it.hasNext();) { Refinement refinement = (Refinement) it.next(); if (refinement.isWithinRefinementBounds(uri)) { settings = getSettingsObject(settings.getScope(), refinement.getReference()); } } } return settings; } /** Get CrawlerSettings object for a host or domain. * * The difference between this method and the * <code>getSettings(String host)</code> is that this method will return * null if there is no settings for particular host or domain. * * @param scope the host or domain to get the settings for. * @return settings object for the host/domain or null if no * settings exist for the host/domain. * @see #getSettings(String) * @see #getOrCreateSettingsObject(String) */ public CrawlerSettings getSettingsObject(String scope) { return getSettingsObject(scope, null); } /** * Get CrawlerSettings object for a host/domain and a particular refinement. * * @param scope the host or domain to get the settings for. * @param refinement the refinement reference to get. * @return CrawlerSettings object for a host/domain and a particular * refinement or null if no settings exist for the host/domain. */ public CrawlerSettings getSettingsObject(String scope, String refinement) { CrawlerSettings settings = settingsCache.getSettingsObject(scope, refinement); if (settings == null) { // Reference not found settings = new CrawlerSettings(this, scope, refinement); // Try to read settings from persisten storage. If its not there // it will be set to null. settings = readSettingsObject(settings); if (settings != null) { settingsCache.putSettings(scope, settings); } } return settings; } /** Get or create CrawlerSettings object for a host or domain. * * This method is similar to {@link #getSettingsObject(String)} except that * if there is no settings for this particular host or domain a new settings * object will be returned. * * @param scope the host or domain to get or create the settings for. * @return settings object for the host/domain. * @see #getSettings(String) * @see #getSettingsObject(String) */ public CrawlerSettings getOrCreateSettingsObject(String scope) { return getOrCreateSettingsObject(scope, null); } public CrawlerSettings getOrCreateSettingsObject(String scope, String refinement) { CrawlerSettings settings; settings = getSettingsObject(scope, refinement); if (settings == null) { scope = scope.intern(); // No existing settings object found, create one settings = new CrawlerSettings(this, scope, refinement); settingsCache.refreshHostToSettings(); settingsCache.putSettings(scope, settings); } return settings; } /** Write the CrawlerSettings object to persistent storage. * * @param settings the settings object to write. */ public abstract void writeSettingsObject(CrawlerSettings settings); /** Read the CrawlerSettings object from persistent storage. * * @param settings the settings object to be updated with data from the * persistent storage. * @return the updated settings object or null if there was no data for this * in the persistent storage. */ protected abstract CrawlerSettings readSettingsObject(CrawlerSettings settings); /** Delete a settings object from persistent storage. * * @param settings the settings object to delete. */ public void deleteSettingsObject(CrawlerSettings settings) { settingsCache.deleteSettingsObject(settings); } /** Get the CrawlOrder. * * @return the CrawlOrder */ public CrawlOrder getOrder() { return order; } /** Instatiate a new ModuleType given its name and className. * * @param name the name for the new ComplexType. * @param className the class name of the new ComplexType. * @return an instance of the class identified by className. * * @throws InvocationTargetException */ public static ModuleType instantiateModuleTypeFromClassName( String name, String className) throws InvocationTargetException { Class cl; try { cl = Class.forName(className); } catch (ClassNotFoundException e) { throw new InvocationTargetException(e); } ModuleType module; try { Constructor co = cl.getConstructor(new Class[] { String.class }); module = (ModuleType) co.newInstance(new Object[] { name }); } catch (IllegalArgumentException e) { throw new InvocationTargetException(e); } catch (InstantiationException e) { throw new InvocationTargetException(e); } catch (IllegalAccessException e) { throw new InvocationTargetException(e); } catch (SecurityException e) { throw new InvocationTargetException(e); } catch (NoSuchMethodException e) { throw new InvocationTargetException(e); } return module; } /** * Transforms a relative path so that it is relative to a location that is * regarded as a working dir for these settings. If an absolute path is given, * it will be returned unchanged. * @param path A relative path to a file (or directory) * @return The same path modified so that it is relative to the file level * location that is considered the working directory for these settings. */ public abstract File getPathRelativeToWorkingDirectory(String path); /** * Will return a Collection of strings with domains that contain 'per' * domain overrides (or their subdomains contain them). * * The domains considered are * limited to those that are subdomains of the supplied domain. If null or * empty string is supplied the TLDs will be considered. * @param rootDomain The domain to get domain overrides for. Examples: * 'org', 'archive.org', 'crawler.archive.org' etc. * @return An array of domains that contain overrides. If rootDomain does not * exist an empty array will be returned. */ public abstract Collection getDomainOverrides(String rootDomain); /** * Unregister an instance of {@link ValueErrorHandler}. * * @param errorHandler the <code>CalueErrorHandler</code> to unregister. * * @see ValueErrorHandler * @see #setErrorReportingLevel(Level) * @see #registerValueErrorHandler(ValueErrorHandler) * */ public void unregisterValueErrorHandler(ValueErrorHandler errorHandler) { valueErrorHandlers.remove(errorHandler); } /** * Register an instance of {@link ValueErrorHandler}. * <p> * If a ValueErrorHandler is registered, only constraints with level * {@link Level#SEVERE}will throw an {@link InvalidAttributeValueException}. * The ValueErrorHandler will recieve a notification for all failed checks * with level equal or greater than the error reporting level. * * @param errorHandler the <code>CalueErrorHandler</code> to register. * * @see ValueErrorHandler * @see #setErrorReportingLevel(Level) * @see #unregisterValueErrorHandler(ValueErrorHandler) */ public void registerValueErrorHandler(ValueErrorHandler errorHandler) { if (errorHandler != null) { valueErrorHandlers.add(errorHandler); } } /** * Fire events on all registered {@link ValueErrorHandler}. * * @param error the failed constraints return value. * @return true if there was any registered ValueErrorHandlers to notify. */ boolean fireValueErrorHandlers(Constraint.FailedCheck error) { if (error.getLevel().intValue() >= errorReportingLevel) { for (Iterator it = valueErrorHandlers.iterator(); it.hasNext();) { ((ValueErrorHandler) it.next()).handleValueError(error); } } return valueErrorHandlers.size() > 0; } /** * Set the level for which notification of failed constraints will be fired. * * @param level the error reporting level. */ public void setErrorReportingLevel(Level level) { errorReportingLevel = level.intValue(); } /** * Creates and returns a <tt>List</tt> of all files comprising the current * settings framework. * * <p>The List contains the absolute String path of each file. * * <p>The list should contain any configurable files, including such files * as seed file and any other files use by the various settings modules. * * <p>Implementations of the SettingsHandler that do not use files for * permanent storage should return an empty list. * @return <code>List</code> of framework files. */ public abstract List getListOfAllFiles(); /** * Clear any per-host settings cached in memory; allows editting of * per-host settings files on disk, perhaps in bulk/automated fashion, * to take effect in running crawl. */ public void clearPerHostSettingsCache() { settingsCache.clear(); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -