⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 settingshandler.java

📁 这是个爬虫，和 Lucene 相结合最好了
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
        if (settings == null) {
            // Nothing resolved so far: look the host itself up first, then keep
            // asking getParentScope() for the next scope until a settings
            // object is found or there is no parent scope left (null).
            String tmpHost = host;
            settings = getSettingsObject(tmpHost);
            while (settings == null && tmpHost != null) {
                tmpHost = getParentScope(tmpHost);
                settings = getSettingsObject(tmpHost);
            }
            // Store the outcome in settingsCache under the original host, not
            // under the scope where the settings were eventually found.
            settingsCache.putSettings(host, settings);
        }
        return settings;
    }

    /**
     * Replace a settings object with the settings of any of its refinements
     * that apply to the given URI.
     *
     * Every refinement of <code>settings</code> is checked in iteration order;
     * each one whose bounds contain <code>uri</code> replaces the current
     * settings object, so the last matching refinement wins.
     *
     * NOTE(review): {@link #getSettingsObject(String, String)} is documented
     * to return null when no settings exist. If that can happen for a matching
     * refinement, a later matching refinement would call
     * <code>getScope()</code> on null -- confirm that every refinement has a
     * settings object.
     *
     * @param settings the settings object to start from.
     * @param uri the URI the refinements are checked against.
     * @return the settings object of the last matching refinement, or the
     *         settings object passed in if no refinement matched.
     */
    private CrawlerSettings getRefinementsForSettings(CrawlerSettings settings,
            UURI uri) {
        if (settings.hasRefinements()) {
            for(Iterator it = settings.refinementsIterator(); it.hasNext();) {
                Refinement refinement = (Refinement) it.next();
                if (refinement.isWithinRefinementBounds(uri)) {
                    // The scope is read from the current settings object,
                    // which may already be a refinement picked in an earlier
                    // iteration.
                    settings = getSettingsObject(settings.getScope(),
                            refinement.getReference());
                }
            }
        }
        return settings;
    }

    /** Get CrawlerSettings object for a host or domain.
     *
     * The difference between this method and
     * <code>getSettings(String host)</code> is that this method will return
     * null if there are no settings for the particular host or domain.
     * Delegates to {@link #getSettingsObject(String, String)} with a null
     * refinement.
     *
     * @param scope the host or domain to get the settings for.
     * @return settings object for the host/domain or null if no
     *         settings exist for the host/domain.
     * @see #getSettings(String)
     * @see #getOrCreateSettingsObject(String)
     */
    public CrawlerSettings getSettingsObject(String scope) {
        return getSettingsObject(scope, null);
    }

    /**
     * Get CrawlerSettings object for a host/domain and a particular refinement.
     *
     * @param scope the host or domain to get the settings for.
     * @param refinement the refinement reference to get.
     * @return CrawlerSettings object for a host/domain and a particular
     * refinement or null if no settings exist for the host/domain.
*/    public CrawlerSettings getSettingsObject(String scope, String refinement) {        CrawlerSettings settings =            settingsCache.getSettingsObject(scope, refinement);        if (settings == null) {            // Reference not found            settings = new CrawlerSettings(this, scope, refinement);            // Try to read settings from persisten storage. If its not there            // it will be set to null.            settings = readSettingsObject(settings);            if (settings != null) {                settingsCache.putSettings(scope, settings);            }        }        return settings;    }    /** Get or create CrawlerSettings object for a host or domain.     *     * This method is similar to {@link #getSettingsObject(String)} except that     * if there is no settings for this particular host or domain a new settings     * object will be returned.     *     * @param scope the host or domain to get or create the settings for.     * @return settings object for the host/domain.     * @see #getSettings(String)     * @see #getSettingsObject(String)     */    public CrawlerSettings getOrCreateSettingsObject(String scope) {        return getOrCreateSettingsObject(scope, null);    }    public CrawlerSettings getOrCreateSettingsObject(String scope,            String refinement) {        CrawlerSettings settings;        settings = getSettingsObject(scope, refinement);        if (settings == null) {            scope = scope.intern();            // No existing settings object found, create one            settings = new CrawlerSettings(this, scope, refinement);            settingsCache.refreshHostToSettings();            settingsCache.putSettings(scope, settings);        }        return settings;    }    /** Write the CrawlerSettings object to persistent storage.     *     * @param settings the settings object to write.     
*/
    public abstract void writeSettingsObject(CrawlerSettings settings);

    /**
     * Fill a CrawlerSettings object with the data held for it in persistent
     * storage.
     *
     * @param settings the settings object to be updated with data from the
     *                 persistent storage.
     * @return the updated settings object, or null if the persistent storage
     *         holds no data for it.
     */
    protected abstract CrawlerSettings readSettingsObject(CrawlerSettings settings);

    /**
     * Delete a settings object.
     *
     * This base implementation hands the object to
     * <code>settingsCache.deleteSettingsObject</code>. Removal from persistent
     * storage is presumably added by overriding subclasses -- confirm.
     *
     * @param settings the settings object to delete.
     */
    public void deleteSettingsObject(CrawlerSettings settings) {
        this.settingsCache.deleteSettingsObject(settings);
    }

    /**
     * Get the CrawlOrder held by this handler.
     *
     * @return the CrawlOrder
     */
    public CrawlOrder getOrder() {
        return this.order;
    }

    /** Instantiate a new ModuleType given its name and className.
     *
     * @param name the name for the new ComplexType.
     * @param className the class name of the new ComplexType.
     * @return an instance of the class identified by className.
*     * @throws InvocationTargetException     */    public static ModuleType instantiateModuleTypeFromClassName(            String name, String className)            throws InvocationTargetException {        Class cl;        try {            cl = Class.forName(className);        } catch (ClassNotFoundException e) {            throw new InvocationTargetException(e);        }        ModuleType module;        try {            Constructor co =                cl.getConstructor(new Class[] { String.class });            module = (ModuleType) co.newInstance(new Object[] { name });        } catch (IllegalArgumentException e) {            throw new InvocationTargetException(e);        } catch (InstantiationException e) {            throw new InvocationTargetException(e);        } catch (IllegalAccessException e) {            throw new InvocationTargetException(e);        } catch (SecurityException e) {            throw new InvocationTargetException(e);        } catch (NoSuchMethodException e) {            throw new InvocationTargetException(e);        }        return module;    }    /**     * Transforms a relative path so that it is relative to a location that is     * regarded as a working dir for these settings. If an absolute path is given,     * it will be returned unchanged.     * @param path A relative path to a file (or directory)     * @return The same path modified so that it is relative to the file level     *         location that is considered the working directory for these settings.     */    public abstract File getPathRelativeToWorkingDirectory(String path);    /**     * Will return a Collection of strings with domains that contain 'per'     * domain overrides (or their subdomains contain them).      *      * The domains considered are     * limited to those that are subdomains of the supplied domain. If null or     * empty string is supplied the TLDs will be considered.     * @param rootDomain The domain to get domain overrides for. 
Examples:     *                   'org', 'archive.org', 'crawler.archive.org' etc.     * @return An array of domains that contain overrides. If rootDomain does not     *         exist an empty array will be returned.     */    public abstract Collection getDomainOverrides(String rootDomain);    /**     * Unregister an instance of {@link ValueErrorHandler}.     *     * @param errorHandler the <code>CalueErrorHandler</code> to unregister.     *     * @see ValueErrorHandler     * @see #setErrorReportingLevel(Level)     * @see #registerValueErrorHandler(ValueErrorHandler)     *     */    public void unregisterValueErrorHandler(ValueErrorHandler errorHandler) {        valueErrorHandlers.remove(errorHandler);    }    /**     * Register an instance of {@link ValueErrorHandler}.     * <p>     * If a ValueErrorHandler is registered, only constraints with level     * {@link Level#SEVERE}will throw an {@link InvalidAttributeValueException}.     * The ValueErrorHandler will recieve a notification for all failed checks     * with level equal or greater than the error reporting level.     *     * @param errorHandler the <code>CalueErrorHandler</code> to register.     *     * @see ValueErrorHandler     * @see #setErrorReportingLevel(Level)     * @see #unregisterValueErrorHandler(ValueErrorHandler)     */    public void registerValueErrorHandler(ValueErrorHandler errorHandler) {        if (errorHandler != null) {            valueErrorHandlers.add(errorHandler);        }    }    /**     * Fire events on all registered {@link ValueErrorHandler}.     *     * @param error the failed constraints return value.     * @return true if there was any registered ValueErrorHandlers to notify.     
*/    boolean fireValueErrorHandlers(Constraint.FailedCheck error) {        if (error.getLevel().intValue() >= errorReportingLevel) {            for (Iterator it = valueErrorHandlers.iterator(); it.hasNext();) {                ((ValueErrorHandler) it.next()).handleValueError(error);            }        }        return valueErrorHandlers.size() > 0;    }    /**     * Set the level for which notification of failed constraints will be fired.     *     * @param level the error reporting level.     */    public void setErrorReportingLevel(Level level) {        errorReportingLevel = level.intValue();    }    /**     * Creates and returns a <tt>List</tt> of all files comprising the current     * settings framework.     *     * <p>The List contains the absolute String path of each file.     *     * <p>The list should contain any configurable files, including such files     * as seed file and any other files use by the various settings modules.     *     * <p>Implementations of the SettingsHandler that do not use files for     * permanent storage should return an empty list.     * @return <code>List</code> of framework files.     */    public abstract List getListOfAllFiles();        /**     * Clear any per-host settings cached in memory; allows editting of      * per-host settings files on disk, perhaps in bulk/automated fashion,     * to take effect in running crawl.      */    public void clearPerHostSettingsCache() {        settingsCache.clear();    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -