⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 settingshandler.java

📁 这是个爬虫和Lucene相结合最好了
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* SettingsHandler * * $Id: SettingsHandler.java 4662 2006-09-25 23:45:21Z paul_jack $ * * Created on Dec 16, 2003 * * Copyright (C) 2004 Internet Archive. * * This file is part of the Heritrix web crawler (crawler.archive.org). * * Heritrix is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * Heritrix is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with Heritrix; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */package org.archive.crawler.settings;import java.io.File;import java.lang.reflect.Constructor;import java.lang.reflect.InvocationTargetException;import java.text.ParseException;import java.util.Collection;import java.util.Collections;import java.util.HashMap;import java.util.HashSet;import java.util.Iterator;import java.util.List;import java.util.Map;import java.util.Set;import java.util.logging.Level;import javax.management.AttributeNotFoundException;import javax.management.InvalidAttributeValueException;import org.archive.crawler.datamodel.CrawlOrder;import org.archive.crawler.settings.refinements.Refinement;import org.archive.net.UURI;import org.archive.util.ArchiveUtils;/** An instance of this class holds a hierarchy of settings. * * More than one instance in memory is allowed so that a new CrawlJob could * be configured while another job is running. * * This class should be subclassed to adapt to a persistent storage. 
* @author John Erik Halse
 */
public abstract class SettingsHandler {
    /** Cached CrawlerSettings objects */
    private SettingsCache settingsCache =
        new SettingsCache(new CrawlerSettings(this, null));

    /** Reference to the order module */
    private CrawlOrder order;

    /**
     * Set of ValueErrorHandler instances, wrapped with
     * Collections.synchronizedSet.
     */
    private Set<ValueErrorHandler> valueErrorHandlers
      = Collections.synchronizedSet(new HashSet<ValueErrorHandler>());

    /** Error reporting level; starts out as the integer value of Level.ALL. */
    private int errorReportingLevel = Level.ALL.intValue();

    /** Datatypes supported by the settings framework */
    final static String INTEGER = "integer";
    final static String LONG = "long";
    final static String FLOAT = "float";
    final static String DOUBLE = "double";
    final static String BOOLEAN = "boolean";
    final static String STRING = "string";
    final static String TEXT = "text";
    final static String OBJECT = "object";
    final static String TIMESTAMP = "timestamp";
    final static String MAP = "map";
    final static String INTEGER_LIST = "integerList";
    final static String LONG_LIST = "longList";
    final static String FLOAT_LIST = "floatList";
    final static String DOUBLE_LIST = "doubleList";
    final static String STRING_LIST = "stringList";

    /*
     * Table of pairs: element 0 is a datatype name from the constants above,
     * element 1 is the fully qualified name of the class used for it.
     */
    private final static String names[][] = new String[][] {
            { INTEGER, "java.lang.Integer"},
            { LONG, "java.lang.Long"},
            { FLOAT, "java.lang.Float"},
            { DOUBLE, "java.lang.Double"},
            { BOOLEAN, "java.lang.Boolean"},
            { STRING, "java.lang.String"},
            { TEXT, "org.archive.crawler.settings.TextField"},
            { OBJECT, "org.archive.crawler.settings.ModuleType"},
            { TIMESTAMP, "java.util.Date"},
            { MAP, "org.archive.crawler.settings.MapType"},
            { INTEGER_LIST,
                    "org.archive.crawler.settings.IntegerList"},
            { LONG_LIST, "org.archive.crawler.settings.LongList"},
            { FLOAT_LIST, "org.archive.crawler.settings.FloatList"},
            { DOUBLE_LIST,
"org.archive.crawler.settings.DoubleList"},            { STRING_LIST, "org.archive.crawler.settings.StringList"}};    private final static Map<String,String> name2class     = new HashMap<String,String>();    private final static Map<String,String> class2name     = new HashMap<String,String>();    static {        for (int i = 0; i < names.length; i++) {            name2class.put(names[i][0], names[i][1]);            class2name.put(names[i][1], names[i][0]);        }    }    /** Create a new SettingsHandler object.     *     * @throws InvalidAttributeValueException     */    public SettingsHandler() throws InvalidAttributeValueException {        order = new CrawlOrder();        order.setAsOrder(this);    }    /** Initialize the SettingsHandler.     *     * This method reads the default settings from the persistent storage.     */    public void initialize() {        readSettingsObject(settingsCache.getGlobalSettings());    }        public void cleanup() {        this.settingsCache = null;        if (this.order != null) {            this.order.setController(null);        }        this.order =  null;    }    /** Strip off the leftmost part of a domain name.     *     * @param scope the domain name.     * @return scope with everything before the first dot ripped off.     */    protected String getParentScope(String scope) {        int split = scope.indexOf('.');        return (split == -1)? null: scope.substring(split + 1);    }    /** Get a module by name.     *     * All modules in the order should have unique names. This method makes it     * possible to get the modules of the order by its name.     *     * @param name the modules name.     * @return the module the name references.     */    public ModuleType getModule(String name) {        return settingsCache.getGlobalSettings().getModule(name);    }    /** Get a complex type by its absolute name.     *     * The absolute name is the complex types name and the path leading to     * it.     
*     * @param settings the settings object to query.     * @param absoluteName the absolute name of the complex type to get.     * @return the complex type referenced by the absolute name or null if     *         the complex type could not be found in this settings object.     * @throws AttributeNotFoundException is thrown if no ComplexType by this     *         name exist.     */    public ComplexType getComplexTypeByAbsoluteName(            CrawlerSettings settings, String absoluteName)            throws AttributeNotFoundException {        settings = settings == null ? settingsCache.getGlobalSettings() : settings;        DataContainer data = settings.getData(absoluteName);        if (data == null) {            CrawlerSettings parentSettings = settings.getParent();            if (parentSettings == null) {                throw new AttributeNotFoundException(absoluteName);            }            return getComplexTypeByAbsoluteName(parentSettings, absoluteName);        }        return data.getComplexType();    }    protected static String getTypeName(String className) {        return (String) class2name.get(className);    }    protected static String getClassName(String typeName) {        return (String) name2class.get(typeName);    }    /** Convert a String object to an object of <code>typeName</code>.     *     * @param stringValue string to convert.     * @param typeName type to convert to. typeName should be one of the     *        supported types represented by constants in this class.     * @return the new value object.     * @throws ClassCastException is thrown if string could not be converted.     
*/    protected static Object StringToType(String stringValue, String typeName) {        Object value;        if (typeName == SettingsHandler.STRING) {            value = stringValue;        } else if (typeName == SettingsHandler.TEXT) {            value = new TextField(stringValue);        } else if (typeName == SettingsHandler.INTEGER) {            value = Integer.decode(stringValue);        } else if (typeName == SettingsHandler.LONG) {            value = Long.decode(stringValue);        } else if (typeName == SettingsHandler.BOOLEAN) {            value = Boolean.valueOf(stringValue);        } else if (typeName == SettingsHandler.DOUBLE) {            value = Double.valueOf(stringValue);        } else if (typeName == SettingsHandler.FLOAT) {            value = Float.valueOf(stringValue);        } else if (typeName == SettingsHandler.TIMESTAMP) {            try {                value = ArchiveUtils.parse14DigitDate(stringValue);            } catch (ParseException e) {                throw new ClassCastException(                    "Cannot convert '"                        + stringValue                        + "' to type '"                        + typeName                        + "'");            }        } else {            throw new ClassCastException(                "Cannot convert '"                    + stringValue                    + "' to type '"                    + typeName                    + "'");        }        return value;    }    /** Get CrawlerSettings object in effect for a host or domain.     *     * If there is no specific settings for the host/domain, it will recursively     * go up the hierarchy to find the settings object that should be used for     * this host/domain.     *     * @param host the host or domain to get the settings for.     * @return settings object in effect for the host/domain.     
* @see #getSettingsObject(String)
     * @see #getOrCreateSettingsObject(String)
     */
    public CrawlerSettings getSettings(String host) {
        // No URI context in this variant, so null is passed for it.
        return getRefinementsForSettings(getSettingsForHost(host), null);
    }

    /** Get CrawlerSettings object in effect for a host or domain.
    *
    * If there is no specific settings for the host/domain, it will recursively
    * go up the hierarchy to find the settings object that should be used for
    * this host/domain.
    * <p/>
    * This method passes around a URI that refinements are checked against:
    * the settings found for the host are handed, together with the URI, to
    * <code>getRefinementsForSettings</code>.
    *
    * @param host the host or domain to get the settings for.
    * @param uuri UURI for context.
    * @return settings object in effect for the host/domain.
    * @see #getSettingsObject(String)
    * @see #getOrCreateSettingsObject(String)
    */
    public CrawlerSettings getSettings(String host, UURI uuri) {
        return getRefinementsForSettings(getSettingsForHost(host), uuri);
    }

    protected CrawlerSettings getSettingsForHost(String host) {
        CrawlerSettings settings = settingsCache.getSettings(host, null);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -