⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xmlsettingshandler.java

📁 最强的爬虫工程
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
            } else {                is = new FileInputStream(f);            }            if (is != null) {                XMLReader parser = SAXParserFactory.newInstance()                    .newSAXParser().getXMLReader();                InputStream file = new BufferedInputStream(is);                parser.setContentHandler(new CrawlSettingsSAXHandler(settings));                InputSource source = new InputSource(file);                source.setSystemId(f.toURL().toExternalForm());                parser.parse(source);                result = settings;            }        } catch (SAXParseException e) {            logger.warning(e.getMessage() + " in '" + e.getSystemId()                + "', line: " + e.getLineNumber() + ", column: "                + e.getColumnNumber());        } catch (SAXException e) {            logger.warning(e.getMessage() + ": "                + e.getException().getMessage());        } catch (ParserConfigurationException e) {            logger.warning(e.getMessage() + ": "                + e.getCause().getMessage());        } catch (FactoryConfigurationError e) {            logger.warning(e.getMessage() + ": "                + e.getException().getMessage());        } catch (IOException e) {            logger.warning("Could not access file '"                + f.getAbsolutePath() + "': " + e.getMessage());        }        return result;    }    protected final CrawlerSettings readSettingsObject(CrawlerSettings settings) {        File filename = settingsToFilename(settings);        return readSettingsObject(settings, filename);    }    /** Get the <code>File</code> object pointing to the order file.     *     * @return File object for the order file.     */    public File getOrderFile() {        return orderFile;    }    /** Creates a replica of the settings file structure in another directory     * (fully recursive, includes all per host settings). The SettingsHandler     * will then refer to the new files.     
*     * Observe that this method should only be called after the SettingsHandler     * has been initialized.     *     * @param newOrderFileName where the new order file should be saved.     * @param newSettingsDirectory the top level directory of the per host/domain     *                          settings files.     * @throws IOException     */    public void copySettings(File newOrderFileName, String newSettingsDirectory)      throws IOException {        File oldSettingsDirectory = getSettingsDirectory();        // Write new orderfile and point the settingshandler to it        orderFile = newOrderFileName;        try {            getOrder().setAttribute(                new Attribute(                    CrawlOrder.ATTR_SETTINGS_DIRECTORY, newSettingsDirectory));        } catch (Exception e) {            throw new IOException("Could not update settings with new location: "                + e.getMessage());        }        writeSettingsObject(getSettingsObject(null));        File newDir = getPathRelativeToWorkingDirectory(newSettingsDirectory);        // Copy the per host files if src and dest directories are different.        if (oldSettingsDirectory.compareTo(newDir) != 0) {            FileUtils.copyFiles(oldSettingsDirectory, newDir);        }    }    /**     * Transforms a relative path so that it is relative to the location of the     * order file. If an absolute path is given, it will be returned unchanged.<p>     * The location of it's order file is always considered as the 'working'     * directory for any given settings.     * @param path A relative path to a file (or directory)     * @return The same path modified so that it is relative to the file level     *         location of the order file for the settings handler.     
*/    public File getPathRelativeToWorkingDirectory(String path) {        File f = new File(path);        // If path is not absolute, set f's directory        // relative to the path of the order file        if (!f.isAbsolute()) {            f = new File(this.getOrderFile().getParent(), path);        }        return f;    }    public Collection getDomainOverrides(String rootDomain) {        File settingsDir = getSettingsDirectory();        //Find the right start directory.        ArrayList domains = new ArrayList();        //First we deconstruct the rootDomain string        while(rootDomain != null && rootDomain.length()>0){            if(rootDomain.indexOf('.')<0){                // Last level.                domains.add(rootDomain);                break; //We're done.            } else {                // Got more then one level left.                domains.add(rootDomain.substring(0,rootDomain.indexOf('.')));                // Strip down rootDomain.                rootDomain = rootDomain.substring(rootDomain.indexOf('.')+1);            }        }        //Build up a proper path        //Since the domains are right to left, we start at the end of the array.        StringBuffer subDir = new StringBuffer();        for(int i=(domains.size()-1) ; i>=0 ; i--){            subDir.append(File.separator+domains.get(i));        }        //Then we move to the approprite directory.        settingsDir = new File(settingsDir.getPath()+subDir);        TreeSet confirmedSubDomains = new TreeSet(new Comparator() {                public int compare(Object o1, Object o2) {                    if(o1 instanceof String && o2 instanceof String){                        return ((String)o1).compareTo(o2.toString());                    } else {                        // We only account for strings.                        return 0;                    }                }            }        );        if(settingsDir.exists()){            // Found our place! Search through it's subdirs.           
 File[] possibleSubDomains = settingsDir.listFiles();            for (int i = 0; i < possibleSubDomains.length; i++) {                if (possibleSubDomains[i].isDirectory()                    && isOverride(possibleSubDomains[i])) {                    // Found one!                    confirmedSubDomains.add(possibleSubDomains[i].getName());                }            }        }        return confirmedSubDomains;    }    /**     * Checks if a file is a a 'per host' override or if it's a directory if it     * or it's subdirectories  contains a 'per host' override file.     * @param f The file or directory to check     * @return True if the file is an override or it's a directory that contains     *         such a file.     */    private boolean isOverride(File f){        if(f.isDirectory()){            // Have a directory, check it's contents.            File[] subs = f.listFiles();            for(int i=0 ; i < subs.length ; i++){                if(isOverride(subs[i])){                    // Found one. Can stop looking.                    return true;                }            }        } else if (f.getName().equals(                settingsFilename + "." + settingsFilenameSuffix)) {            // This is an override file (or sure looks like one in any case).            return true;        }        // Didn't find an override.        return false;    }    /** Delete a settings object from persistent storage.     *     * Deletes the file represented by the submitted settings object. All empty     * directories that are parents to the files path are also deleted.     *     * @param settings the settings object to delete.     
*/
    public void deleteSettingsObject(CrawlerSettings settings) {
        super.deleteSettingsObject(settings);
        File settingsDirectory = getSettingsDirectory();
        File settingsFile = settingsToFilename(settings);

        settingsFile.delete();

        // Walk upwards removing directories that have become empty. Stop at
        // the settings directory itself, at the first directory that still
        // has content (or cannot be listed), or when there is no parent left.
        File dir = settingsFile.getParentFile();
        while (dir != null && dir.isDirectory()
                && !dir.equals(settingsDirectory)) {
            String[] children = dir.list();
            if (children == null || children.length != 0) {
                break;
            }
            dir.delete();
            dir = dir.getParentFile();
        }
    }

    /* (non-Javadoc)
     * @see org.archive.crawler.settings.SettingsHandler#getListOfAllFiles()
     */
    public List getListOfAllFiles() {
        ArrayList list = new ArrayList();
        // Add CrawlOrder.
        list.add(getOrderFile().getAbsolutePath());
        // Iterate through the entire override hierarchy
        File settingsDir = getSettingsDirectory();
        if (settingsDir.exists()) {
            recursiveFindFiles(settingsDir, list);
        }
        // Get files used by settings modules.
        recursiveFindSecondaryFiles(getOrder(), list);
        return list;
    }

    /**
     * Add any files being used by any of the Modules making up the settings to
     * the list.
     *
     * <p>An attribute that cannot be read is logged as a warning and skipped;
     * the remaining attributes are still interrogated.
     *
     * @param mbean A ModuleType to interrogate for files. Any child modules
     *           will be recursively interrogated.
     * @param list The list to add found files to.
     */
    private void recursiveFindSecondaryFiles(ComplexType mbean, ArrayList list) {
        MBeanInfo info = mbean.getMBeanInfo();
        MBeanAttributeInfo[] a = info.getAttributes();

        // Interrogate the current module
        if (mbean instanceof ModuleType) {
            ((ModuleType) mbean).listUsedFiles(list);
        }

        // Recursively interrogate all sub modules that are of ComplexType
        for (int n = 0; n < a.length; n++) {
            if (a[n] == null) {
                // Error null attribute. Nothing to interrogate.
                continue;
            }
            ModuleAttributeInfo att = (ModuleAttributeInfo) a[n];
            try {
                Object currentAttribute = mbean.getAttribute(att.getName());
                if (currentAttribute instanceof ComplexType) {
                    recursiveFindSecondaryFiles(
                        (ComplexType) currentAttribute, list);
                }
            } catch (AttributeNotFoundException e) {
                logger.warning("Could not read attribute '" + att.getName()
                    + "' while listing used files: " + e);
            } catch (MBeanException e) {
                logger.warning("Could not read attribute '" + att.getName()
                    + "' while listing used files: " + e);
            } catch (ReflectionException e) {
                logger.warning("Could not read attribute '" + att.getName()
                    + "' while listing used files: " + e);
            }
        }
    }

    /**
     * Starting at the specific directory this method will iterate through all
     * sub directories and add each file (as absolute name, with path as a
     * string) to the provided ArrayList. Any file found under the settings
     * directory with the proper suffix will be considered valid and added to
     * the list.
     * @param dir Starting directory
     * @param list The list to add to
     */
    private void recursiveFindFiles(File dir, ArrayList list){
        File[] subs = dir.listFiles();
        if (subs == null) {
            // Not a directory, or its contents could not be listed.
            return;
        }
        for (int i = 0; i < subs.length; i++) {
            if (subs[i].isDirectory()) {
                recursiveFindFiles(subs[i], list);
            } else if (subs[i].getName().endsWith(settingsFilenameSuffix)) {
                // Add it to list
                list.add(subs[i].getAbsolutePath());
            }
        }
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -