📄 xmlsettingshandler.java
字号:
} else {
                is = new FileInputStream(f);
            }
            if (is != null) {
                XMLReader parser = SAXParserFactory.newInstance()
                    .newSAXParser().getXMLReader();
                InputStream file = new BufferedInputStream(is);
                parser.setContentHandler(new CrawlSettingsSAXHandler(settings));
                InputSource source = new InputSource(file);
                // The system id lets parse errors report which file failed.
                source.setSystemId(f.toURL().toExternalForm());
                parser.parse(source);
                result = settings;
            }
        } catch (SAXParseException e) {
            logger.warning(e.getMessage() + " in '" + e.getSystemId()
                + "', line: " + e.getLineNumber() + ", column: "
                + e.getColumnNumber());
        } catch (SAXException e) {
            // NOTE(review): getException() returns null when the SAXException
            // wraps nothing; this handler would then throw a
            // NullPointerException. Left as-is because the start of this
            // method is outside the reviewed excerpt.
            logger.warning(e.getMessage() + ": "
                + e.getException().getMessage());
        } catch (ParserConfigurationException e) {
            // NOTE(review): getCause() may be null as well -- confirm.
            logger.warning(e.getMessage() + ": "
                + e.getCause().getMessage());
        } catch (FactoryConfigurationError e) {
            // NOTE(review): getException() may be null as well -- confirm.
            logger.warning(e.getMessage() + ": "
                + e.getException().getMessage());
        } catch (IOException e) {
            logger.warning("Could not access file '"
                + f.getAbsolutePath() + "': " + e.getMessage());
        }
        return result;
    }

    /** Reads a settings object from the file it maps to.
     *
     * The file is determined with <code>settingsToFilename</code> and the
     * actual reading is delegated to the two argument version of this method.
     *
     * @param settings the settings object to fill.
     * @return whatever the two argument <code>readSettingsObject</code>
     *         returns for the resolved file.
     */
    protected final CrawlerSettings readSettingsObject(CrawlerSettings settings) {
        return readSettingsObject(settings, settingsToFilename(settings));
    }

    /** Get the <code>File</code> object pointing to the order file.
     *
     * @return File object for the order file.
     */
    public File getOrderFile() {
        return orderFile;
    }

    /** Creates a replica of the settings file structure in another directory
     * (fully recursive, includes all per host settings). The SettingsHandler
     * will then refer to the new files.
     *
     * Observe that this method should only be called after the SettingsHandler
     * has been initialized.
     *
     * Note that the handler is pointed at the new order file before the
     * settings directory attribute is updated, so the new order file stays in
     * effect even if this method fails with an exception.
     *
     * @param newOrderFileName where the new order file should be saved.
     * @param newSettingsDirectory the top level directory of the per host/domain
     *                          settings files.
     * @throws IOException if the settings directory attribute could not be
     *                     updated (the original exception is attached as the
     *                     cause), or if writing or copying the files fails.
     */
    public void copySettings(File newOrderFileName, String newSettingsDirectory)
            throws IOException {
        File oldSettingsDirectory = getSettingsDirectory();

        // Write new orderfile and point the settingshandler to it
        orderFile = newOrderFileName;
        try {
            getOrder().setAttribute(
                new Attribute(
                    CrawlOrder.ATTR_SETTINGS_DIRECTORY, newSettingsDirectory));
        } catch (Exception e) {
            // Keep the original exception as the cause so the stack trace of
            // the real failure is not lost. initCause is used rather than a
            // two argument constructor to stay compatible with older JDKs.
            IOException ioe = new IOException(
                "Could not update settings with new location: "
                + e.getMessage());
            ioe.initCause(e);
            throw ioe;
        }
        writeSettingsObject(getSettingsObject(null));

        File newDir = getPathRelativeToWorkingDirectory(newSettingsDirectory);

        // Copy the per host files if src and dest directories are different.
        if (oldSettingsDirectory.compareTo(newDir) != 0) {
            FileUtils.copyFiles(oldSettingsDirectory, newDir);
        }
    }

    /**
     * Transforms a relative path so that it is relative to the location of the
     * order file. If an absolute path is given, it will be returned unchanged.<p>
     * The location of its order file is always considered as the 'working'
     * directory for any given settings.
     * @param path A relative path to a file (or directory)
     * @return The same path modified so that it is relative to the file level
     *         location of the order file for the settings handler.
     */
    public File getPathRelativeToWorkingDirectory(String path) {
        File f = new File(path);
        if (f.isAbsolute()) {
            return f;
        }
        // Not absolute: resolve against the directory holding the order file.
        return new File(this.getOrderFile().getParent(), path);
    }

    /**
     * Lists the sub domains of a domain that have overrides.
     *
     * The domain is split on '.' and its parts are appended to the settings
     * directory in reverse order (so <code>a.b.c</code> maps to
     * <code>c/b/a</code> below the settings directory). Every directory found
     * directly in that location that contains an override file, at any depth,
     * contributes its name to the result.
     *
     * @param rootDomain the domain to look below. If it is null or empty the
     *                   settings directory itself is searched.
     * @return a sorted collection of directory names (Strings). It is empty if
     *         the location does not exist, cannot be listed or holds no
     *         overrides.
     */
    public Collection getDomainOverrides(String rootDomain) {
        File settingsDir = getSettingsDirectory();

        // First we deconstruct the rootDomain string, left to right.
        ArrayList domains = new ArrayList();
        while (rootDomain != null && rootDomain.length() > 0) {
            int dot = rootDomain.indexOf('.');
            if (dot < 0) {
                // Last level. We're done.
                domains.add(rootDomain);
                break;
            }
            // Got more than one level left: take the first, strip it off.
            domains.add(rootDomain.substring(0, dot));
            rootDomain = rootDomain.substring(dot + 1);
        }

        // Build up a proper path. Since the directory layout runs right to
        // left through the domain, we start at the end of the list.
        StringBuffer subDir = new StringBuffer();
        for (int i = domains.size() - 1; i >= 0; i--) {
            subDir.append(File.separator + domains.get(i));
        }

        // Then we move to the appropriate directory.
        settingsDir = new File(settingsDir.getPath() + subDir);

        TreeSet confirmedSubDomains = new TreeSet(new Comparator() {
            public int compare(Object o1, Object o2) {
                if (o1 instanceof String && o2 instanceof String) {
                    return ((String) o1).compareTo(o2.toString());
                }
                // We only account for strings.
                return 0;
            }
        });

        if (settingsDir.exists()) {
            // Found our place! Search through its subdirs.
            // listFiles() returns null if settingsDir is not a directory or
            // an I/O error occurs, so that case must not be dereferenced.
            File[] possibleSubDomains = settingsDir.listFiles();
            if (possibleSubDomains != null) {
                for (int i = 0; i < possibleSubDomains.length; i++) {
                    File candidate = possibleSubDomains[i];
                    if (candidate.isDirectory() && isOverride(candidate)) {
                        // Found one!
                        confirmedSubDomains.add(candidate.getName());
                    }
                }
            }
        }
        return confirmedSubDomains;
    }

    /**
     * Checks if a file is a 'per host' override, or, if it is a directory,
     * whether it or any of its subdirectories contains a 'per host' override
     * file.
     * @param f The file or directory to check
     * @return True if the file is an override or it's a directory that contains
     *         such a file. A directory whose contents cannot be listed is
     *         treated as containing no override.
     */
    private boolean isOverride(File f) {
        if (f.isDirectory()) {
            // Have a directory, check its contents. listFiles() yields null
            // when the directory cannot be read.
            File[] subs = f.listFiles();
            if (subs != null) {
                for (int i = 0; i < subs.length; i++) {
                    if (isOverride(subs[i])) {
                        // Found one. Can stop looking.
                        return true;
                    }
                }
            }
            // Didn't find an override.
            return false;
        }
        // This is an override file (or sure looks like one in any case) if
        // its name is exactly the settings file name.
        return f.getName().equals(
            settingsFilename + "." + settingsFilenameSuffix);
    }

    /** Delete a settings object from persistent storage.
     *
     * Deletes the file represented by the submitted settings object. All empty
     * directories that are parents to the files path are also deleted, up to
     * but not including the settings directory.
     *
     * @param settings the settings object to delete.
     */
    public void deleteSettingsObject(CrawlerSettings settings) {
        super.deleteSettingsObject(settings);
        File settingsDirectory = getSettingsDirectory();
        File settingsFile = settingsToFilename(settings);

        settingsFile.delete();

        // Walk upwards removing directories that became empty. Stop at the
        // settings directory, at the file system root (null parent), or at a
        // directory that cannot be listed (list() returns null).
        File dir = settingsFile.getParentFile();
        while (dir != null && dir.isDirectory()
                && !dir.equals(settingsDirectory)) {
            String[] remaining = dir.list();
            if (remaining == null || remaining.length != 0) {
                break;
            }
            dir.delete();
            dir = dir.getParentFile();
        }
    }

    /**
     * Lists the files that make up the settings, as absolute path names.
     *
     * The list holds the order file first, then every file with the settings
     * suffix found under the settings directory (if that directory exists)
     * and finally the files reported by the modules of the order.
     *
     * @return list of absolute file names (Strings).
     * @see org.archive.crawler.settings.SettingsHandler#getListOfAllFiles()
     */
    public List getListOfAllFiles() {
        ArrayList list = new ArrayList();
        // Add CrawlOrder.
        list.add(getOrderFile().getAbsolutePath());
        // Iterate through the entire override hierarchy
        File settingsDir = getSettingsDirectory();
        if (settingsDir.exists()) {
            recursiveFindFiles(settingsDir, list);
        }
        // Get files used by settings modules.
        recursiveFindSecondaryFiles(getOrder(), list);
        return list;
    }

    /**
     * Add any files being used by any of the Modules making up the settings to
     * the list.
     *
     * An attribute that cannot be read is logged as a warning and skipped; the
     * remaining attributes are still interrogated.
     *
     * @param mbean A ModuleType to interrogate for files. Any child modules
     *              will be recursively interrogated.
     * @param list The list to add found files to.
     */
    private void recursiveFindSecondaryFiles(ComplexType mbean, ArrayList list) {
        MBeanInfo info = mbean.getMBeanInfo();
        MBeanAttributeInfo[] a = info.getAttributes();

        // Interrogate the current module
        if (mbean instanceof ModuleType) {
            ((ModuleType) mbean).listUsedFiles(list);
        }

        // Recursively interrogate all sub modules that are of ComplexType
        for (int n = 0; n < a.length; n++) {
            if (a[n] == null) {
                // Error null attribute.
                continue;
            }
            // Only the name is needed, so no cast to a more specific
            // attribute info type is required.
            String attributeName = a[n].getName();
            try {
                Object currentAttribute = mbean.getAttribute(attributeName);
                if (currentAttribute instanceof ComplexType) {
                    recursiveFindSecondaryFiles(
                        (ComplexType) currentAttribute, list);
                }
            } catch (AttributeNotFoundException e) {
                logger.warning("Could not read attribute '" + attributeName
                    + "' while listing used files: " + e.getMessage());
            } catch (MBeanException e) {
                logger.warning("Could not read attribute '" + attributeName
                    + "' while listing used files: " + e.getMessage());
            } catch (ReflectionException e) {
                logger.warning("Could not read attribute '" + attributeName
                    + "' while listing used files: " + e.getMessage());
            }
        }
    }

    /**
     * Starting at the specific directory this method will iterate through all
     * sub directories and add each file (as absolute name, with path as a
     * string) to the provided ArrayList. Any file found under the settings
     * directory with the proper suffix will be considered valid and added to
     * the list.
     * @param dir Starting directory
     * @param list The list to add to
     */
    private void recursiveFindFiles(File dir, ArrayList list) {
        File[] subs = dir.listFiles();
        if (subs == null) {
            // Not a directory, or it could not be read: nothing to add.
            return;
        }
        for (int i = 0; i < subs.length; i++) {
            File entry = subs[i];
            if (entry.isDirectory()) {
                recursiveFindFiles(entry, list);
            } else if (entry.getName().endsWith(settingsFilenameSuffix)) {
                // Add it to list
                list.add(entry.getAbsolutePath());
            }
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -