📄 crawljob.java
字号:
attributes.add(new OpenMBeanAttributeInfoSupport(DOC_RATE_ATTR, "Crawling rate (Docs/sec)", SimpleType.DOUBLE, true, false, false)); attributes.add(new OpenMBeanAttributeInfoSupport(KB_RATE_ATTR, "Current crawling rate (Kb/sec)", SimpleType.LONG, true, false, false)); attributes.add(new OpenMBeanAttributeInfoSupport(DOWNLOAD_COUNT_ATTR, "Count of downloaded documents", SimpleType.LONG, true, false, false)); attributes.add(new OpenMBeanAttributeInfoSupport(DISCOVERED_COUNT_ATTR, "Count of discovered documents", SimpleType.LONG, true, false, false)); // Add in the crawl order attributes. addCrawlOrderAttributes(this.getController().getOrder(), attributes); // Add the bdbje attributes. Convert to open mbean attributes. // First do bdbeje setup. Then add a subset of the bdbje attributes. // Keep around the list of names as a convenience for when it comes // time to test if attribute is supported. Environment env = this.controller.getBdbEnvironment(); try { this.bdbjeMBeanHelper = new JEMBeanHelper(env.getConfig(), env.getHome(), true); } catch (DatabaseException e) { e.printStackTrace(); InitializationException ie = new InitializationException(e.getMessage()); ie.setStackTrace(e.getStackTrace()); throw ie; } this.bdbjeAttributeNameList = Arrays.asList(new String [] { JEMBeanHelper.ATT_ENV_HOME, JEMBeanHelper.ATT_OPEN, JEMBeanHelper.ATT_IS_READ_ONLY, JEMBeanHelper.ATT_IS_TRANSACTIONAL, JEMBeanHelper.ATT_CACHE_SIZE, JEMBeanHelper.ATT_CACHE_PERCENT, JEMBeanHelper.ATT_LOCK_TIMEOUT, JEMBeanHelper.ATT_IS_SERIALIZABLE, JEMBeanHelper.ATT_SET_READ_ONLY, }); addBdbjeAttributes(attributes, this.bdbjeMBeanHelper.getAttributeList(env), this.bdbjeAttributeNameList); // Operations. List<OpenMBeanOperationInfo> operations = new ArrayList<OpenMBeanOperationInfo>(); OpenMBeanParameterInfo[] args = new OpenMBeanParameterInfoSupport[3]; args[0] = new OpenMBeanParameterInfoSupport("url", "URL to add to the frontier", SimpleType.STRING); args[1] = new OpenMBeanParameterInfoSupport("forceFetch", "True if URL is to be force fetched", SimpleType.BOOLEAN); args[2] = new OpenMBeanParameterInfoSupport("seed", "True if URL is a seed", SimpleType.BOOLEAN); operations.add(new OpenMBeanOperationInfoSupport(IMPORT_URI_OPER, "Add passed URL to the frontier", args, SimpleType.VOID, MBeanOperationInfo.ACTION)); args = new OpenMBeanParameterInfoSupport[4]; args[0] = new OpenMBeanParameterInfoSupport("pathOrUrl", "Path or URL to file of URLs", SimpleType.STRING); args[1] = new OpenMBeanParameterInfoSupport("style", "Format format:default|crawlLog|recoveryJournal", SimpleType.STRING); args[2] = new OpenMBeanParameterInfoSupport("forceFetch", "True if URLs are to be force fetched", SimpleType.BOOLEAN); args[3] = new OpenMBeanParameterInfoSupport("seed", "True if all content are seeds.", SimpleType.BOOLEAN); operations.add(new OpenMBeanOperationInfoSupport(IMPORT_URIS_OPER, "Add file of passed URLs to the frontier", args, SimpleType.STRING, MBeanOperationInfo.ACTION)); operations.add(new OpenMBeanOperationInfoSupport(PAUSE_OPER, "Pause crawling (noop if already paused)", null, SimpleType.VOID, MBeanOperationInfo.ACTION)); operations.add(new OpenMBeanOperationInfoSupport(RESUME_OPER, "Resume crawling (noop if already resumed)", null, SimpleType.VOID, MBeanOperationInfo.ACTION)); args = new OpenMBeanParameterInfoSupport[1]; args[0] = new OpenMBeanParameterInfoSupport("name", "Name of report ('all', 'standard', etc.).", SimpleType.STRING); operations.add(new OpenMBeanOperationInfoSupport(FRONTIER_REPORT_OPER, "Full frontier report", args, SimpleType.STRING, MBeanOperationInfo.INFO)); operations.add(new OpenMBeanOperationInfoSupport(THREADS_REPORT_OPER, "Full thread report", null, SimpleType.STRING, MBeanOperationInfo.INFO)); operations.add(new OpenMBeanOperationInfoSupport(SEEDS_REPORT_OPER, "Seeds report", null, SimpleType.STRING, MBeanOperationInfo.INFO)); operations.add( new OpenMBeanOperationInfoSupport(PROGRESS_STATISTICS_OPER, "Progress statistics at time of invocation", null, SimpleType.STRING, MBeanOperationInfo.INFO)); operations.add(new OpenMBeanOperationInfoSupport( PROGRESS_STATISTICS_LEGEND_OPER, "Progress statistics legend", null, SimpleType.STRING, MBeanOperationInfo.INFO)); operations.add(new OpenMBeanOperationInfoSupport(CHECKPOINT_OPER, "Start a checkpoint", null, SimpleType.VOID, MBeanOperationInfo.ACTION)); // Add bdbje operations. Add subset only. Keep around the list so have // it to hand when figuring what operations are supported. Usual actual // Strings because not accessible from JEMBeanHelper. this.bdbjeOperationsNameList = Arrays.asList(new String[] { "cleanLog", "evictMemory", "checkpoint", "sync", "getEnvironmentStatsToString", "getLockStatsToString", "getDatabaseNames", OP_DB_STAT }); addBdbjeOperations(operations, this.bdbjeMBeanHelper.getOperationList(env), this.bdbjeOperationsNameList); // Register notifications List<MBeanNotificationInfo> notifications = new ArrayList<MBeanNotificationInfo>(); notifications.add( new MBeanNotificationInfo(new String [] {"crawlStarted", "crawlEnding", "crawlPaused", "crawlResuming", PROG_STATS}, this.getClass().getName() + ".notifications", "CrawlStatusListener events and progress statistics as " + "notifications")); MBeanNotificationInfo [] notificationsArray = new MBeanNotificationInfo[notifications.size()]; notifications.toArray(notificationsArray); // Build the info object. OpenMBeanAttributeInfoSupport[] attributesArray = new OpenMBeanAttributeInfoSupport[attributes.size()]; attributes.toArray(attributesArray); OpenMBeanOperationInfoSupport[] operationsArray = new OpenMBeanOperationInfoSupport[operations.size()]; operations.toArray(operationsArray); return new OpenMBeanInfoSupport(this.getClass().getName(), "Current Crawl Job as OpenMBean", attributesArray, new OpenMBeanConstructorInfoSupport [] {}, operationsArray, notificationsArray); } protected void addBdbjeAttributes( final List<OpenMBeanAttributeInfo> attributes, final List<MBeanAttributeInfo> bdbjeAttributes, final List<String> bdbjeNamesToAdd) { for (MBeanAttributeInfo info: bdbjeAttributes) { if (bdbjeNamesToAdd.contains(info.getName())) { attributes.add(JmxUtils.convertToOpenMBeanAttribute(info)); } } } protected void addBdbjeOperations( final List<OpenMBeanOperationInfo> operations, final List<MBeanOperationInfo> bdbjeOperations, final List<String> bdbjeNamesToAdd) { for (MBeanOperationInfo info: bdbjeOperations) { if (bdbjeNamesToAdd.contains(info.getName())) { OpenMBeanOperationInfo omboi = null; if (info.getName().equals(OP_DB_STAT)) { // Db stats needs special handling. The published // signature is wrong and its return type is awkward. // Handle it. omboi = JmxUtils.convertToOpenMBeanOperation(info, null, SimpleType.STRING); MBeanParameterInfo[] params = omboi.getSignature(); OpenMBeanParameterInfo[] args = new OpenMBeanParameterInfoSupport[params.length + 1]; for (int ii = 0; ii < params.length; ii++) { args[ii] = (OpenMBeanParameterInfo) params[ii]; } args[params.length] = new OpenMBeanParameterInfoSupport( "name", "Database name", SimpleType.STRING); omboi = new OpenMBeanOperationInfoSupport(omboi.getName(), omboi.getDescription(), args, omboi.getReturnOpenType(), omboi.getImpact()); } else { omboi = JmxUtils.convertToOpenMBeanOperation(info); } operations.add(omboi); } } } protected void addCrawlOrderAttributes(final ComplexType type, final List<OpenMBeanAttributeInfo> attributes) { for (final Iterator i = type.getAttributeInfoIterator(null); i.hasNext();) { ModuleAttributeInfo info = (ModuleAttributeInfo)i.next(); if (ORDER_EXCLUDE.contains(info.getName())) { // Skip. continue; } String absoluteName = type.getAbsoluteName() + "/" + info.getName(); if (JmxUtils.isOpenType(info.getType())) { String description = info.getDescription(); if (description == null || description.length() <= 0) { // Description can't be empty. description = info.getName(); } attributes.add(new OpenMBeanAttributeInfoSupport( absoluteName, description, JmxUtils.getOpenType(info.getType()), true, true, false)); } else if(info.isComplexType()) { try { ComplexType c = (ComplexType)type.getAttribute(info.getName()); addCrawlOrderAttributes(c, attributes); } catch (AttributeNotFoundException e) { logger.log(Level.SEVERE, "Failed get of attribute", e); } catch (MBeanException e) { logger.log(Level.SEVERE, "Failed get of attribute", e); } catch (ReflectionException e) { logger.log(Level.SEVERE, "Failed get of attribute", e); } } else if (info.getType().equals(TextField.class.getName())) { // Special handling for TextField. Use the STRING OpenType. attributes.add(new OpenMBeanAttributeInfoSupport( absoluteName, info.getDescription(), SimpleType.STRING, true, true, false)); } else { // Looks like only type we don't currently handle is StringList. // Figure how to do it. Add as AttributeList? logger.fine(info.getType()); } } } public Object getAttribute(String attribute_name) throws AttributeNotFoundException { if (attribute_name == null) { throw new RuntimeOperationsException( new IllegalArgumentException("Attribute name cannot be null"), "Cannot call getAttribute with null attribute name"); } // If no controller, we can't do any work in here. if (this.controller == null) { throw new RuntimeOperationsException( new NullPointerException("Controller is null"), "Controller is null"); } // Is it a bdbje attribute? if (this.bdbjeAttributeNameList.contains(attribute_name)) { try { return this.bdbjeMBeanHelper.getAttribute( this.controller.getBdbEnvironment(), attribute_name); } catch (MBeanException e) { throw new RuntimeOperationsException(new RuntimeException(e)); } } // Is it a crawl-order attribute? if (attribute_name. startsWith(this.controller.getOrder().getAbsoluteName())) { return getCrawlOrderAttribute(attribute_name); } if (!ATTRIBUTE_LIST.contains(attribute_name)) { throw new AttributeNotFoundException("Attribute " + attribute_name + " is unimplemented."); } // The pattern in the below is to match an attribute and when found // do a return out of if clause. Doing it this way, I can fall // on to the AttributeNotFoundException for case where we've an // attribute but no handler. if (attribute_name.equals(STATUS_ATTR)) { return getCrawlStatus(); } if (attribute_name.equals(NAME_ATTR)) { return getJobName(); } if (attribute_name.equals(UID_ATTR)) { return getUID(); } if (attribute_name.equals(TOTAL_DATA_ATTR)) { return new Long(this.controller == null && this.controller.getStatistics() != null? 0: this.controller.getStatistics().totalBytesWritten()); } if (attribute_name.equals(CRAWL_TIME_ATTR)) { return new Long(this.controller == null && this.controller.getStatistics() != null? 0: this.controller.getStatistics().getCrawlerTotalElapsedTime() / 1000); } if (attribute_name.equals(CURRENT_DOC_RATE_ATTR)) { return new Double(this.controller == null && this.controller.getStatistics() != null? 0: this.controller.getStatistics().currentProcessedDocsPerSec()); } if (attribute_name.equals(DOC_RATE_ATTR)) { return new Double(this.controller == null && this.controller.getStatistics() != null? 0: this.controller.getStatistics().processedDocsPerSec()); } if (attribute_name.equals(KB_RATE_ATTR)) { return new Long(this.controller == null && this.controller.getStatistics() != null? 0: this.controller.getStatistics().currentProcessedKBPerSec()); } if (attribute_name.equals(CURRENT_KB_RATE_ATTR)) { return new Long(this.controller == null && this.controller.getStatistics() != null? 0: this.controller.getStatistics().processedKBPerSec()); } if (attribute_name.equals(THREAD_COUNT_ATTR)) { return new Integer(this.controller == null && this.controller.getStatistics() != null? 0: this.controller.getStatistics().activeThreadCount()); } if (attribute_name.equals(FRONTIER_SHORT_REPORT_ATTR)) { return getFrontierOneLine(); } i
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -