⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 crawljob.java

📁 这是个爬虫，和 Lucene 相结合最好了
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
        attributes.add(new OpenMBeanAttributeInfoSupport(DOC_RATE_ATTR,            "Crawling rate (Docs/sec)", SimpleType.DOUBLE,            true, false, false));        attributes.add(new OpenMBeanAttributeInfoSupport(KB_RATE_ATTR,            "Current crawling rate (Kb/sec)", SimpleType.LONG,            true, false, false));        attributes.add(new OpenMBeanAttributeInfoSupport(DOWNLOAD_COUNT_ATTR,            "Count of downloaded documents", SimpleType.LONG,            true, false, false));        attributes.add(new OpenMBeanAttributeInfoSupport(DISCOVERED_COUNT_ATTR,            "Count of discovered documents", SimpleType.LONG,            true, false, false));                // Add in the crawl order attributes.        addCrawlOrderAttributes(this.getController().getOrder(), attributes);                // Add the bdbje attributes.  Convert to open mbean attributes.        // First do bdbeje setup.  Then add a subset of the bdbje attributes.        // Keep around the list of names as a convenience for when it comes        // time to test if attribute is supported.        
Environment env = this.controller.getBdbEnvironment();        try {            this.bdbjeMBeanHelper =                new JEMBeanHelper(env.getConfig(), env.getHome(), true);        } catch (DatabaseException e) {            e.printStackTrace();            InitializationException ie =                new InitializationException(e.getMessage());            ie.setStackTrace(e.getStackTrace());            throw ie;        }        this.bdbjeAttributeNameList = Arrays.asList(new String [] {                JEMBeanHelper.ATT_ENV_HOME,                JEMBeanHelper.ATT_OPEN,                JEMBeanHelper.ATT_IS_READ_ONLY,                JEMBeanHelper.ATT_IS_TRANSACTIONAL,                JEMBeanHelper.ATT_CACHE_SIZE,                JEMBeanHelper.ATT_CACHE_PERCENT,                JEMBeanHelper.ATT_LOCK_TIMEOUT,                JEMBeanHelper.ATT_IS_SERIALIZABLE,                JEMBeanHelper.ATT_SET_READ_ONLY,        });        addBdbjeAttributes(attributes,                this.bdbjeMBeanHelper.getAttributeList(env),                this.bdbjeAttributeNameList);        // Operations.        
List<OpenMBeanOperationInfo> operations         = new ArrayList<OpenMBeanOperationInfo>();        OpenMBeanParameterInfo[] args = new OpenMBeanParameterInfoSupport[3];        args[0] = new OpenMBeanParameterInfoSupport("url",            "URL to add to the frontier", SimpleType.STRING);        args[1] = new OpenMBeanParameterInfoSupport("forceFetch",            "True if URL is to be force fetched", SimpleType.BOOLEAN);        args[2] = new OpenMBeanParameterInfoSupport("seed",            "True if URL is a seed", SimpleType.BOOLEAN);        operations.add(new OpenMBeanOperationInfoSupport(IMPORT_URI_OPER,            "Add passed URL to the frontier", args, SimpleType.VOID,                MBeanOperationInfo.ACTION));                args = new OpenMBeanParameterInfoSupport[4];        args[0] = new OpenMBeanParameterInfoSupport("pathOrUrl",            "Path or URL to file of URLs", SimpleType.STRING);        args[1] = new OpenMBeanParameterInfoSupport("style",            "Format format:default|crawlLog|recoveryJournal",            SimpleType.STRING);        args[2] = new OpenMBeanParameterInfoSupport("forceFetch",            "True if URLs are to be force fetched", SimpleType.BOOLEAN);        args[3] = new OpenMBeanParameterInfoSupport("seed",            "True if all content are seeds.", SimpleType.BOOLEAN);        operations.add(new OpenMBeanOperationInfoSupport(IMPORT_URIS_OPER,            "Add file of passed URLs to the frontier", args, SimpleType.STRING,                MBeanOperationInfo.ACTION));                operations.add(new OpenMBeanOperationInfoSupport(PAUSE_OPER,            "Pause crawling (noop if already paused)", null, SimpleType.VOID,            MBeanOperationInfo.ACTION));                operations.add(new OpenMBeanOperationInfoSupport(RESUME_OPER,            "Resume crawling (noop if already resumed)", null,            SimpleType.VOID, MBeanOperationInfo.ACTION));                args = new OpenMBeanParameterInfoSupport[1];        args[0] = new 
OpenMBeanParameterInfoSupport("name",            "Name of report ('all', 'standard', etc.).", SimpleType.STRING);        operations.add(new OpenMBeanOperationInfoSupport(FRONTIER_REPORT_OPER,             "Full frontier report", args, SimpleType.STRING,             MBeanOperationInfo.INFO));                operations.add(new OpenMBeanOperationInfoSupport(THREADS_REPORT_OPER,             "Full thread report", null, SimpleType.STRING,             MBeanOperationInfo.INFO));                operations.add(new OpenMBeanOperationInfoSupport(SEEDS_REPORT_OPER,             "Seeds report", null, SimpleType.STRING, MBeanOperationInfo.INFO));           operations.add(                new OpenMBeanOperationInfoSupport(PROGRESS_STATISTICS_OPER,                "Progress statistics at time of invocation", null,                SimpleType.STRING, MBeanOperationInfo.INFO));                 operations.add(new OpenMBeanOperationInfoSupport(            PROGRESS_STATISTICS_LEGEND_OPER,                "Progress statistics legend", null,                SimpleType.STRING, MBeanOperationInfo.INFO));                  operations.add(new OpenMBeanOperationInfoSupport(CHECKPOINT_OPER,                "Start a checkpoint", null, SimpleType.VOID,                MBeanOperationInfo.ACTION));                        // Add bdbje operations. Add subset only. Keep around the list so have        // it to hand when figuring what operations are supported. Usual actual        // Strings because not accessible from JEMBeanHelper.        
this.bdbjeOperationsNameList = Arrays.asList(new String[] { "cleanLog",                "evictMemory", "checkpoint", "sync",                "getEnvironmentStatsToString", "getLockStatsToString",                "getDatabaseNames", OP_DB_STAT        });        addBdbjeOperations(operations,                this.bdbjeMBeanHelper.getOperationList(env),                this.bdbjeOperationsNameList);                // Register notifications        List<MBeanNotificationInfo> notifications         = new ArrayList<MBeanNotificationInfo>();        notifications.add(            new MBeanNotificationInfo(new String [] {"crawlStarted",                    "crawlEnding", "crawlPaused", "crawlResuming", PROG_STATS},                this.getClass().getName() + ".notifications",                "CrawlStatusListener events and progress statistics as " +                    "notifications"));        MBeanNotificationInfo [] notificationsArray =            new MBeanNotificationInfo[notifications.size()];        notifications.toArray(notificationsArray);                // Build the info object.        
OpenMBeanAttributeInfoSupport[] attributesArray =            new OpenMBeanAttributeInfoSupport[attributes.size()];        attributes.toArray(attributesArray);        OpenMBeanOperationInfoSupport[] operationsArray =            new OpenMBeanOperationInfoSupport[operations.size()];        operations.toArray(operationsArray);        return new OpenMBeanInfoSupport(this.getClass().getName(),            "Current Crawl Job as OpenMBean",            attributesArray,            new OpenMBeanConstructorInfoSupport [] {},            operationsArray,            notificationsArray);    }        protected void addBdbjeAttributes(            final List<OpenMBeanAttributeInfo> attributes,            final List<MBeanAttributeInfo> bdbjeAttributes,             final List<String> bdbjeNamesToAdd) {        for (MBeanAttributeInfo info: bdbjeAttributes) {            if (bdbjeNamesToAdd.contains(info.getName())) {                attributes.add(JmxUtils.convertToOpenMBeanAttribute(info));            }        }       }        protected void addBdbjeOperations(            final List<OpenMBeanOperationInfo> operations,            final List<MBeanOperationInfo> bdbjeOperations,             final List<String> bdbjeNamesToAdd) {        for (MBeanOperationInfo info: bdbjeOperations) {            if (bdbjeNamesToAdd.contains(info.getName())) {                OpenMBeanOperationInfo omboi = null;                if (info.getName().equals(OP_DB_STAT)) {                    // Db stats needs special handling. The published                    // signature is wrong and its return type is awkward.                    // Handle it.                    
omboi = JmxUtils.convertToOpenMBeanOperation(info, null,                        SimpleType.STRING);                    MBeanParameterInfo[] params = omboi.getSignature();                    OpenMBeanParameterInfo[] args =                        new OpenMBeanParameterInfoSupport[params.length + 1];                    for (int ii = 0; ii < params.length; ii++) {                        args[ii] = (OpenMBeanParameterInfo) params[ii];                    }                    args[params.length] = new OpenMBeanParameterInfoSupport(                            "name", "Database name", SimpleType.STRING);                    omboi = new OpenMBeanOperationInfoSupport(omboi.getName(),                        omboi.getDescription(), args, omboi.getReturnOpenType(),                        omboi.getImpact());                } else {                    omboi = JmxUtils.convertToOpenMBeanOperation(info);                }                operations.add(omboi);            }        }    }        protected void addCrawlOrderAttributes(final ComplexType type,            final List<OpenMBeanAttributeInfo> attributes) {        for (final Iterator i = type.getAttributeInfoIterator(null);                i.hasNext();) {            ModuleAttributeInfo info = (ModuleAttributeInfo)i.next();            if (ORDER_EXCLUDE.contains(info.getName())) {                // Skip.                continue;            }            String absoluteName = type.getAbsoluteName() + "/" + info.getName();            if (JmxUtils.isOpenType(info.getType())) {                String description = info.getDescription();                if (description == null || description.length() <= 0) {                    // Description can't be empty.                    
description = info.getName();                }                attributes.add(new OpenMBeanAttributeInfoSupport(                    absoluteName, description,                    JmxUtils.getOpenType(info.getType()), true, true, false));            } else if(info.isComplexType()) {                try {                    ComplexType c =                        (ComplexType)type.getAttribute(info.getName());                    addCrawlOrderAttributes(c, attributes);                } catch (AttributeNotFoundException e) {                    logger.log(Level.SEVERE, "Failed get of attribute", e);                } catch (MBeanException e) {                    logger.log(Level.SEVERE, "Failed get of attribute", e);                } catch (ReflectionException e) {                    logger.log(Level.SEVERE, "Failed get of attribute", e);                }            } else if (info.getType().equals(TextField.class.getName())) {                // Special handling for TextField.  Use the STRING OpenType.                attributes.add(new OpenMBeanAttributeInfoSupport(                        absoluteName, info.getDescription(),                        SimpleType.STRING, true, true, false));            } else {                // Looks like only type we don't currently handle is StringList.                // Figure how to do it.  Add as AttributeList?                logger.fine(info.getType());            }        }    }        public Object getAttribute(String attribute_name)    throws AttributeNotFoundException {        if (attribute_name == null) {            throw new RuntimeOperationsException(                 new IllegalArgumentException("Attribute name cannot be null"),                 "Cannot call getAttribute with null attribute name");        }                // If no controller, we can't do any work in here.        
if (this.controller == null) {            throw new RuntimeOperationsException(                 new NullPointerException("Controller is null"),                 "Controller is null");        }                // Is it a bdbje attribute?        if (this.bdbjeAttributeNameList.contains(attribute_name)) {            try {                return this.bdbjeMBeanHelper.getAttribute(                        this.controller.getBdbEnvironment(), attribute_name);            } catch (MBeanException e) {                throw new RuntimeOperationsException(new RuntimeException(e));            }        }                // Is it a crawl-order attribute?        if (attribute_name.                startsWith(this.controller.getOrder().getAbsoluteName())) {            return getCrawlOrderAttribute(attribute_name);        }                if (!ATTRIBUTE_LIST.contains(attribute_name)) {            throw new AttributeNotFoundException("Attribute " +                    attribute_name + " is unimplemented.");        }        // The pattern in the below is to match an attribute and when found        // do a return out of if clause.  Doing it this way, I can fall        // on to the AttributeNotFoundException for case where we've an        // attribute but no handler.        if (attribute_name.equals(STATUS_ATTR)) {            return getCrawlStatus();        }        if (attribute_name.equals(NAME_ATTR)) {            return getJobName();        }        if (attribute_name.equals(UID_ATTR)) {            return getUID();        }        if (attribute_name.equals(TOTAL_DATA_ATTR)) {            return new Long(this.controller == null &&                    this.controller.getStatistics() != null? 0:                this.controller.getStatistics().totalBytesWritten());        }        if (attribute_name.equals(CRAWL_TIME_ATTR)) {            return new Long(this.controller == null &&                    this.controller.getStatistics() != null? 
0:                this.controller.getStatistics().getCrawlerTotalElapsedTime() /                    1000);        }        if (attribute_name.equals(CURRENT_DOC_RATE_ATTR)) {            return new Double(this.controller == null &&                    this.controller.getStatistics() != null? 0:                this.controller.getStatistics().currentProcessedDocsPerSec());        }        if (attribute_name.equals(DOC_RATE_ATTR)) {            return new Double(this.controller == null &&                    this.controller.getStatistics() != null? 0:                this.controller.getStatistics().processedDocsPerSec());        }        if (attribute_name.equals(KB_RATE_ATTR)) {            return new Long(this.controller == null &&                    this.controller.getStatistics() != null? 0:                this.controller.getStatistics().currentProcessedKBPerSec());        }        if (attribute_name.equals(CURRENT_KB_RATE_ATTR)) {            return new Long(this.controller == null &&                    this.controller.getStatistics() != null? 0:                this.controller.getStatistics().processedKBPerSec());        }        if (attribute_name.equals(THREAD_COUNT_ATTR)) {            return new Integer(this.controller == null &&                    this.controller.getStatistics() != null? 0:                this.controller.getStatistics().activeThreadCount());        }               if (attribute_name.equals(FRONTIER_SHORT_REPORT_ATTR)) {            return getFrontierOneLine();        }        i

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -