⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 crawlcontroller.java

📁 最强的爬虫工程
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
            getPathRelativeToWorkingDirectory(diskPath);        this.disk.mkdirs();        this.logsDisk = getSettingsDir(CrawlOrder.ATTR_LOGS_PATH);        this.checkpointsDisk = getSettingsDir(CrawlOrder.ATTR_CHECKPOINTS_PATH);        this.stateDisk = getSettingsDir(CrawlOrder.ATTR_STATE_PATH);        this.scratchDisk = getSettingsDir(CrawlOrder.ATTR_SCRATCH_PATH);    }        /**     * @return The logging directory or null if problem reading the settings.     */    public File getLogsDir() {        File f = null;        try {            f = getSettingsDir(CrawlOrder.ATTR_LOGS_PATH);        } catch (AttributeNotFoundException e) {            LOGGER.severe("Failed get of logs directory: " + e.getMessage());        }        return f;    }        /**     * Return fullpath to the directory named by <code>key</code>     * in settings.     * If directory does not exist, it and all intermediary dirs     * will be created.     * @param key Key to use going to settings.     * @return Full path to directory named by <code>key</code>.     * @throws AttributeNotFoundException     */    public File getSettingsDir(String key)    throws AttributeNotFoundException {        String path = (String)order.getAttribute(null, key);        File f = new File(path);        if (!f.isAbsolute()) {            f = new File(disk.getPath(), path);        }        if (!f.exists()) {            f.mkdirs();        }        return f;    }    /**     * Setup the statistics tracker.     * The statistics object must be created before modules can use it.     * Do it here now so that when modules retrieve the object from the     * controller during initialization (which some do), its in place.     * @throws InvalidAttributeValueException     * @throws FatalConfigurationException     */    private void setupStatTracking()    throws InvalidAttributeValueException, FatalConfigurationException {        MapType loggers = order.getLoggers();        final String cstName = "crawl-statistics";        if (loggers.isEmpty(null)) {            if (!isCheckpointRecover() && this.statistics == null) {                this.statistics = new StatisticsTracker(cstName);            }            loggers.addElement(null, (StatisticsTracker)this.statistics);        }                if (isCheckpointRecover()) {            restoreStatisticsTracker(loggers, cstName);        }        for (Iterator it = loggers.iterator(null); it.hasNext();) {            StatisticsTracking tracker = (StatisticsTracking)it.next();            tracker.initialize(this);            if (this.statistics == null) {                this.statistics = tracker;            }        }    }        protected void restoreStatisticsTracker(MapType loggers,        String replaceName)    throws FatalConfigurationException {        try {            // Add the deserialized statstracker to the settings system.            loggers.removeElement(loggers.globalSettings(), replaceName);            loggers.addElement(loggers.globalSettings(),                (StatisticsTracker)this.statistics);         } catch (Exception e) {             throw convertToFatalConfigurationException(e);         }    }        protected FatalConfigurationException            convertToFatalConfigurationException(Exception e) {        FatalConfigurationException fce =            new FatalConfigurationException("Converted exception: " +               e.getMessage());        fce.setStackTrace(e.getStackTrace());        return fce;    }    private void setupLogs() throws IOException {        String logsPath = logsDisk.getAbsolutePath() + File.separatorChar;        uriProcessing = Logger.getLogger(LOGNAME_CRAWL + "." + logsPath);        runtimeErrors = Logger.getLogger(LOGNAME_RUNTIME_ERRORS + "." +            logsPath);        localErrors = Logger.getLogger(LOGNAME_LOCAL_ERRORS + "." + logsPath);        uriErrors = Logger.getLogger(LOGNAME_URI_ERRORS + "." + logsPath);        progressStats = Logger.getLogger(LOGNAME_PROGRESS_STATISTICS + "." +            logsPath);        this.fileHandlers = new HashMap();        setupLogFile(uriProcessing,            logsPath + LOGNAME_CRAWL + CURRENT_LOG_SUFFIX,            new UriProcessingFormatter(), true);        setupLogFile(runtimeErrors,            logsPath + LOGNAME_RUNTIME_ERRORS + CURRENT_LOG_SUFFIX,            new RuntimeErrorFormatter(), true);        setupLogFile(localErrors,            logsPath + LOGNAME_LOCAL_ERRORS + CURRENT_LOG_SUFFIX,            new LocalErrorFormatter(), true);        setupLogFile(uriErrors,            logsPath + LOGNAME_URI_ERRORS + CURRENT_LOG_SUFFIX,            new UriErrorFormatter(), true);        setupLogFile(progressStats,            logsPath + LOGNAME_PROGRESS_STATISTICS + CURRENT_LOG_SUFFIX,            new StatisticsLogFormatter(), true);    }    private void setupLogFile(Logger logger, String filename, Formatter f,            boolean shouldManifest) throws IOException, SecurityException {        GenerationFileHandler fh = new GenerationFileHandler(filename, true,            shouldManifest);        fh.setFormatter(f);        logger.addHandler(fh);        addToManifest(filename, MANIFEST_LOG_FILE, shouldManifest);        logger.setUseParentHandlers(false);        this.fileHandlers.put(logger, fh);    }        protected void rotateLogFiles(String generationSuffix)    throws IOException {        if (this.state != PAUSED && this.state != CHECKPOINTING) {            throw new IllegalStateException("Pause crawl before requesting " +                "log rotation.");        }        for (Iterator i = fileHandlers.keySet().iterator(); i.hasNext();) {            Logger l = (Logger)i.next();            GenerationFileHandler gfh =                (GenerationFileHandler)fileHandlers.get(l);            GenerationFileHandler newGfh =                gfh.rotate(generationSuffix, CURRENT_LOG_SUFFIX);            if (gfh.shouldManifest()) {                addToManifest((String) newGfh.getFilenameSeries().get(1),                    MANIFEST_LOG_FILE, newGfh.shouldManifest());            }            l.removeHandler(gfh);            l.addHandler(newGfh);            fileHandlers.put(l, newGfh);        }    }    /**     * Close all log files and remove handlers from loggers.     */    public void closeLogFiles() {       for (Iterator i = fileHandlers.keySet().iterator(); i.hasNext();) {            Logger l = (Logger)i.next();            GenerationFileHandler gfh =                (GenerationFileHandler)fileHandlers.get(l);            gfh.close();            l.removeHandler(gfh);        }    }    /**     * Sets the values for max bytes, docs and time based on crawl order.      */    private void setThresholds() {        try {            maxBytes =                ((Long) order.getAttribute(CrawlOrder.ATTR_MAX_BYTES_DOWNLOAD))                    .longValue();        } catch (Exception e) {            maxBytes = 0;        }        try {            maxDocument =                ((Long) order                    .getAttribute(CrawlOrder.ATTR_MAX_DOCUMENT_DOWNLOAD))                    .longValue();        } catch (Exception e) {            maxDocument = 0;        }        try {            maxTime =                ((Long) order.getAttribute(CrawlOrder.ATTR_MAX_TIME_SEC))                    .longValue();        } catch (Exception e) {            maxTime = 0;        }    }    /**     * @return Object this controller is using to track crawl statistics     */    public StatisticsTracking getStatistics() {        return statistics==null ?            new StatisticsTracker("crawl-statistics"): this.statistics;    }        /**     * Send crawl change event to all listeners.     * @param newState State change we're to tell listeners' about.     * @param message Message on state change.     * @see #sendCheckpointEvent(File) for special case event sending     * telling listeners to checkpoint.     */    protected void sendCrawlStateChangeEvent(Object newState, String message) {        synchronized (this.registeredCrawlStatusListeners) {            this.state = newState;            for (Iterator i = this.registeredCrawlStatusListeners.iterator();                    i.hasNext();) {                CrawlStatusListener l = (CrawlStatusListener)i.next();                if (newState.equals(PAUSED)) {                   l.crawlPaused(message);                } else if (newState.equals(RUNNING)) {                    l.crawlResuming(message);                } else if (newState.equals(PAUSING)) {                   l.crawlPausing(message);                } else if (newState.equals(STARTED)) {                    l.crawlStarted(message);                } else if (newState.equals(STOPPING)) {                    l.crawlEnding(message);                } else if (newState.equals(FINISHED)) {                    l.crawlEnded(message);                } else if (newState.equals(PREPARING)) {                    l.crawlResuming(message);                } else {                    throw new RuntimeException("Unknown state: " + newState);                }                if (LOGGER.isLoggable(Level.FINE)) {                    LOGGER.fine("Sent " + newState + " to " + l);                }            }            LOGGER.fine("Sent " + newState);        }    }        /**     * Send the checkpoint event.     * Has its own method apart from     * {@link #sendCrawlStateChangeEvent(Object, String)} because checkpointing     * throws an Exception (Didn't want to have to wrap all of the     * sendCrawlStateChangeEvent in try/catches).     * @param checkpointDir Where to write checkpoint state to.     * @throws Exception     */    protected void sendCheckpointEvent(File checkpointDir) throws Exception {        synchronized (this.registeredCrawlStatusListeners) {            if (this.state != PAUSED) {                throw new IllegalStateException("Crawler must be completly " +                    "paused before checkpointing can start");            }            this.state = CHECKPOINTING;            for (Iterator i = this.registeredCrawlStatusListeners.iterator();                    i.hasNext();) {                CrawlStatusListener l = (CrawlStatusListener)i.next();                l.crawlCheckpoint(checkpointDir);                if (LOGGER.isLoggable(Level.FINE)) {                    LOGGER.fine("Sent " + CHECKPOINTING + " to " + l);                }            }            LOGGER.fine("Sent " + CHECKPOINTING);        }    }    /**     * Operator requested crawl begin     */    public void requestCrawlStart() {        runProcessorInitialTasks();        sendCrawlStateChangeEvent(STARTED, CrawlJob.STATUS_PENDING);        String jobState;        state = RUNNING;        jobState = CrawlJob.STATUS_RUNNING;        sendCrawlStateChangeEvent(this.state, jobState);        // A proper exit will change this value.        this.sExit = CrawlJob.STATUS_FINISHED_ABNORMAL;                Thread statLogger = new Thread(statistics);        statLogger.setName("StatLogger");        statLogger.start();                frontier.start();    }    /**     * Called when the last toethread exits.     */    protected void completeStop() {        LOGGER.fine("Entered complete stop.");        // Run processors' final tasks        runProcessorFinalTasks();        // Ok, now we are ready to exit.        sendCrawlStateChangeEvent(FINISHED, this.sExit);        synchronized (this.registeredCrawlStatusListeners) {            // Remove all listeners now we're done with them.            this.registeredCrawlStatusListeners.                removeAll(this.registeredCrawlStatusListeners);            this.registeredCrawlStatusListeners = null;        }                closeLogFiles();                // Release reference to logger file handler instances.        this.fileHandlers = null;        this.uriErrors = null;        this.uriProcessing = null;        this.localErrors = null;        this.runtimeErrors = null;        this.progressStats = null;        this.reports = null;        this.manifest = null;        // Do cleanup.        this.statistics = null;        this.frontier = null;        this.disk = null;        this.scratchDisk = null;        this.order = null;        this.scope = null;        if (this.settingsHandler !=  null) {            this.settingsHandler.cleanup();        }        this.settingsHandler = null;        this.reserveMemory = null;        this.processorChains = null;        if (this.serverCache != null) {            this.serverCache.cleanup();            this.serverCache = null;        }        if (this.checkpointer != null) {            this.checkpointer.cleanup();            this.checkpointer = null;        }        if (this.classCatalogDB != null) {            try {                this.classCatalogDB.close();            } catch (DatabaseException e) {                e.printStackTrace();            }            this.classCatalogDB = null;        }        if (this.bdbEnvironment != null) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -