📄 adaptiverevisitfrontier.java
字号:
try{
                long completeTime = curi.getLong(A_FETCH_COMPLETED_TIME);
                long durationTaken =
                    (completeTime - curi.getLong(A_FETCH_BEGAN_TIME));
                durationToWait = (long)(
                        ((Float) getAttribute(ATTR_DELAY_FACTOR, curi))
                            .floatValue() * durationTaken);

                long minDelay =
                    ((Integer) getAttribute(ATTR_MIN_DELAY, curi)).longValue();

                if (minDelay > durationToWait) {
                    // wait at least the minimum
                    durationToWait = minDelay;
                }

                long maxDelay =
                    ((Integer) getAttribute(ATTR_MAX_DELAY, curi)).longValue();
                if (durationToWait > maxDelay) {
                    // wait no more than the maximum
                    durationToWait = maxDelay;
                }
            } catch (AttributeNotFoundException e) {
                logger.severe("Unable to find attribute. " + curi.toString());
                //Wait for max interval.
                durationToWait = DEFAULT_MAX_DELAY.longValue();
            }
        }
        // Never return less than the default minimum delay.
        long ret = durationToWait > DEFAULT_MIN_DELAY.longValue() ?
                durationToWait : DEFAULT_MIN_DELAY.longValue();
        logger.finest("Snooze time for " + curi.toString() + " = " + ret );
        return ret;
    }

    /**
     * Reports the number of discovered URIs.
     *
     * @return <code>alreadyIncluded.count()</code> when an
     *         <code>alreadyIncluded</code> filter is set, otherwise
     *         <code>hostQueues.getSize()</code>.
     * @see org.archive.crawler.framework.Frontier#discoveredUriCount()
     */
    public synchronized long discoveredUriCount() {
        return (this.alreadyIncluded != null) ?
                this.alreadyIncluded.count() : hostQueues.getSize();
    }

    /**
     * Reports the number of queued URIs.
     *
     * @return <code>hostQueues.getSize()</code>
     * @see org.archive.crawler.framework.Frontier#queuedUriCount()
     */
    public synchronized long queuedUriCount() {
        return hostQueues.getSize();
    }

    /**
     * Reports the number of finished URIs.
     *
     * @return the sum of the succeeded, failed and disregarded counters.
     * @see org.archive.crawler.framework.Frontier#finishedUriCount()
     */
    public long finishedUriCount() {
        return succeededFetchCount+failedFetchCount+disregardedUriCount;
    }

    /**
     * @return the current value of the succeeded-fetch counter.
     * @see org.archive.crawler.framework.Frontier#succeededFetchCount()
     */
    public long succeededFetchCount() {
        return succeededFetchCount;
    }

    /**
     * @return the current value of the failed-fetch counter.
     * @see org.archive.crawler.framework.Frontier#failedFetchCount()
     */
    public long failedFetchCount() {
        return failedFetchCount;
    }

    /**
     * Returns the current value of the disregarded-URI counter.
     *
     * <p>This is a pure read. It previously returned
     * <code>disregardedUriCount++</code>, which incremented the counter
     * every time the statistic was queried and thereby also inflated
     * {@link #finishedUriCount()}.
     *
     * @return the current value of the disregarded-URI counter.
     * @see org.archive.crawler.framework.Frontier#disregardedUriCount()
     */
    public long disregardedUriCount() {
        return disregardedUriCount;
    }

    /**
     * @return the current value of <code>totalProcessedBytes</code>.
     * @see org.archive.crawler.framework.Frontier#totalBytesWritten()
     */
    public long totalBytesWritten() {
        return totalProcessedBytes;
    }

    /**
     * Method is not supported by this Frontier implementation.
     *
     * @param pathToLog ignored
     * @throws IOException always, to signal that the operation is
     *         unsupported
     */
    public void importRecoverLog(String pathToLog) throws IOException {
        throw new IOException("Unsupported by this frontier.");
    }

    /**
     * Not implemented by this frontier.
     *
     * @param regexpr ignored
     * @param inCacheOnly ignored
     * @return always <code>null</code>
     */
    public synchronized FrontierMarker getInitialMarker(String regexpr,
            boolean inCacheOnly) {
        return null;
    }

    /**
     * Not implemented by this frontier.
     *
     * @param marker ignored
     * @param numberOfMatches ignored
     * @param verbose ignored
     * @return always <code>null</code>
     * @throws InvalidFrontierMarkerException declared for interface
     *         compatibility; this implementation never throws it
     * @see org.archive.crawler.framework.Frontier#getURIsList(org.archive.crawler.framework.FrontierMarker, int, boolean)
     */
    public synchronized ArrayList getURIsList(FrontierMarker marker,
            int numberOfMatches, boolean verbose)
        throws InvalidFrontierMarkerException {
        // TODO Auto-generated method stub
        return null;
    }

    /**
     * Not implemented by this frontier; nothing is deleted.
     *
     * @param match ignored
     * @return always <code>0</code>
     * @see org.archive.crawler.framework.Frontier#deleteURIs(java.lang.String)
     */
    public synchronized long deleteURIs(String match) {
        // TODO Auto-generated method stub
        return 0;
    }

    /**
     * Not implemented by this frontier; the call is a no-op.
     *
     * @param curi ignored
     * @see org.archive.crawler.framework.Frontier#deleted(org.archive.crawler.datamodel.CrawlURI)
     */
    public synchronized void deleted(CrawlURI curi) {
        // TODO Auto-generated method stub
    }

    /**
     * Wraps the given UURI in a new {@link CrawlURI} and hands it to
     * <code>innerSchedule</code>.
     *
     * @param u the URI to schedule
     */
    public void considerIncluded(UURI u) {
        // This will cause the URI to be crawled!!!
        CrawlURI curi = new CrawlURI(u);
        innerSchedule(curi);
    }

    /**
     * Reacts to a settings update by calling <code>loadSeeds()</code>.
     */
    public void kickUpdate() {
        loadSeeds();
    }

    /**
     * Starts the frontier; delegates to {@link #unpause()}.
     */
    public void start() {
        unpause();
    }

    /**
     * Sets the pause flag and notifies all threads waiting on this
     * frontier's monitor.
     */
    synchronized public void pause() {
        shouldPause = true;
        notifyAll();
    }

    /**
     * Clears the pause flag and notifies all threads waiting on this
     * frontier's monitor.
     */
    synchronized public void unpause() {
        shouldPause = false;
        notifyAll();
    }

    /**
     * Sets the terminate flag.
     */
    // NOTE(review): unlike pause()/unpause(), this does not call notifyAll();
    // confirm that a thread waiting on this monitor cannot miss the flag.
    synchronized public void terminate() {
        shouldTerminate = true;
    }

    /**
     * This frontier keeps no journal.
     *
     * @return always <code>null</code>
     * @see org.archive.crawler.framework.Frontier#getFrontierJournal()
     */
    public FrontierJournal getFrontierJournal() {
        return null;
    }

    /**
     * Serializable {@link ThreadLocal} whose per-thread value is a
     * {@link Queue} of {@link CandidateURI}s, created on first access as a
     * new {@link MemQueue}.
     */
    private static class ThreadLocalQueue
    extends ThreadLocal<Queue<CandidateURI>> implements Serializable {

        private static final long serialVersionUID = 8268977225156462059L;

        /**
         * @return a fresh, empty {@link MemQueue} for the calling thread
         */
        @Override
        protected Queue<CandidateURI> initialValue() {
            return new MemQueue<CandidateURI>();
        }

        /**
         * @return Queue of 'batched' items
         */
        public Queue<CandidateURI> getQueue() {
            return get();
        }
    }

    /**
     * This method is not supported by this Frontier implementation.
     *
     * @param pathToLog ignored
     * @param retainFailures ignored
     * @throws IOException always, to signal that the operation is
     *         unsupported
     */
    public void importRecoverLog(String pathToLog, boolean retainFailures)
    throws IOException {
        throw new IOException("Unsupported");
    }

    //
    // Reporter implementation
    //

    /**
     * @return an empty array; only the default report is offered for now
     */
    public String[] getReports() {
        // none but default for now
        return new String[] {};
    }

    /**
     * @return the result of <code>ArchiveUtils.singleLineReport(this)</code>
     * @see org.archive.util.Reporter#singleLineReport()
     */
    public String singleLineReport() {
        return ArchiveUtils.singleLineReport(this);
    }

    /**
     * Writes the default report by delegating to
     * {@link #reportTo(String, PrintWriter)} with a <code>null</code> name.
     *
     * @param writer destination of the report
     * @throws IOException declared by the Reporter contract
     * @see org.archive.util.Reporter#reportTo(java.io.Writer)
     */
    public void reportTo(PrintWriter writer) throws IOException {
        reportTo(null,writer);
    }

    /**
     * Writes the single-line report by delegating to
     * <code>hostQueues</code>.
     *
     * @param w destination of the report
     * @throws IOException declared by the Reporter contract
     * @see org.archive.crawler.framework.Frontier#oneLineReport()
     */
    public synchronized void singleLineReportTo(PrintWriter w)
    throws IOException {
        hostQueues.singleLineReportTo(w);
    }

    /**
     * @return the single-line legend supplied by <code>hostQueues</code>
     * @see org.archive.util.Reporter#singleLineLegend()
     */
    public String singleLineLegend() {
        return hostQueues.singleLineLegend();
    }

    /**
     * Writes a report by delegating to <code>hostQueues</code>.
     *
     * @param name report name, passed through to <code>hostQueues</code>
     * @param writer destination of the report
     * @see org.archive.crawler.framework.Frontier#report()
     */
    public synchronized void reportTo(String name, PrintWriter writer) {
        // ignore name; only one report for now
        hostQueues.reportTo(name, writer);
    }

    /**
     * No action is taken on this event.
     *
     * @param message ignored
     * @see org.archive.crawler.event.CrawlStatusListener#crawlStarted(java.lang.String)
     */
    public void crawlStarted(String message) {
        // Not interested
    }

    /**
     * No action is taken on this event.
     *
     * @param sExitMessage ignored
     * @see org.archive.crawler.event.CrawlStatusListener#crawlEnding(java.lang.String)
     */
    public void crawlEnding(String sExitMessage) {
        // Not interested
    }

    /**
     * Releases resources once the crawl has ended: closes and clears
     * <code>alreadyIncluded</code> (if set), then closes
     * <code>hostQueues</code>.
     *
     * @param sExitMessage ignored
     * @see org.archive.crawler.event.CrawlStatusListener#crawlEnded(java.lang.String)
     */
    public void crawlEnded(String sExitMessage) {
        // Cleanup!
        if (this.alreadyIncluded != null) {
            this.alreadyIncluded.close();
            this.alreadyIncluded = null;
        }
        hostQueues.close();
    }

    /**
     * No action is taken on this event.
     *
     * @param statusMessage ignored
     * @see org.archive.crawler.event.CrawlStatusListener#crawlPausing(java.lang.String)
     */
    public void crawlPausing(String statusMessage) {
        // Not interested
    }

    /**
     * No action is taken on this event.
     *
     * @param statusMessage ignored
     * @see org.archive.crawler.event.CrawlStatusListener#crawlPaused(java.lang.String)
     */
    public void crawlPaused(String statusMessage) {
        // Not interested
    }

    /**
     * No action is taken on this event.
     *
     * @param statusMessage ignored
     * @see org.archive.crawler.event.CrawlStatusListener#crawlResuming(java.lang.String)
     */
    public void crawlResuming(String statusMessage) {
        // Not interested
    }

    /**
     * No action is taken on this event.
     *
     * @param checkpointDir ignored
     * @throws Exception declared by the listener contract; this
     *         implementation never throws
     * @see org.archive.crawler.event.CrawlStatusListener#crawlCheckpoint(java.io.File)
     */
    public void crawlCheckpoint(File checkpointDir) throws Exception {
        // Not interested
    }

    /**
     * Accepts a candidate URI and hands it to <code>innerSchedule</code>.
     *
     * @param item the candidate URI to schedule
     * @see org.archive.crawler.datamodel.UriUniqFilter.HasUriReceiver#receive(org.archive.crawler.datamodel.CandidateURI)
     */
    public void receive(CandidateURI item) {
        // Trace through the class logger instead of printing to stdout for
        // every received URI.
        logger.finest("Received " + item);
        innerSchedule(item);
    }

    /**
     * Returns the host queue that <code>getHQ</code> yields for the URI.
     *
     * @param curi the URI whose group is wanted
     * @return the result of <code>getHQ(curi)</code>
     * @throws RuntimeException wrapping any IOException raised by
     *         <code>getHQ</code>
     * @see org.archive.crawler.framework.Frontier#getGroup(org.archive.crawler.datamodel.CrawlURI)
     */
    public FrontierGroup getGroup(CrawlURI curi) {
        try {
            return getHQ(curi);
        } catch (IOException ioe) {
            throw new RuntimeException(ioe);
        }
    }

    /**
     * @return <code>hostQueues.getAverageDepth()</code>
     */
    public long averageDepth() {
        return hostQueues.getAverageDepth();
    }

    /**
     * @return <code>hostQueues.getCongestionRatio()</code>
     */
    public float congestionRatio() {
        return hostQueues.getCongestionRatio();
    }

    /**
     * @return <code>hostQueues.getDeepestQueueSize()</code>
     */
    public long deepestUri() {
        return hostQueues.getDeepestQueueSize();
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -