📄 adaptiverevisithostqueue.html
字号:
<A NAME="fields_inherited_from_class_org.archive.crawler.frontier.AdaptiveRevisitAttributeConstants"><!-- --></A><TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""><TR BGCOLOR="#EEEEFF" CLASS="TableSubHeadingColor"><TH ALIGN="left"><B>Fields inherited from interface org.archive.crawler.frontier.<A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitAttributeConstants.html" title="interface in org.archive.crawler.frontier">AdaptiveRevisitAttributeConstants</A></B></TH></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD><CODE><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitAttributeConstants.html#A_CONTENT_STATE_KEY">A_CONTENT_STATE_KEY</A>, <A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitAttributeConstants.html#A_FETCH_OVERDUE">A_FETCH_OVERDUE</A>, <A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitAttributeConstants.html#A_LAST_CONTENT_DIGEST">A_LAST_CONTENT_DIGEST</A>, <A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitAttributeConstants.html#A_LAST_DATESTAMP">A_LAST_DATESTAMP</A>, <A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitAttributeConstants.html#A_LAST_ETAG">A_LAST_ETAG</A>, <A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitAttributeConstants.html#A_NUMBER_OF_VERSIONS">A_NUMBER_OF_VERSIONS</A>, <A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitAttributeConstants.html#A_NUMBER_OF_VISITS">A_NUMBER_OF_VISITS</A>, <A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitAttributeConstants.html#A_TIME_OF_NEXT_PROCESSING">A_TIME_OF_NEXT_PROCESSING</A>, <A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitAttributeConstants.html#A_WAIT_INTERVAL">A_WAIT_INTERVAL</A>, <A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitAttributeConstants.html#A_WAIT_REEVALUATED">A_WAIT_REEVALUATED</A>, <A 
HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitAttributeConstants.html#CONTENT_CHANGED">CONTENT_CHANGED</A>, <A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitAttributeConstants.html#CONTENT_UNCHANGED">CONTENT_UNCHANGED</A>, <A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitAttributeConstants.html#CONTENT_UNKNOWN">CONTENT_UNKNOWN</A></CODE></TD></TR></TABLE> <A NAME="fields_inherited_from_class_org.archive.crawler.datamodel.CoreAttributeConstants"><!-- --></A><TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""><TR BGCOLOR="#EEEEFF" CLASS="TableSubHeadingColor"><TH ALIGN="left"><B>Fields inherited from interface org.archive.crawler.datamodel.<A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html" title="interface in org.archive.crawler.datamodel">CoreAttributeConstants</A></B></TH></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD><CODE><A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_ANNOTATIONS">A_ANNOTATIONS</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_CONTENT_TYPE">A_CONTENT_TYPE</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_CREDENTIAL_AVATARS_KEY">A_CREDENTIAL_AVATARS_KEY</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_DELAY_FACTOR">A_DELAY_FACTOR</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_DISTANCE_FROM_SEED">A_DISTANCE_FROM_SEED</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_DNS_FETCH_TIME">A_DNS_FETCH_TIME</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_DNS_SERVER_IP_LABEL">A_DNS_SERVER_IP_LABEL</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_FETCH_BEGAN_TIME">A_FETCH_BEGAN_TIME</A>, <A 
HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_FETCH_COMPLETED_TIME">A_FETCH_COMPLETED_TIME</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_FORCE_RETIRE">A_FORCE_RETIRE</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_HERITABLE_KEYS">A_HERITABLE_KEYS</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_HTML_BASE">A_HTML_BASE</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_HTTP_PROXY_HOST">A_HTTP_PROXY_HOST</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_HTTP_PROXY_PORT">A_HTTP_PROXY_PORT</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_HTTP_TRANSACTION">A_HTTP_TRANSACTION</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_LOCALIZED_ERRORS">A_LOCALIZED_ERRORS</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_META_ROBOTS">A_META_ROBOTS</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_MINIMUM_DELAY">A_MINIMUM_DELAY</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_MIRROR_PATH">A_MIRROR_PATH</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_PREREQUISITE_URI">A_PREREQUISITE_URI</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_RETRY_DELAY">A_RETRY_DELAY</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_RRECORD_SET_LABEL">A_RRECORD_SET_LABEL</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_RUNTIME_EXCEPTION">A_RUNTIME_EXCEPTION</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_SOURCE_TAG">A_SOURCE_TAG</A>, <A 
HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#HEADER_TRUNC">HEADER_TRUNC</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#LENGTH_TRUNC">LENGTH_TRUNC</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#TIMER_TRUNC">TIMER_TRUNC</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#TRUNC_SUFFIX">TRUNC_SUFFIX</A></CODE></TD></TR></TABLE> <!-- ======== CONSTRUCTOR SUMMARY ======== --><A NAME="constructor_summary"><!-- --></A><TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""><TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor"><TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2"><B>Constructor Summary</B></FONT></TH></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#AdaptiveRevisitHostQueue(java.lang.String, com.sleepycat.je.Environment, com.sleepycat.bind.serial.StoredClassCatalog, int)">AdaptiveRevisitHostQueue</A></B>(java.lang.String hostName, com.sleepycat.je.Environment env, com.sleepycat.bind.serial.StoredClassCatalog catalog, int valence)</CODE><BR> Constructor</TD></TR></TABLE> <!-- ========== METHOD SUMMARY =========== --><A NAME="method_summary"><!-- --></A><TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""><TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor"><TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2"><B>Method Summary</B></FONT></TH></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#add(org.archive.crawler.datamodel.CrawlURI, boolean)">add</A></B>(<A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html" title="class in org.archive.crawler.datamodel">CrawlURI</A> curi, boolean overrideSetTimeOnDups)</CODE><BR> Add a CrawlURI to this 
host queue.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>protected void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#addInProcessing(org.archive.crawler.datamodel.CrawlURI)">addInProcessing</A></B>(<A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html" title="class in org.archive.crawler.datamodel">CrawlURI</A> curi)</CODE><BR> Adds a CrawlURI to the list of CrawlURIs belonging to this HQ and are being processed at the moment.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#close()">close</A></B>()</CODE><BR> Cleanup all open Berkeley Database objects.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>protected long</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#countCrawlURIs()">countCrawlURIs</A></B>()</CODE><BR> Count all entries in both primaryUriDB and processingUriDB.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>protected void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#deleteInProcessing(java.lang.String)">deleteInProcessing</A></B>(java.lang.String uri)</CODE><BR> Removes a URI from the list of URIs belonging to this HQ and are currently being processed.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>protected void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#flushProcessingURIs()">flushProcessingURIs</A></B>()</CODE><BR> Flush any 
CrawlURIs in the processingUriDB into the primaryUriDB.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>protected <A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html" title="class in org.archive.crawler.datamodel">CrawlURI</A></CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#getCrawlURI(java.lang.String)">getCrawlURI</A></B>(java.lang.String uri)</CODE><BR> Returns the CrawlURI associated with the specified URI (string) or null if no such CrawlURI is queued in this HQ.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> java.lang.String</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#getHostName()">getHostName</A></B>()</CODE><BR> Returns the HQ's name</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> long</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#getNextReadyTime()">getNextReadyTime</A></B>()</CODE><BR> Returns the time when the HQ will next be ready to issue a URI.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> long</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#getSize()">getSize</A></B>()</CODE><BR> Returns the size of the HQ.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> int</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#getState()">getState</A></B>()</CODE><BR> Returns the current state of the HQ.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT 
SIZE="-1"><CODE> java.lang.String</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#getStateByName()">getStateByName</A></B>()</CODE><BR> Same as <A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#getState()"><CODE>getState()</CODE></A> except this method returns a human readable name for the state instead of its constant integer value.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> <A HREF="../../../../org/archive/crawler/datamodel/CrawlSubstats.html" title="class in org.archive.crawler.datamodel">CrawlSubstats</A></CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#getSubstats()">getSubstats</A></B>()</CODE><BR> </TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>protected boolean</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#inProcessing(java.lang.String)">inProcessing</A></B>(java.lang.String uri)</CODE><BR> Returns true if this HQ has a CrawlURI matching the uri string currently being processed.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> <A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html" title="class in org.archive.crawler.datamodel">CrawlURI</A></CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#next()">next</A></B>()</CODE><BR> Returns the 'top' URI in the AdaptiveRevisitHostQueue.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> <A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html" title="class in org.archive.crawler.datamodel">CrawlURI</A></CODE></FONT></TD><TD><CODE><B><A 
HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#peek()">peek</A></B>()</CODE><BR> Returns the URI with the earliest time of next processing.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>protected void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#reorder()">reorder</A></B>()</CODE><BR> Method is called whenever something has been done that might have changed the value of the 'published' time of next ready.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> java.lang.String</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#report(int)">report</A></B>(int max)</CODE><BR> Returns a report detailing the status of this HQ.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>protected void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#setNextReadyTime(long)">setNextReadyTime</A></B>(long newTime)</CODE><BR> Updates nextReadyTime (if smaller) with the supplied value</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#setOwner(org.archive.crawler.frontier.AdaptiveRevisitQueueList)">setOwner</A></B>(<A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitQueueList.html" title="class in org.archive.crawler.frontier">AdaptiveRevisitQueueList</A> owner)</CODE><BR> Set the AdaptiveRevisitQueueList object that contains this HQ.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>protected 
com.sleepycat.je.OperationStatus</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#strictAdd(org.archive.crawler.datamodel.CrawlURI, boolean)">strictAdd</A></B>(<A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html" title="class in org.archive.crawler.datamodel">CrawlURI</A> curi, boolean overrideDuplicates)</CODE><BR> An internal method for adding URIs to the queue.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#update(org.archive.crawler.datamodel.CrawlURI, boolean, long)">update</A></B>(<A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html" title="class in org.archive.crawler.datamodel">CrawlURI</A> curi, boolean needWait, long wakeupTime)</CODE><BR> Update CrawlURI that has completed processing.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE> void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/frontier/AdaptiveRevisitHostQueue.html#update(org.archive.crawler.datamodel.CrawlURI, boolean, long, boolean)">update</A></B>(<A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html" title="class in org.archive.crawler.datamodel">CrawlURI</A> curi, boolean needWait, long wakeupTime, boolean forgetURI)</CODE><BR> Update CrawlURI that has completed processing.</TD></TR></TABLE> <A NAME="methods_inherited_from_class_java.lang.Object"><!-- --></A><TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""><TR BGCOLOR="#EEEEFF" CLASS="TableSubHeadingColor"><TH ALIGN="left"><B>Methods inherited from class java.lang.Object</B></TH></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD><CODE>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</CODE></TD></TR></TABLE> 
<P><!-- ============ FIELD DETAIL =========== --><A NAME="field_detail"><!-- --></A><TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""><TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor"><TH ALIGN="left" COLSPAN="1"><FONT SIZE="+2"><B>Field Detail</B></FONT></TH></TR></TABLE><A NAME="HQSTATE_EMPTY"><!-- --></A><H3>HQSTATE_EMPTY</H3><PRE>public static final int <B>HQSTATE_EMPTY</B></PRE><DL><DD>HQ contains no queued CrawlURI elements. This state only occurs after queue creation and before the first add. After the first item is added, the state can never become empty again.<P><DL><DT><B>See Also:</B><DD><A HREF="../../../../constant-values.html#org.archive.crawler.frontier.AdaptiveRevisitHostQueue.HQSTATE_EMPTY">Constant Field Values</A></DL></DL><HR><A NAME="HQSTATE_READY"><!-- --></A><H3>HQSTATE_READY</H3><PRE>public static final int <B>HQSTATE_READY</B></PRE><DL><DD>HQ has a CrawlURI ready for processing
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -