crawluri.html

来自「网络爬虫开源代码」· HTML 代码 · 共 1,029 行 · 第 1/5 页

HTML
1,029
字号
</TABLE>&nbsp;<A NAME="fields_inherited_from_class_org.archive.crawler.datamodel.CoreAttributeConstants"><!-- --></A><TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""><TR BGCOLOR="#EEEEFF" CLASS="TableSubHeadingColor"><TH ALIGN="left"><B>Fields inherited from interface org.archive.crawler.datamodel.<A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html" title="interface in org.archive.crawler.datamodel">CoreAttributeConstants</A></B></TH></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD><CODE><A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_ANNOTATIONS">A_ANNOTATIONS</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_CONTENT_DIGEST">A_CONTENT_DIGEST</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_CONTENT_TYPE">A_CONTENT_TYPE</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_CREDENTIAL_AVATARS_KEY">A_CREDENTIAL_AVATARS_KEY</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_DELAY_FACTOR">A_DELAY_FACTOR</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_DISTANCE_FROM_SEED">A_DISTANCE_FROM_SEED</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_DNS_FETCH_TIME">A_DNS_FETCH_TIME</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_DNS_SERVER_IP_LABEL">A_DNS_SERVER_IP_LABEL</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_ETAG_HEADER">A_ETAG_HEADER</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_FETCH_BEGAN_TIME">A_FETCH_BEGAN_TIME</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_FETCH_COMPLETED_TIME">A_FETCH_COMPLETED_TIME</A>, <A 
HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_FETCH_HISTORY">A_FETCH_HISTORY</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_FORCE_RETIRE">A_FORCE_RETIRE</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_HERITABLE_KEYS">A_HERITABLE_KEYS</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_HTML_BASE">A_HTML_BASE</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_HTTP_PROXY_HOST">A_HTTP_PROXY_HOST</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_HTTP_PROXY_PORT">A_HTTP_PROXY_PORT</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_HTTP_TRANSACTION">A_HTTP_TRANSACTION</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_LAST_MODIFIED_HEADER">A_LAST_MODIFIED_HEADER</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_LOCALIZED_ERRORS">A_LOCALIZED_ERRORS</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_META_ROBOTS">A_META_ROBOTS</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_MINIMUM_DELAY">A_MINIMUM_DELAY</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_MIRROR_PATH">A_MIRROR_PATH</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_PREREQUISITE_URI">A_PREREQUISITE_URI</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_REFERENCE_LENGTH">A_REFERENCE_LENGTH</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_RETRY_DELAY">A_RETRY_DELAY</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_RRECORD_SET_LABEL">A_RRECORD_SET_LABEL</A>, <A 
HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_RUNTIME_EXCEPTION">A_RUNTIME_EXCEPTION</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_SOURCE_TAG">A_SOURCE_TAG</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#A_STATUS">A_STATUS</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#HEADER_TRUNC">HEADER_TRUNC</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#LENGTH_TRUNC">LENGTH_TRUNC</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#TIMER_TRUNC">TIMER_TRUNC</A>, <A HREF="../../../../org/archive/crawler/datamodel/CoreAttributeConstants.html#TRUNC_SUFFIX">TRUNC_SUFFIX</A></CODE></TD></TR></TABLE>&nbsp;<!-- ======== CONSTRUCTOR SUMMARY ======== --><A NAME="constructor_summary"><!-- --></A><TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""><TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor"><TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2"><B>Constructor Summary</B></FONT></TH></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD><CODE><B><A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#CrawlURI(org.archive.crawler.datamodel.CandidateURI, long)">CrawlURI</A></B>(<A HREF="../../../../org/archive/crawler/datamodel/CandidateURI.html" title="class in org.archive.crawler.datamodel">CandidateURI</A>&nbsp;caUri,         long&nbsp;o)</CODE><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Create a new instance of CrawlURI from a <A HREF="../../../../org/archive/crawler/datamodel/CandidateURI.html" title="class in org.archive.crawler.datamodel"><CODE>CandidateURI</CODE></A></TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD><CODE><B><A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#CrawlURI(org.archive.net.UURI)">CrawlURI</A></B>(<A HREF="../../../../org/archive/net/UURI.html" title="class in 
org.archive.net">UURI</A>&nbsp;uuri)</CODE><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Create a new instance of CrawlURI from a <A HREF="../../../../org/archive/net/UURI.html" title="class in org.archive.net"><CODE>UURI</CODE></A>.</TD></TR></TABLE>&nbsp;<!-- ========== METHOD SUMMARY =========== --><A NAME="method_summary"><!-- --></A><TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""><TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor"><TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2"><B>Method Summary</B></FONT></TH></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>&nbsp;void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#aboutToLog()">aboutToLog</A></B>()</CODE><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Notify CrawlURI it is about to be logged; opportunity for self-annotation</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>static&nbsp;void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#addAlistPersistentMember(java.lang.Object)">addAlistPersistentMember</A></B>(java.lang.Object&nbsp;key)</CODE><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Add the key of alist items you want to persist across processings.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>&nbsp;void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#addAnnotation(java.lang.String)">addAnnotation</A></B>(java.lang.String&nbsp;annotation)</CODE><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Add an annotation: an abbreviated indication of something special about this URI that need not be present in every crawl.log line, but should be noted for future reference.</TD></TR><TR 
BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>&nbsp;void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#addCredentialAvatar(org.archive.crawler.datamodel.credential.CredentialAvatar)">addCredentialAvatar</A></B>(<A HREF="../../../../org/archive/crawler/datamodel/credential/CredentialAvatar.html" title="class in org.archive.crawler.datamodel.credential">CredentialAvatar</A>&nbsp;ca)</CODE><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Add an avatar.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>&nbsp;void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#addLocalizedError(java.lang.String, java.lang.Throwable, java.lang.String)">addLocalizedError</A></B>(java.lang.String&nbsp;processorName,                  java.lang.Throwable&nbsp;ex,                  java.lang.String&nbsp;message)</CODE><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Make note of a non-fatal error, local to a particular Processor, which should be logged somewhere, but allows processing to continue.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>&nbsp;void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#addOutLink(org.archive.crawler.extractor.Link)">addOutLink</A></B>(<A HREF="../../../../org/archive/crawler/extractor/Link.html" title="class in org.archive.crawler.extractor">Link</A>&nbsp;link)</CODE><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Add a discovered Link, unless it would exceed the max number to accept.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>protected &nbsp;boolean</CODE></FONT></TD><TD><CODE><B><A 
HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#annotationContains(java.lang.String)">annotationContains</A></B>(java.lang.String&nbsp;str2Find)</CODE><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>&nbsp;void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#clearOutlinks()">clearOutlinks</A></B>()</CODE><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>&nbsp;void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#createAndAddLink(java.lang.String, java.lang.CharSequence, char)">createAndAddLink</A></B>(java.lang.String&nbsp;url,                 java.lang.CharSequence&nbsp;context,                 char&nbsp;hopType)</CODE><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Convenience method for creating a Link with the given string and context</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>&nbsp;void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#createAndAddLinkRelativeToBase(java.lang.String, java.lang.CharSequence, char)">createAndAddLinkRelativeToBase</A></B>(java.lang.String&nbsp;url,                               java.lang.CharSequence&nbsp;context,                               char&nbsp;hopType)</CODE><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Convenience method for creating a Link with the given string and context, relative to a previously set base HREF if available (or relative to the current CrawlURI if no other base has been set)</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT 
SIZE="-1"><CODE>&nbsp;void</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#createAndAddLinkRelativeToVia(java.lang.String, java.lang.CharSequence, char)">createAndAddLinkRelativeToVia</A></B>(java.lang.String&nbsp;url,                              java.lang.CharSequence&nbsp;context,                              char&nbsp;hopType)</CODE><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Convenience method for creating a Link with the given string and context, relative to this CrawlURI's via UURI if available.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>&nbsp;<A HREF="../../../../org/archive/crawler/extractor/Link.html" title="class in org.archive.crawler.extractor">Link</A></CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#createLink(java.lang.String, java.lang.CharSequence, char)">createLink</A></B>(java.lang.String&nbsp;url,           java.lang.CharSequence&nbsp;context,           char&nbsp;hopType)</CODE><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Convenience method for creating a Link discovered at this URI with the given string and context</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>static&nbsp;java.lang.String</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#fetchStatusCodesToString(int)">fetchStatusCodesToString</A></B>(int&nbsp;code)</CODE><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Takes a status code and converts it into a human readable string.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>static&nbsp;<A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html" title="class in 
org.archive.crawler.datamodel">CrawlURI</A></CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#from(org.archive.crawler.datamodel.CandidateURI, long)">from</A></B>(<A HREF="../../../../org/archive/crawler/datamodel/CandidateURI.html" title="class in org.archive.crawler.datamodel">CandidateURI</A>&nbsp;caUri,     long&nbsp;ordinal)</CODE><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Make a <code>CrawlURI</code> from the passed <code>CandidateURI</code>.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>&nbsp;java.lang.String</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#getAnnotations()">getAnnotations</A></B>()</CODE><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Get the annotations set for this uri.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>&nbsp;<A HREF="../../../../org/archive/net/UURI.html" title="class in org.archive.net">UURI</A></CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#getBaseURI()">getBaseURI</A></B>()</CODE><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Get the (HTML) Base URI used for derelativizing internal URIs.</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>protected &nbsp;java.lang.String</CODE></FONT></TD><TD><CODE><B><A HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#getClassSimpleName(java.lang.Class)">getClassSimpleName</A></B>(java.lang.Class&nbsp;c)</CODE><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</TD></TR><TR BGCOLOR="white" CLASS="TableRowColor"><TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1"><CODE>&nbsp;java.lang.Object</CODE></FONT></TD><TD><CODE><B><A 
HREF="../../../../org/archive/crawler/datamodel/CrawlURI.html#getContentDigest()">getContentDigest</A></B>()</CODE><BR>

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?