⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 adaptiverevisithostqueue.html

📁 用JAVA编写的,在做实验的时候留下来的,本来想删的,但是传上来,大家分享吧
💻 HTML
📖 第 1 页 / 共 5 页
字号:
<a name="568" href="#568">568</a>                     + opStatus.toString());<a name="569" href="#569">569</a>         }<a name="570" href="#570">570</a>     }<a name="571" href="#571">571</a>     <a name="572" href="#572">572</a>     <em>/**<em>*</em></em><a name="573" href="#573">573</a> <em>     * Returns the CrawlURI associated with the specified URI (string) or null</em><a name="574" href="#574">574</a> <em>     * if no such CrawlURI is queued in this HQ. If CrawlURI is being processed</em><a name="575" href="#575">575</a> <em>     * it is not considered to be &lt;i>queued &lt;/i> and this method will return</em><a name="576" href="#576">576</a> <em>     * null for any such URIs.</em><a name="577" href="#577">577</a> <em>     * </em><a name="578" href="#578">578</a> <em>     * @param uri</em><a name="579" href="#579">579</a> <em>     *            A string representing the URI</em><a name="580" href="#580">580</a> <em>     * @return the CrawlURI associated with the specified URI (string) or null</em><a name="581" href="#581">581</a> <em>     *         if no such CrawlURI is queued in this HQ.</em><a name="582" href="#582">582</a> <em>     * </em><a name="583" href="#583">583</a> <em>     * @throws DatabaseException</em><a name="584" href="#584">584</a> <em>     *             if an error occurs reading the database</em><a name="585" href="#585">585</a> <em>     */</em><a name="586" href="#586">586</a>     <strong>protected</strong> <a href="../../../../org/archive/crawler/datamodel/CrawlURI.html">CrawlURI</a> getCrawlURI(String uri) throws DatabaseException{<a name="587" href="#587">587</a>         DatabaseEntry keyEntry = <strong>new</strong> DatabaseEntry();<a name="588" href="#588">588</a>         DatabaseEntry dataEntry = <strong>new</strong> DatabaseEntry();<a name="589" href="#589">589</a>         <a name="590" href="#590">590</a>         primaryKeyBinding.objectToEntry(uri,keyEntry);<a name="591" href="#591">591</a>         
primaryUriDB.get(<strong>null</strong>,keyEntry,dataEntry,LockMode.DEFAULT);<a name="592" href="#592">592</a>         <a name="593" href="#593">593</a>         <a href="../../../../org/archive/crawler/datamodel/CrawlURI.html">CrawlURI</a> curi = (CrawlURI)crawlURIBinding.entryToObject(dataEntry);<a name="594" href="#594">594</a>         <a name="595" href="#595">595</a>         <strong>return</strong> curi;<a name="596" href="#596">596</a>     }<a name="597" href="#597">597</a> <a name="598" href="#598">598</a>     <em>/**<em>*</em></em><a name="599" href="#599">599</a> <em>     * Update CrawlURI that has completed processing.</em><a name="600" href="#600">600</a> <em>     * </em><a name="601" href="#601">601</a> <em>     * @param curi The CrawlURI. This must be a CrawlURI issued by this HQ's </em><a name="602" href="#602">602</a> <em>     *             {@link #next() next()} method.</em><a name="603" href="#603">603</a> <em>     * @param needWait If true then the URI was processed successfully, </em><a name="604" href="#604">604</a> <em>     *                 requiring a period of suspended action on that host. If</em><a name="605" href="#605">605</a> <em>     *                 valence is > 1 then separate times are maintained for </em><a name="606" href="#606">606</a> <em>     *                 each slot.</em><a name="607" href="#607">607</a> <em>     * @param wakeupTime If new state is </em><a name="608" href="#608">608</a> <em>     *                   {@link AdaptiveRevisitHostQueue#HQSTATE_SNOOZED snoozed}</em><a name="609" href="#609">609</a> <em>     *                   then this parameter should contain the time (in </em><a name="610" href="#610">610</a> <em>     *                   milliseconds) when it will be safe to wake the HQ up</em><a name="611" href="#611">611</a> <em>     *                   again. 
Otherwise this parameter will be ignored.</em><a name="612" href="#612">612</a> <em>     * </em><a name="613" href="#613">613</a> <em>     * @throws IllegalStateException if the CrawlURI</em><a name="614" href="#614">614</a> <em>     *         does not match a CrawlURI issued for crawling by this HQ's</em><a name="615" href="#615">615</a> <em>     *         {@link AdaptiveRevisitHostQueue#next() next()}.</em><a name="616" href="#616">616</a> <em>     * @throws IOException if an error occurs accessing the database</em><a name="617" href="#617">617</a> <em>     */</em><a name="618" href="#618">618</a>     <strong>public</strong> <strong>void</strong> update(<a href="../../../../org/archive/crawler/datamodel/CrawlURI.html">CrawlURI</a> curi, <a name="619" href="#619">619</a>                        <strong>boolean</strong> needWait, <a name="620" href="#620">620</a>                        <strong>long</strong> wakeupTime) <a name="621" href="#621">621</a>             throws IllegalStateException, IOException{<a name="622" href="#622">622</a>         update(curi,needWait,wakeupTime,false);<a name="623" href="#623">623</a>     }<a name="624" href="#624">624</a>     <a name="625" href="#625">625</a>     <a name="626" href="#626">626</a>     <em>/**<em>*</em></em><a name="627" href="#627">627</a> <em>     * Update CrawlURI that has completed processing.</em><a name="628" href="#628">628</a> <em>     * </em><a name="629" href="#629">629</a> <em>     * @param curi The CrawlURI. This must be a CrawlURI issued by this HQ's </em><a name="630" href="#630">630</a> <em>     *             {@link #next() next()} method.</em><a name="631" href="#631">631</a> <em>     * @param needWait If true then the URI was processed successfully, </em><a name="632" href="#632">632</a> <em>     *                 requiring a period of suspended action on that host. 
If</em><a name="633" href="#633">633</a> <em>     *                 valence is > 1 then separate times are maintained for </em><a name="634" href="#634">634</a> <em>     *                 each slot.</em><a name="635" href="#635">635</a> <em>     * @param wakeupTime If new state is </em><a name="636" href="#636">636</a> <em>     *                   {@link AdaptiveRevisitHostQueue#HQSTATE_SNOOZED snoozed}</em><a name="637" href="#637">637</a> <em>     *                   then this parameter should contain the time (in </em><a name="638" href="#638">638</a> <em>     *                   milliseconds) when it will be safe to wake the HQ up</em><a name="639" href="#639">639</a> <em>     *                   again. Otherwise this parameter will be ignored.</em><a name="640" href="#640">640</a> <em>     * @param forgetURI If true, the URI will be deleted from the queue.</em><a name="641" href="#641">641</a> <em>     * </em><a name="642" href="#642">642</a> <em>     * @throws IllegalStateException if the CrawlURI</em><a name="643" href="#643">643</a> <em>     *         does not match a CrawlURI issued for crawling by this HQ's</em><a name="644" href="#644">644</a> <em>     *         {@link AdaptiveRevisitHostQueue#next() next()}.</em><a name="645" href="#645">645</a> <em>     * @throws IOException if an error occurs accessing the database</em><a name="646" href="#646">646</a> <em>     */</em><a name="647" href="#647">647</a>     <strong>public</strong> <strong>void</strong> update(<a href="../../../../org/archive/crawler/datamodel/CrawlURI.html">CrawlURI</a> curi, <a name="648" href="#648">648</a>                        <strong>boolean</strong> needWait, <a name="649" href="#649">649</a>                        <strong>long</strong> wakeupTime, <a name="650" href="#650">650</a>                        <strong>boolean</strong> forgetURI) <a name="651" href="#651">651</a>             throws IllegalStateException, IOException{<a name="652" href="#652">652</a>         
<strong>if</strong> (logger.isLoggable(Level.FINE)) {<a name="653" href="#653">653</a>             logger.fine(<span class="string">"Updating "</span> + curi.toString());<a name="654" href="#654">654</a>         }<a name="655" href="#655">655</a>         <strong>try</strong>{<a name="656" href="#656">656</a>             <em class="comment">// First add it to the regular queue (if not forgetting it).</em><a name="657" href="#657">657</a>             <strong>if</strong> (forgetURI == false){<a name="658" href="#658">658</a>                 OperationStatus opStatus = strictAdd(curi,false);<a name="659" href="#659">659</a>                 <strong>if</strong>(opStatus != OperationStatus.SUCCESS){<a name="660" href="#660">660</a>                     <strong>if</strong>(opStatus == OperationStatus.KEYEXIST){<a name="661" href="#661">661</a>                         <strong>throw</strong> <strong>new</strong> IllegalStateException(<span class="string">"Trying to update a"</span> +<a name="662" href="#662">662</a>                             <span class="string">" CrawlURI failed because it was in the queue"</span> +<a name="663" href="#663">663</a>                             <span class="string">" of URIs waiting for processing. 
URIs currently"</span> +<a name="664" href="#664">664</a>                             <span class="string">" being processsed can never be in that queue."</span> +<a name="665" href="#665">665</a>                             <span class="string">" HQ: "</span> + hostName + <span class="string">", CrawlURI: "</span> + <a name="666" href="#666">666</a>                             curi.toString());<a name="667" href="#667">667</a>                     }<a name="668" href="#668">668</a>                 }<a name="669" href="#669">669</a> <a name="670" href="#670">670</a>                 <em class="comment">// Check if we need to update nextReadyTime</em><a name="671" href="#671">671</a>                 <strong>long</strong> curiTimeOfNextProcessing = curi.getLong(<a name="672" href="#672">672</a>                         A_TIME_OF_NEXT_PROCESSING);<a name="673" href="#673">673</a>                 <strong>if</strong>(nextReadyTime > curiTimeOfNextProcessing){<a name="674" href="#674">674</a>                     setNextReadyTime(curiTimeOfNextProcessing);<a name="675" href="#675">675</a>                 }<a name="676" href="#676">676</a>                 <a name="677" href="#677">677</a>             } <strong>else</strong> {<a name="678" href="#678">678</a>                 size--;<a name="679" href="#679">679</a>             }<a name="680" href="#680">680</a>             <a name="681" href="#681">681</a>             <em class="comment">// Then remove from list of in processing URIs</em><a name="682" href="#682">682</a>             deleteInProcessing(curi.toString());<a name="683" href="#683">683</a>             <a name="684" href="#684">684</a>             inProcessing--;<a name="685" href="#685">685</a>             <a name="686" href="#686">686</a>             <em class="comment">// Update the wakeUpTime slot.</em><a name="687" href="#687">687</a>             <strong>if</strong>(needWait==false){<a name="688" href="#688">688</a>                 <em class="comment">// Ok, no 
wait then. Set wake up time to 0.</em><a name="689" href="#689">689</a>                 wakeupTime = 0;<a name="690" href="#690">690</a>             }<a name="691" href="#691">691</a> <a name="692" href="#692">692</a>             updateWakeUpTimeSlot(wakeupTime);<a name="693" href="#693">693</a>         } <strong>catch</strong> (DatabaseException e) {<a name="694" href="#694">694</a>             <em class="comment">// Blanket catch all DBExceptions and convert to IOExceptions.</em><a name="695" href="#695">695</a>             IOException e2 = <strong>new</strong> IOException(e.getMessage());<a name="696" href="#696">696</a>             e2.setStackTrace(e.getStackTrace()); <em class="comment">//preserve original stacktrace</em><a name="697" href="#697">697</a>             <strong>throw</strong> e2; <a name="698" href="#698">698</a>         }<a name="699" href="#699">699</a>     }<a name="700" href="#700">700</a> <a name="701" href="#701">701</a>     <em>/**<em>*</em></em><a name="702" href="#702">702</a> <em>     * Returns the 'top' URI in the AdaptiveRevisitHostQueue. </em><a name="703" href="#703">703</a> <em>     * &lt;p></em><a name="704" href="#704">704</a> <em>     * HQ state will be set to {@link AdaptiveRevisitHostQueue#HQSTATE_BUSY busy} if this </em><a name="705" href="#705">705</a> <em>     * method returns normally.</em><a name="706" href="#706">706</a> <em> </em><a name="707" href="#707">707</a> <em>     * </em><a name="708" href="#708">708</a> <em>     * @return a CrawlURI ready for processing</em><a name="709" href="#709">709</a> <em>     * </em><a name=

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -