archivereader.html

来自「网络爬虫开源代码」· HTML 代码 · 共 786 行 · 第 1/5 页

HTML
786
字号
<a name="466" href="#466">466</a>             <em class="comment">// any content not yet read.</em><a name="467" href="#467">467</a>             <strong>try</strong> {<a name="468" href="#468">468</a>                 cleanupCurrentRecord();<a name="469" href="#469">469</a>             } <strong>catch</strong> (IOException e) {<a name="470" href="#470">470</a>                 <strong>if</strong> (isStrict()) {<a name="471" href="#471">471</a>                     <strong>throw</strong> <strong>new</strong> RuntimeException(e);<a name="472" href="#472">472</a>                 }<a name="473" href="#473">473</a>                 <strong>if</strong> (e instanceof EOFException) {<a name="474" href="#474">474</a>                     logger.warning(<span class="string">"Premature EOF cleaning up "</span> + <a name="475" href="#475">475</a>                         currentRecord.getHeader().toString() + <span class="string">": "</span> +<a name="476" href="#476">476</a>                         e.getMessage());<a name="477" href="#477">477</a>                     <strong>return</strong> false;<a name="478" href="#478">478</a>                 }<a name="479" href="#479">479</a>                 <em class="comment">// If not strict, try going again.  We might be able to skip</em><a name="480" href="#480">480</a>                 <em class="comment">// over the bad record.</em><a name="481" href="#481">481</a>                 logger.warning(<span class="string">"Trying skip of failed record cleanup of "</span> +<a name="482" href="#482">482</a>                     currentRecord.getHeader().toString() + <span class="string">": "</span> +<a name="483" href="#483">483</a>                     e.getMessage());<a name="484" href="#484">484</a>             }<a name="485" href="#485">485</a>             <strong>return</strong> innerHasNext();<a name="486" href="#486">486</a>         }<a name="487" href="#487">487</a>         <a name="488" href="#488">488</a>         <strong>protected</strong> <strong>boolean</strong> innerHasNext() {<a name="489" href="#489">489</a>             <strong>long</strong> offset = -1;<a name="490" href="#490">490</a>             <strong>try</strong> {<a name="491" href="#491">491</a>                 offset = ((RepositionableStream)getInputStream()).position();<a name="492" href="#492">492</a>                 <strong>return</strong> getInputStream().available() > 0;<a name="493" href="#493">493</a>             } <strong>catch</strong> (IOException e) {<a name="494" href="#494">494</a>                 <strong>throw</strong> <strong>new</strong> RuntimeException(<span class="string">"Offset "</span> + offset, e);<a name="495" href="#495">495</a>             }<a name="496" href="#496">496</a>         }<a name="497" href="#497">497</a> <a name="498" href="#498">498</a>         <em>/**<em>*</em></em><a name="499" href="#499">499</a> <em>         * Tries to move to next record if we get</em><a name="500" href="#500">500</a> <em>         * {@link RecoverableIOException}. If not &lt;code>strict&lt;/code></em><a name="501" href="#501">501</a> <em>         * tries to move to next record if we get an</em><a name="502" href="#502">502</a> <em>         * {@link IOException}.</em><a name="503" href="#503">503</a> <em>         * @return Next object.</em><a name="504" href="#504">504</a> <em>         * @exception RuntimeException Throws a runtime exception,</em><a name="505" href="#505">505</a> <em>         * usually a wrapping of an IOException, if trouble getting</em><a name="506" href="#506">506</a> <em>         * a record (Throws exception rather than return null).</em><a name="507" href="#507">507</a> <em>         */</em><a name="508" href="#508">508</a>         <strong>public</strong> <a href="../../../org/archive/io/ArchiveRecord.html">ArchiveRecord</a> next() {<a name="509" href="#509">509</a>             <strong>long</strong> offset = -1;<a name="510" href="#510">510</a>             <strong>try</strong> {<a name="511" href="#511">511</a>                 offset = ((RepositionableStream)getInputStream()).position();<a name="512" href="#512">512</a>                 <strong>return</strong> exceptionNext();<a name="513" href="#513">513</a>             } <strong>catch</strong> (IOException e) {<a name="514" href="#514">514</a>                 <strong>if</strong> (!isStrict()) {<a name="515" href="#515">515</a>                     <em class="comment">// Retry though an IOE.  Maybe we will succeed reading</em><a name="516" href="#516">516</a>                     <em class="comment">// subsequent record.</em><a name="517" href="#517">517</a>                     <strong>try</strong> {<a name="518" href="#518">518</a>                         <strong>if</strong> (hasNext()) {<a name="519" href="#519">519</a>                             getLogger().warning(<span class="string">"Bad Record. Trying skip "</span> +<a name="520" href="#520">520</a>                                 <span class="string">"(Current offset "</span> +  offset + <span class="string">"): "</span> +<a name="521" href="#521">521</a>                                 e.getMessage());<a name="522" href="#522">522</a>                             <strong>return</strong> exceptionNext();<a name="523" href="#523">523</a>                         }<a name="524" href="#524">524</a>                         <em class="comment">// Else we are at last record.  Iterator#next is</em><a name="525" href="#525">525</a>                         <em class="comment">// expecting value. We do not have one. Throw exception.</em><a name="526" href="#526">526</a>                         <strong>throw</strong> <strong>new</strong> RuntimeException(<span class="string">"Retried but no next "</span> + <a name="527" href="#527">527</a>                             <span class="string">"record (Offset "</span> + offset + <span class="string">")"</span>, e);<a name="528" href="#528">528</a>                     } <strong>catch</strong> (IOException e1) {<a name="529" href="#529">529</a>                         <strong>throw</strong> <strong>new</strong> RuntimeException(<span class="string">"After retry (Offset "</span> +<a name="530" href="#530">530</a>                                 offset + <span class="string">")"</span>, e1);<a name="531" href="#531">531</a>                     }<a name="532" href="#532">532</a>                 }<a name="533" href="#533">533</a>                 <strong>throw</strong> <strong>new</strong> RuntimeException(<span class="string">"(Offset "</span> + offset + <span class="string">")"</span>, e);<a name="534" href="#534">534</a>             }<a name="535" href="#535">535</a>         }<a name="536" href="#536">536</a>         <a name="537" href="#537">537</a>         <em>/**<em>*</em></em><a name="538" href="#538">538</a> <em>         * A next that throws exceptions and has handling of</em><a name="539" href="#539">539</a> <em>         * recoverable exceptions moving us to next record. Can call</em><a name="540" href="#540">540</a> <em>         * hasNext which itself may throw exceptions.</em><a name="541" href="#541">541</a> <em>         * @return Next record.</em><a name="542" href="#542">542</a> <em>         * @throws IOException</em><a name="543" href="#543">543</a> <em>         * @throws RuntimeException Thrown when we've reached maximum</em><a name="544" href="#544">544</a> <em>         * retries.</em><a name="545" href="#545">545</a> <em>         */</em><a name="546" href="#546">546</a>         <strong>protected</strong> <a href="../../../org/archive/io/ArchiveRecord.html">ArchiveRecord</a> exceptionNext()<a name="547" href="#547">547</a>         throws IOException, RuntimeException {<a name="548" href="#548">548</a>             <a href="../../../org/archive/io/ArchiveRecord.html">ArchiveRecord</a> result = <strong>null</strong>;<a name="549" href="#549">549</a>             IOException ioe = <strong>null</strong>;<a name="550" href="#550">550</a>             <strong>for</strong> (<strong>int</strong> i = MAX_ALLOWED_RECOVERABLES; i > 0 &amp;&amp;<a name="551" href="#551">551</a>                     result == <strong>null</strong>; i--) {<a name="552" href="#552">552</a>                 ioe = <strong>null</strong>;<a name="553" href="#553">553</a>                 <strong>try</strong> {<a name="554" href="#554">554</a>                     result = innerNext();<a name="555" href="#555">555</a>                 } <strong>catch</strong> (RecoverableIOException e) {<a name="556" href="#556">556</a>                     ioe = e;<a name="557" href="#557">557</a>                     getLogger().warning(e.getMessage());<a name="558" href="#558">558</a>                     <strong>if</strong> (hasNext()) {<a name="559" href="#559">559</a>                         <strong>continue</strong>;<a name="560" href="#560">560</a>                     }<a name="561" href="#561">561</a>                     <em class="comment">// No records left.  Throw exception rather than</em><a name="562" href="#562">562</a>                     <em class="comment">// return null.  The caller is expecting to get</em><a name="563" href="#563">563</a>                     <em class="comment">// back a record since they've just called</em><a name="564" href="#564">564</a>                     <em class="comment">// hasNext.</em><a name="565" href="#565">565</a>                     <strong>break</strong>;<a name="566" href="#566">566</a>                 }<a name="567" href="#567">567</a>             }<a name="568" href="#568">568</a>             <strong>if</strong> (ioe != <strong>null</strong>) {<a name="569" href="#569">569</a>                 <em class="comment">// Then we did MAX_ALLOWED_RECOVERABLES retries.  Throw</em><a name="570" href="#570">570</a>                 <em class="comment">// the recoverable ioe wrapped in a RuntimeException so</em><a name="571" href="#571">571</a>                 <em class="comment">// it goes out pass checks for IOE.</em><a name="572" href="#572">572</a>                 <strong>throw</strong> <strong>new</strong> RuntimeException(<span class="string">"Retried "</span> +<a name="573" href="#573">573</a>                     MAX_ALLOWED_RECOVERABLES + <span class="string">" times in a row"</span>, ioe);<a name="574" href="#574">574</a>             }<a name="575" href="#575">575</a>             <strong>return</strong> result;<a name="576" href="#576">576</a>         }<a name="577" href="#577">577</a>         <a name="578" href="#578">578</a>         <strong>protected</strong> <a href="../../../org/archive/io/ArchiveRecord.html">ArchiveRecord</a> innerNext() throws IOException {<a name="579" href="#579">579</a>             <strong>return</strong> get(((RepositionableStream)getInputStream()).position());<a name="580" href="#580">580</a>         }<a name="581" href="#581">581</a>         <a name="582" href="#582">582</a>         <strong>public</strong> <strong>void</strong> remove() {<a name="583" href="#583">583</a>             <strong>throw</strong> <strong>new</strong> UnsupportedOperationException();<a name="584" href="#584">584</a>         }<a name="585" href="#585">585</a>     }<a name="586" href="#586">586</a>     <a name="587" href="#587">587</a>     <strong>protected</strong> <strong>static</strong> String stripExtension(<strong>final</strong> String name,<a name="588" href="#588">588</a>     		<strong>final</strong> String ext) {<a name="589" href="#589">589</a>         <strong>return</strong> (!name.endsWith(ext))? name:<a name="590" href="#590">590</a>             name.substring(0, name.length() - ext.length());<a name="591" href="#591">591</a>     }<a name="592" href="#592">592</a>     <a name="593" href="#593">593</a>     <em>/**<em>*</em></em><a name="594" href="#594">594</a> <em>     * @return short name of Archive file.</em><a name="595" href="#595">595</a> <em>     */</em><a name="596" href="#596">596</a>     <strong>public</strong> String getFileName() {<a name="597" href="#597">597</a>         <strong>return</strong> (<strong>new</strong> File(getReaderIdentifier())).getName();<a name="598" href="#598">598</a>     }<a name="599" href="#599">599</a> <a name="600" href="#600">600</a>     <em>/**<em>*</em></em><a name="601" href="#601">601</a> <em>     * @return short name of Archive file.</em><a name="602" href="#602">602</a> <em>     */</em><a name="603" href="#603">603</a>     <strong>public</strong> String getStrippedFileName() {<a name="604" href="#604">604</a>         <strong>return</strong> getStrippedFileName(getFileName(),<a name="605" href="#605">605</a>     		getDotFileExtension());<a name="606" href="#606">606</a>     }<a name="607" href="#607">607</a>     <a name="608" href="#608">608</a>     <em>/**<em>*</em></em><a name="609" href="#609">609</a> <em>     * @param name Name of ARCFile.</em><a name="610" href="#610">610</a> <em>     * @param dotFileExtension '.arc' or '.warc', etc.</em><a name="611" href="#611">611</a> <em>     * @return short name of Archive file.</em><a name="612" href="#612">612</a> <em>     */</em><a name="613" href="#613">613</a>     <strong>public</strong> <strong>static</strong> String getStrippedFileName(String name,<a name="614" href="#614">614</a>     		<strong>final</strong> String dotFileExtension) {<a name="615" href="#615">615</a>     	name = stripExtension(name,<a name="616" href="#616">616</a>     		ArchiveFileConstants.DOT_COMPRESSED_FILE_EXTENSION);<a name="617" href="#617">617</a>     	<strong>return</strong> stripExtension(name, dotFileExtension);<a name="618" href="#618">618</a>     }<a name="619" href="#619">619</a>     <a name="620" href="#620">620</a>     <em>/**<em>*</em></em><a name="621" href="#621">621</a> <em>     * @param value Value to test.</em><a name="622" href="#622">622</a> <em>     * @return True if value is 'true', else false.</em><a name="623" href="#623">623</a> <em>     */</em>

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?