archivereader.html

来自「网络爬虫开源代码」· HTML 代码 · 共 786 行 · 第 1/5 页

HTML
786
字号
<a name="624" href="#624">624</a>     <strong>protected</strong> <strong>static</strong> <strong>boolean</strong> getTrueOrFalse(<strong>final</strong> String value) {<a name="625" href="#625">625</a>     	<strong>if</strong> (value == <strong>null</strong> || value.length() &lt;= 0) {<a name="626" href="#626">626</a>     		<strong>return</strong> false;<a name="627" href="#627">627</a>     	}<a name="628" href="#628">628</a>         <strong>return</strong> Boolean.TRUE.toString().equals(value.toLowerCase());<a name="629" href="#629">629</a>     }<a name="630" href="#630">630</a>     <a name="631" href="#631">631</a>     <em>/**<em>*</em></em><a name="632" href="#632">632</a> <em>     * @param format Format to use outputting.</em><a name="633" href="#633">633</a> <em>     * @throws IOException</em><a name="634" href="#634">634</a> <em>     * @throws java.text.ParseException</em><a name="635" href="#635">635</a> <em>     * @return True if handled.</em><a name="636" href="#636">636</a> <em>     */</em><a name="637" href="#637">637</a>     <strong>protected</strong> <strong>boolean</strong> output(<strong>final</strong> String format)<a name="638" href="#638">638</a>     throws IOException, java.text.ParseException {<a name="639" href="#639">639</a>     	<strong>boolean</strong> result = <strong>true</strong>;<a name="640" href="#640">640</a>         <em class="comment">// long start = System.currentTimeMillis();</em><a name="641" href="#641">641</a>     	<a name="642" href="#642">642</a>         <em class="comment">// Write output as pseudo-CDX file.  
See</em><a name="643" href="#643">643</a>         <em class="comment">// http://www.archive.org/web/researcher/cdx_legend.php</em><a name="644" href="#644">644</a>         <em class="comment">// and http://www.archive.org/web/researcher/example_cdx.php.</em><a name="645" href="#645">645</a>         <em class="comment">// Hash is hard-coded straight SHA-1 hash of content.</em><a name="646" href="#646">646</a>         <strong>if</strong> (format.equals(DUMP)) {<a name="647" href="#647">647</a>         	<em class="comment">// No point digesting if dumping.</em><a name="648" href="#648">648</a>         	setDigest(false);<a name="649" href="#649">649</a>             dump(false);<a name="650" href="#650">650</a>         } <strong>else</strong> <strong>if</strong> (format.equals(GZIP_DUMP)) {<a name="651" href="#651">651</a>         	<em class="comment">// No point digesting if dumping.</em><a name="652" href="#652">652</a>         	setDigest(false);<a name="653" href="#653">653</a>             dump(<strong>true</strong>);<a name="654" href="#654">654</a>         } <strong>else</strong> <strong>if</strong> (format.equals(CDX)) {<a name="655" href="#655">655</a>         	cdxOutput(false);   <a name="656" href="#656">656</a>         } <strong>else</strong> <strong>if</strong> (format.equals(CDX_FILE)) {<a name="657" href="#657">657</a>             cdxOutput(<strong>true</strong>);<a name="658" href="#658">658</a>         } <strong>else</strong> {<a name="659" href="#659">659</a>         	result = false;<a name="660" href="#660">660</a>         }	<a name="661" href="#661">661</a>         <strong>return</strong> result;<a name="662" href="#662">662</a>     }<a name="663" href="#663">663</a>     <a name="664" href="#664">664</a>     <strong>protected</strong> <strong>void</strong> cdxOutput(<strong>boolean</strong> toFile)<a name="665" href="#665">665</a>     throws IOException {<a name="666" href="#666">666</a>         BufferedWriter cdxWriter = <strong>null</strong>;<a name="667" 
href="#667">667</a>         <strong>if</strong> (toFile) {<a name="668" href="#668">668</a>             String cdxFilename = stripExtension(getReaderIdentifier(),<a name="669" href="#669">669</a>                 DOT_COMPRESSED_FILE_EXTENSION);<a name="670" href="#670">670</a>             cdxFilename = stripExtension(cdxFilename, getDotFileExtension());<a name="671" href="#671">671</a>             cdxFilename += ('.' + CDX);<a name="672" href="#672">672</a>             cdxWriter = <strong>new</strong> BufferedWriter(<strong>new</strong> FileWriter(cdxFilename));<a name="673" href="#673">673</a>         }<a name="674" href="#674">674</a>         <a name="675" href="#675">675</a>         String header = <span class="string">"CDX b e a m s c "</span> + ((isCompressed()) ? <span class="string">"V"</span> : <span class="string">"v"</span>)<a name="676" href="#676">676</a>             + <span class="string">" n g"</span>;<a name="677" href="#677">677</a>         <strong>if</strong> (toFile) {<a name="678" href="#678">678</a>             cdxWriter.write(header);<a name="679" href="#679">679</a>             cdxWriter.newLine();<a name="680" href="#680">680</a>         } <strong>else</strong> {<a name="681" href="#681">681</a>             System.out.println(header);<a name="682" href="#682">682</a>         }<a name="683" href="#683">683</a>         <a name="684" href="#684">684</a>         String strippedFileName = getStrippedFileName();<a name="685" href="#685">685</a>         <strong>try</strong> {<a name="686" href="#686">686</a>             <strong>for</strong> (Iterator&lt;ArchiveRecord> ii = iterator(); ii.hasNext();) {<a name="687" href="#687">687</a>             	ArchiveRecord r = ii.next();<a name="688" href="#688">688</a>                 <strong>if</strong> (toFile) {<a name="689" href="#689">689</a>                     cdxWriter.write(r.outputCdx(strippedFileName));<a name="690" href="#690">690</a>                     cdxWriter.newLine();<a name="691" 
href="#691">691</a>                 } <strong>else</strong> {<a name="692" href="#692">692</a>                     System.out.println(r.outputCdx(strippedFileName));<a name="693" href="#693">693</a>                 }<a name="694" href="#694">694</a>             }<a name="695" href="#695">695</a>         } <strong>finally</strong> {<a name="696" href="#696">696</a>             <strong>if</strong> (toFile) {<a name="697" href="#697">697</a>                 cdxWriter.close();<a name="698" href="#698">698</a>             }<a name="699" href="#699">699</a>         }<a name="700" href="#700">700</a>     }<a name="701" href="#701">701</a>     <a name="702" href="#702">702</a>     <em>/**<em>*</em></em><a name="703" href="#703">703</a> <em>     * Output passed record using passed format specifier.</em><a name="704" href="#704">704</a> <em>     * @param format What format to use outputting.</em><a name="705" href="#705">705</a> <em>     * @throws IOException</em><a name="706" href="#706">706</a> <em>     * @return True if handled.</em><a name="707" href="#707">707</a> <em>     */</em><a name="708" href="#708">708</a>     <strong>public</strong> <strong>boolean</strong> outputRecord(<strong>final</strong> String format)<a name="709" href="#709">709</a>     throws IOException {<a name="710" href="#710">710</a>     	<strong>boolean</strong> result = <strong>true</strong>;<a name="711" href="#711">711</a>         <strong>if</strong> (format.equals(CDX)) {<a name="712" href="#712">712</a>             System.out.println(get().outputCdx(getStrippedFileName()));<a name="713" href="#713">713</a>         } <strong>else</strong> <strong>if</strong>(format.equals(ArchiveFileConstants.DUMP)) {<a name="714" href="#714">714</a>             <em class="comment">// No point digesting if dumping content.</em><a name="715" href="#715">715</a>             setDigest(false);<a name="716" href="#716">716</a>             get().dump();<a name="717" href="#717">717</a>         } <strong>else</strong> 
{<a name="718" href="#718">718</a>         	result = false;<a name="719" href="#719">719</a>         }<a name="720" href="#720">720</a>         <strong>return</strong> result;<a name="721" href="#721">721</a>     }<a name="722" href="#722">722</a> <a name="723" href="#723">723</a>     <em>/**<em>*</em></em><a name="724" href="#724">724</a> <em>     * Dump this file on STDOUT</em><a name="725" href="#725">725</a> <em>     * @param compress True if dumped output is compressed.</em><a name="726" href="#726">726</a> <em>     * @throws IOException</em><a name="727" href="#727">727</a> <em>     * @throws java.text.ParseException</em><a name="728" href="#728">728</a> <em>     */</em><a name="729" href="#729">729</a>     <strong>public</strong> <strong>abstract</strong> <strong>void</strong> dump(<strong>final</strong> <strong>boolean</strong> compress)<a name="730" href="#730">730</a>     throws IOException, java.text.ParseException;<a name="731" href="#731">731</a>     <a name="732" href="#732">732</a>     <em>/**<em>*</em></em><a name="733" href="#733">733</a> <em>     * @return an ArchiveReader that will delete a local file on close.  
Used</em><a name="734" href="#734">734</a> <em>     * when we bring Archive files local and need to clean up afterward.</em><a name="735" href="#735">735</a> <em>     */</em><a name="736" href="#736">736</a>     <strong>public</strong> <strong>abstract</strong> <a href="../../../org/archive/io/ArchiveReader.html">ArchiveReader</a> getDeleteFileOnCloseReader(<strong>final</strong> File f);<a name="737" href="#737">737</a>     <a name="738" href="#738">738</a>     <em>/**<em>*</em></em><a name="739" href="#739">739</a> <em>     * Output passed record using passed format specifier.</em><a name="740" href="#740">740</a> <em>     * @param r ARCReader instance to output.</em><a name="741" href="#741">741</a> <em>     * @param format What format to use outputting.</em><a name="742" href="#742">742</a> <em>     * @throws IOException</em><a name="743" href="#743">743</a> <em>     */</em><a name="744" href="#744">744</a>     <strong>protected</strong> <strong>static</strong> <strong>void</strong> outputRecord(<strong>final</strong> <a href="../../../org/archive/io/ArchiveReader.html">ArchiveReader</a> r,<a name="745" href="#745">745</a>         <strong>final</strong> String format)<a name="746" href="#746">746</a>     throws IOException {<a name="747" href="#747">747</a>         <strong>if</strong> (!r.outputRecord(format)) {<a name="748" href="#748">748</a>             <strong>throw</strong> <strong>new</strong> IOException(<span class="string">"Unsupported format"</span> +<a name="749" href="#749">749</a>                 <span class="string">" (or unsupported on a single record): "</span> + format);<a name="750" href="#750">750</a>         }<a name="751" href="#751">751</a>     }<a name="752" href="#752">752</a>     <a name="753" href="#753">753</a>     <em>/**<em>*</em></em><a name="754" href="#754">754</a> <em>     * @return Base Options object filled out with help, digest, strict, etc.</em><a name="755" href="#755">755</a> <em>     * options.</em><a name="756" 
href="#756">756</a> <em>     */</em><a name="757" href="#757">757</a>     <strong>protected</strong> <strong>static</strong> Options getOptions() {<a name="758" href="#758">758</a>         Options options = <strong>new</strong> Options();<a name="759" href="#759">759</a>         options.addOption(<strong>new</strong> Option(<span class="string">"h"</span>,<span class="string">"help"</span>, false,<a name="760" href="#760">760</a>             <span class="string">"Prints this message and exits."</span>));<a name="761" href="#761">761</a>         options.addOption(<strong>new</strong> Option(<span class="string">"o"</span>,<span class="string">"offset"</span>, <strong>true</strong>,<a name="762" href="#762">762</a>             <span class="string">"Outputs record at this offset into file."</span>));<a name="763" href="#763">763</a>         options.addOption(<strong>new</strong> Option(<span class="string">"d"</span>,<span class="string">"digest"</span>, <strong>true</strong>,<a name="764" href="#764">764</a>             <span class="string">"Pass true|false. Expensive. Default: true (SHA-1)."</span>));<a name="765" href="#765">765</a>         options.addOption(<strong>new</strong> Option(<span class="string">"s"</span>,<span class="string">"strict"</span>, false,<a name="766" href="#766">766</a>             <span class="string">"Strict mode. Fails parse if incorrectly formatted file."</span>));<a name="767" href="#767">767</a>         options.addOption(<strong>new</strong> Option(<span class="string">"f"</span>,<span class="string">"format"</span>, <strong>true</strong>,<a name="768" href="#768">768</a>             <span class="string">"Output options: 'cdx', cdxfile', 'dump', 'gzipdump',"</span> +<a name="769" href="#769">769</a>             <span class="string">"'or 'nohead'. 
Default: 'cdx'."</span>));<a name="770" href="#770">770</a>         <strong>return</strong> options;<a name="771" href="#771">771</a>     }<a name="772" href="#772">772</a> }</pre><hr/><div id="footer">This page was automatically generated by <a href="http://maven.apache.org/">Maven</a></div></body></html>

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?