arcreader.html
来自「网络爬虫开源代码」· HTML 代码 · 共 752 行 · 第 1/5 页
HTML
752 行
<a name="596" href="#596">596</a> <strong>public</strong> <strong>boolean</strong> isDigest() {<a name="597" href="#597">597</a> <strong>return</strong> <strong>this</strong>.delegate.isDigest();<a name="598" href="#598">598</a> }<a name="599" href="#599">599</a> <a name="600" href="#600">600</a> <strong>public</strong> <strong>boolean</strong> isStrict() {<a name="601" href="#601">601</a> <strong>return</strong> <strong>this</strong>.delegate.isStrict();<a name="602" href="#602">602</a> }<a name="603" href="#603">603</a> <a name="604" href="#604">604</a> <strong>public</strong> Iterator<ArchiveRecord> iterator() {<a name="605" href="#605">605</a> <strong>return</strong> <strong>this</strong>.delegate.iterator();<a name="606" href="#606">606</a> }<a name="607" href="#607">607</a> <a name="608" href="#608">608</a> <strong>public</strong> <strong>void</strong> setDigest(<strong>boolean</strong> d) {<a name="609" href="#609">609</a> <strong>this</strong>.delegate.setDigest(d);<a name="610" href="#610">610</a> }<a name="611" href="#611">611</a> <a name="612" href="#612">612</a> <strong>public</strong> <strong>void</strong> setStrict(<strong>boolean</strong> s) {<a name="613" href="#613">613</a> <strong>this</strong>.delegate.setStrict(s);<a name="614" href="#614">614</a> }<a name="615" href="#615">615</a> <a name="616" href="#616">616</a> <strong>public</strong> List validate() throws IOException {<a name="617" href="#617">617</a> <strong>return</strong> <strong>this</strong>.delegate.validate();<a name="618" href="#618">618</a> }<a name="619" href="#619">619</a> <a name="620" href="#620">620</a> @Override<a name="621" href="#621">621</a> <strong>public</strong> <a href="../../../../org/archive/io/ArchiveRecord.html">ArchiveRecord</a> get() throws IOException {<a name="622" href="#622">622</a> <strong>return</strong> <strong>this</strong>.delegate.get();<a name="623" href="#623">623</a> }<a name="624" href="#624">624</a> <a name="625" href="#625">625</a> @Override<a 
name="626" href="#626">626</a> <strong>public</strong> String getVersion() {<a name="627" href="#627">627</a> <strong>return</strong> <strong>this</strong>.delegate.getVersion();<a name="628" href="#628">628</a> }<a name="629" href="#629">629</a> <a name="630" href="#630">630</a> @Override<a name="631" href="#631">631</a> <strong>public</strong> List validate(<strong>int</strong> noRecords) throws IOException {<a name="632" href="#632">632</a> <strong>return</strong> <strong>this</strong>.delegate.validate(noRecords);<a name="633" href="#633">633</a> }<a name="634" href="#634">634</a> <a name="635" href="#635">635</a> @Override<a name="636" href="#636">636</a> <strong>protected</strong> <a href="../../../../org/archive/io/arc/ARCRecord.html">ARCRecord</a> createArchiveRecord(InputStream is,<a name="637" href="#637">637</a> <strong>long</strong> offset)<a name="638" href="#638">638</a> throws IOException {<a name="639" href="#639">639</a> <strong>return</strong> <strong>this</strong>.delegate.createArchiveRecord(is, offset);<a name="640" href="#640">640</a> }<a name="641" href="#641">641</a> <a name="642" href="#642">642</a> @Override<a name="643" href="#643">643</a> <strong>protected</strong> <strong>void</strong> gotoEOR(<a href="../../../../org/archive/io/ArchiveRecord.html">ArchiveRecord</a> record) throws IOException {<a name="644" href="#644">644</a> <strong>this</strong>.delegate.gotoEOR(record);<a name="645" href="#645">645</a> }<a name="646" href="#646">646</a> <a name="647" href="#647">647</a> @Override<a name="648" href="#648">648</a> <strong>public</strong> <strong>void</strong> dump(<strong>boolean</strong> compress)<a name="649" href="#649">649</a> throws IOException, java.text.ParseException {<a name="650" href="#650">650</a> <strong>this</strong>.delegate.dump(compress);<a name="651" href="#651">651</a> }<a name="652" href="#652">652</a> <a name="653" href="#653">653</a> @Override<a name="654" href="#654">654</a> <strong>public</strong> String 
getDotFileExtension() {<a name="655" href="#655">655</a> <strong>return</strong> <strong>this</strong>.delegate.getDotFileExtension();<a name="656" href="#656">656</a> }<a name="657" href="#657">657</a> <a name="658" href="#658">658</a> @Override<a name="659" href="#659">659</a> <strong>public</strong> String getFileExtension() {<a name="660" href="#660">660</a> <strong>return</strong> <strong>this</strong>.delegate.getFileExtension();<a name="661" href="#661">661</a> }<a name="662" href="#662">662</a> };<a name="663" href="#663">663</a> }<a name="664" href="#664">664</a> <a name="665" href="#665">665</a> <em class="comment">// Static methods follow.</em><a name="666" href="#666">666</a> <a name="667" href="#667">667</a> <em>/**<em>*</em></em><a name="668" href="#668">668</a> <em> *</em><a name="669" href="#669">669</a> <em> * @param formatter Help formatter instance.</em><a name="670" href="#670">670</a> <em> * @param options Usage options.</em><a name="671" href="#671">671</a> <em> * @param exitCode Exit code.</em><a name="672" href="#672">672</a> <em> */</em><a name="673" href="#673">673</a> <strong>private</strong> <strong>static</strong> <strong>void</strong> usage(HelpFormatter formatter, Options options,<a name="674" href="#674">674</a> <strong>int</strong> exitCode) {<a name="675" href="#675">675</a> formatter.printHelp(<span class="string">"java org.archive.io.arc.ARCReader"</span> +<a name="676" href="#676">676</a> <span class="string">" [--digest=true|false] //\n"</span> +<a name="677" href="#677">677</a> <span class="string">" [--format=cdx|cdxfile|dump|gzipdump|header|nohead]"</span> +<a name="678" href="#678">678</a> <span class="string">" [--offset=#] //\n[--strict] [--parse] ARC_FILE|ARC_URL"</span>,<a name="679" href="#679">679</a> options);<a name="680" href="#680">680</a> System.exit(exitCode);<a name="681" href="#681">681</a> }<a name="682" href="#682">682</a> <a name="683" href="#683">683</a> <em>/**<em>*</em></em><a name="684" 
href="#684">684</a> <em> * Write out the arcfile.</em><a name="685" href="#685">685</a> <em> * </em><a name="686" href="#686">686</a> <em> * @param reader</em><a name="687" href="#687">687</a> <em> * @param format Format to use outputting.</em><a name="688" href="#688">688</a> <em> * @throws IOException</em><a name="689" href="#689">689</a> <em> * @throws java.text.ParseException</em><a name="690" href="#690">690</a> <em> */</em><a name="691" href="#691">691</a> <strong>protected</strong> <strong>static</strong> <strong>void</strong> output(<a href="../../../../org/archive/io/arc/ARCReader.html">ARCReader</a> reader, String format)<a name="692" href="#692">692</a> throws IOException, java.text.ParseException {<a name="693" href="#693">693</a> <strong>if</strong> (!reader.output(format)) {<a name="694" href="#694">694</a> <strong>throw</strong> <strong>new</strong> IOException(<span class="string">"Unsupported format: "</span> + format);<a name="695" href="#695">695</a> }<a name="696" href="#696">696</a> }<a name="697" href="#697">697</a> <a name="698" href="#698">698</a> <em>/**<em>*</em></em><a name="699" href="#699">699</a> <em> * Generate a CDX index file for an ARC file.</em><a name="700" href="#700">700</a> <em> *</em><a name="701" href="#701">701</a> <em> * @param urlOrPath The ARC file to generate a CDX index for</em><a name="702" href="#702">702</a> <em> * @throws IOException</em><a name="703" href="#703">703</a> <em> * @throws java.text.ParseException</em><a name="704" href="#704">704</a> <em> */</em><a name="705" href="#705">705</a> <strong>public</strong> <strong>static</strong> <strong>void</strong> createCDXIndexFile(String urlOrPath)<a name="706" href="#706">706</a> throws IOException, java.text.ParseException {<a name="707" href="#707">707</a> ARCReader r = ARCReaderFactory.get(urlOrPath);<a name="708" href="#708">708</a> r.setStrict(false);<a name="709" href="#709">709</a> r.setParseHttpHeaders(<strong>true</strong>);<a name="710" 
href="#710">710</a> r.setDigest(<strong>true</strong>);<a name="711" href="#711">711</a> output(r, CDX_FILE);<a name="712" href="#712">712</a> }<a name="713" href="#713">713</a> <a name="714" href="#714">714</a> <em>/**<em>*</em></em><a name="715" href="#715">715</a> <em> * Command-line interface to ARCReader.</em><a name="716" href="#716">716</a> <em> *</em><a name="717" href="#717">717</a> <em> * Here is the command-line interface:</em><a name="718" href="#718">718</a> <em> * <pre></em><a name="719" href="#719">719</a> <em> * usage: java org.archive.io.arc.ARCReader [--offset=#] ARCFILE</em><a name="720" href="#720">720</a> <em> * -h,--help Prints this message and exits.</em><a name="721" href="#721">721</a> <em> * -o,--offset Outputs record at this offset into arc file.</pre></em><a name="722" href="#722">722</a> <em> *</em><a name="723" href="#723">723</a> <em> * <p>See <code>$HERITRIX_HOME/bin/arcreader</code> for a script that'll</em><a name="724" href="#724">724</a> <em> * take care of classpaths and the calling of ARCReader.</em><a name="725" href="#725">725</a> <em> *</em><a name="726" href="#726">726</a> <em> * <p>Outputs using a pseudo-CDX format as described here:</em><a name="727" href="#727">727</a> <em> * <a href="<a href="http://www.archive.org/web/researcher/cdx_legend.php" target="alexandria_uri">http://www.archive.org/web/researcher/cdx_legend.php</a>">CDX</em><a name="728" href="#728">728</a> <em> * Legend</a> and here</em><a name="729" href="#729">729</a> <em> * <a href="<a href="http://www.archive.org/web/researcher/example_cdx.php" target="alexandria_uri">http://www.archive.org/web/researcher/example_cdx.php</a>">Example</a>.</em><a name="730" href="#730">730</a> <em> * Legend used below is: 'CDX b e a m s c V (or v if uncompressed) n g'.</em><a name="731" href="#731">731</a> <em> * Hash is hard-coded straight SHA-1 hash of content.</em><a name="732" href="#732">732</a> <em> *</em><a name="733" href="#733">733</a> <em> * @param args 
Command-line arguments.</em><a name="734" href="#734">734</a> <em> * @throws ParseException Failed parse of the command line.</em><a name="735" href="#735">735</a> <em> * @throws IOException</em><a name="736" href="#736">736</a> <em> * @throws java.text.ParseException</em><a name="737" href="#737">737</a> <em> */</em><a name="738" href="#738">738</a> <strong>public</strong> <strong>static</strong> <strong>void</strong> main(String [] args)<a name="739" href="#739">739</a> throws ParseException, IOException, java.text.ParseException {<a name="740" href="#740">740</a> Options options = getOptions();<a name="741" href="#741">741</a> options.addOption(<strong>new</strong> Option(<span class="string">"p"</span>,<span class="string">"parse"</span>, false, <span class="string">"Parse headers."</span>));<a name="742" href="#742">742</a> PosixParser parser = <strong>new</strong> PosixParser();<a name="743" href="#743">743</a> CommandLine
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?