writerpoolprocessor.html
来自「网络爬虫开源代码」· HTML 代码 · 共 732 行 · 第 1/5 页
HTML
732 行
<a name="580" href="#580">580</a> e.printStackTrace();<a name="581" href="#581">581</a> } <strong>finally</strong> {<a name="582" href="#582">582</a> <strong>try</strong> {<a name="583" href="#583">583</a> <strong>if</strong> (dis != <strong>null</strong>) {<a name="584" href="#584">584</a> dis.close();<a name="585" href="#585">585</a> }<a name="586" href="#586">586</a> } <strong>catch</strong> (IOException e) {<a name="587" href="#587">587</a> e.printStackTrace();<a name="588" href="#588">588</a> }<a name="589" href="#589">589</a> }<a name="590" href="#590">590</a> }<a name="591" href="#591">591</a> <strong>return</strong> result;<a name="592" href="#592">592</a> }<a name="593" href="#593">593</a> <a name="594" href="#594">594</a> <strong>protected</strong> <strong>void</strong> saveCheckpointSerialNumber(<strong>final</strong> File checkpointDir,<a name="595" href="#595">595</a> <strong>final</strong> <strong>int</strong> serialNo)<a name="596" href="#596">596</a> throws IOException {<a name="597" href="#597">597</a> <em class="comment">// Write out the current state of the ARCWriter serial number.</em><a name="598" href="#598">598</a> File f = <strong>new</strong> File(checkpointDir, getCheckpointStateFile());<a name="599" href="#599">599</a> DataOutputStream dos = <strong>new</strong> DataOutputStream(<strong>new</strong> FileOutputStream(f));<a name="600" href="#600">600</a> <strong>try</strong> {<a name="601" href="#601">601</a> dos.writeShort(serialNo);<a name="602" href="#602">602</a> } <strong>finally</strong> {<a name="603" href="#603">603</a> dos.close();<a name="604" href="#604">604</a> }<a name="605" href="#605">605</a> }<a name="606" href="#606">606</a> <a name="607" href="#607">607</a> <em>/**<em>*</em></em><a name="608" href="#608">608</a> <em> * Return list of metadatas to add to first arc file metadata record.</em><a name="609" href="#609">609</a> <em> * </em><a name="610" href="#610">610</a> <em> * Default is to stylesheet the order file. 
To specify stylesheet,</em><a name="611" href="#611">611</a> <em> * override {@link #getFirstrecordStylesheet()}.</em><a name="612" href="#612">612</a> <em> *</em><a name="613" href="#613">613</a> <em> * Get xml files from settingshandler. Currently order file is the</em><a name="614" href="#614">614</a> <em> * only xml file. We're NOT adding seeds to meta data.</em><a name="615" href="#615">615</a> <em> *</em><a name="616" href="#616">616</a> <em> * @return List of strings and/or files to add to arc file as metadata or</em><a name="617" href="#617">617</a> <em> * null.</em><a name="618" href="#618">618</a> <em> */</em><a name="619" href="#619">619</a> <strong>public</strong> <strong>synchronized</strong> List<String> getMetadata() {<a name="620" href="#620">620</a> <strong>if</strong> (<strong>this</strong>.cachedMetadata != <strong>null</strong>) {<a name="621" href="#621">621</a> <strong>return</strong> <strong>this</strong>.cachedMetadata;<a name="622" href="#622">622</a> }<a name="623" href="#623">623</a> <strong>return</strong> cacheMetadata();<a name="624" href="#624">624</a> }<a name="625" href="#625">625</a> <a name="626" href="#626">626</a> <strong>protected</strong> <strong>synchronized</strong> List<String> cacheMetadata() {<a name="627" href="#627">627</a> <strong>if</strong> (<strong>this</strong>.cachedMetadata != <strong>null</strong>) {<a name="628" href="#628">628</a> <strong>return</strong> <strong>this</strong>.cachedMetadata;<a name="629" href="#629">629</a> }<a name="630" href="#630">630</a> <a name="631" href="#631">631</a> <em class="comment">// If no stylesheet, return empty metadata.</em><a name="632" href="#632">632</a> <strong>if</strong> (getFirstrecordStylesheet() == <strong>null</strong> ||<a name="633" href="#633">633</a> getFirstrecordStylesheet().length() == 0) {<a name="634" href="#634">634</a> <strong>this</strong>.cachedMetadata = <strong>new</strong> ArrayList<String>(1);<a name="635" href="#635">635</a> 
<strong>this</strong>.cachedMetadata.add(<span class="string">""</span>);<a name="636" href="#636">636</a> <strong>return</strong> <strong>this</strong>.cachedMetadata;<a name="637" href="#637">637</a> }<a name="638" href="#638">638</a> <a name="639" href="#639">639</a> List<String> result = <strong>null</strong>;<a name="640" href="#640">640</a> <strong>if</strong> (!XMLSettingsHandler.<strong>class</strong>.isInstance(getSettingsHandler())) {<a name="641" href="#641">641</a> logger.warning(<span class="string">"Expected xml settings handler (No warcinfo)."</span>);<a name="642" href="#642">642</a> <em class="comment">// Early return</em><a name="643" href="#643">643</a> <strong>return</strong> result;<a name="644" href="#644">644</a> }<a name="645" href="#645">645</a> <a name="646" href="#646">646</a> <a href="../../../../org/archive/crawler/settings/XMLSettingsHandler.html">XMLSettingsHandler</a> xsh = (XMLSettingsHandler)getSettingsHandler();<a name="647" href="#647">647</a> File orderFile = xsh.getOrderFile();<a name="648" href="#648">648</a> <strong>if</strong> (!orderFile.exists() || !orderFile.canRead()) {<a name="649" href="#649">649</a> logger.severe(<span class="string">"File "</span> + orderFile.getAbsolutePath() +<a name="650" href="#650">650</a> <span class="string">" is does not exist or is not readable."</span>);<a name="651" href="#651">651</a> } <strong>else</strong> {<a name="652" href="#652">652</a> result = <strong>new</strong> ArrayList<String>(1);<a name="653" href="#653">653</a> result.add(getFirstrecordBody(orderFile));<a name="654" href="#654">654</a> }<a name="655" href="#655">655</a> <strong>this</strong>.cachedMetadata = result;<a name="656" href="#656">656</a> <strong>return</strong> <strong>this</strong>.cachedMetadata;<a name="657" href="#657">657</a> }<a name="658" href="#658">658</a> <a name="659" href="#659">659</a> <em>/**<em>*</em></em><a name="660" href="#660">660</a> <em> * @return Full path to stylesheet (It's read off the 
CLASSPATH</em><a name="661" href="#661">661</a> <em> * as resource).</em><a name="662" href="#662">662</a> <em> */</em><a name="663" href="#663">663</a> <strong>protected</strong> String getFirstrecordStylesheet() {<a name="664" href="#664">664</a> <strong>return</strong> <strong>null</strong>;<a name="665" href="#665">665</a> }<a name="666" href="#666">666</a> <a name="667" href="#667">667</a> <em>/**<em>*</em></em><a name="668" href="#668">668</a> <em> * Write the arc metadata body content.</em><a name="669" href="#669">669</a> <em> *</em><a name="670" href="#670">670</a> <em> * It's based on the order xml file but into this base we'll add other info</em><a name="671" href="#671">671</a> <em> * such as machine ip.</em><a name="672" href="#672">672</a> <em> *</em><a name="673" href="#673">673</a> <em> * @param orderFile Order file.</em><a name="674" href="#674">674</a> <a name="675" href="#675">675</a> <em> *</em><a name="676" href="#676">676</a> <em> * @return String that holds the arc metaheader body.</em><a name="677" href="#677">677</a> <em> */</em><a name="678" href="#678">678</a> <strong>protected</strong> String getFirstrecordBody(File orderFile) {<a name="679" href="#679">679</a> String result = <strong>null</strong>;<a name="680" href="#680">680</a> TransformerFactory factory = TransformerFactory.newInstance();<a name="681" href="#681">681</a> Templates templates = <strong>null</strong>;<a name="682" href="#682">682</a> Transformer xformer = <strong>null</strong>;<a name="683" href="#683">683</a> <strong>try</strong> {<a name="684" href="#684">684</a> templates = factory.<strong>new</strong>Templates(<strong>new</strong> StreamSource(<a name="685" href="#685">685</a> <strong>this</strong>.getClass().getResourceAsStream(getFirstrecordStylesheet())));<a name="686" href="#686">686</a> xformer = templates.newTransformer();<a name="687" href="#687">687</a> <em class="comment">// Below parameter names must match what is in the stylesheet.</em><a name="688" 
href="#688">688</a> xformer.setParameter(<span class="string">"software"</span>, <span class="string">"Heritrix "</span> +<a name="689" href="#689">689</a> Heritrix.getVersion() + <span class="string">" http://crawler.archive.org"</span>);<a name="690" href="#690">690</a> xformer.setParameter(<span class="string">"ip"</span>,<a name="691" href="#691">691</a> InetAddress.getLocalHost().getHostAddress());<a name="692" href="#692">692</a> xformer.setParameter(<span class="string">"hostname"</span>,<a name="693" href="#693">693</a> InetAddress.getLocalHost().getHostName());<a name="694" href="#694">694</a> StreamSource source = <strong>new</strong> StreamSource(<a name="695" href="#695">695</a> <strong>new</strong> FileInputStream(orderFile));<a name="696" href="#696">696</a> StringWriter writer = <strong>new</strong> StringWriter();<a name="697" href="#697">697</a> StreamResult target = <strong>new</strong> StreamResult(writer);<a name="698" href="#698">698</a> xformer.transform(source, target);<a name="699" href="#699">699</a> result= writer.toString();<a name="700" href="#700">700</a> } <strong>catch</strong> (TransformerConfigurationException e) {<a name="701" href="#701">701</a> logger.severe(<span class="string">"Failed transform "</span> + e);<a name="702" href="#702">702</a> } <strong>catch</strong> (FileNotFoundException e) {<a name="703" href="#703">703</a> logger.severe(<span class="string">"Failed transform, file not found "</span> + e);<a name="704" href="#704">704</a> } <strong>catch</strong> (UnknownHostException e) {<a name="705" href="#705">705</a> logger.severe(<span class="string">"Failed transform, unknown host "</span> + e);<a name="706" href="#706">706</a> } <strong>catch</strong>(TransformerException e) {<a name="707" href="#707">707</a> SourceLocator locator = e.getLocator();<a name="708" href="#708">708</a> <strong>int</strong> col = locator.getColumnNumber();<a name="709" href="#709">709</a> <strong>int</strong> line = 
locator.getLineNumber();<a name="710" href="#710">710</a> String publicId = locator.getPublicId();<a name="711" href="#711">711</a> String systemId = locator.getSystemId();<a name="712" href="#712">712</a> logger.severe(<span class="string">"Transform error "</span> + e + <span class="string">", col "</span> + col + <span class="string">", line "</span> +<a name="713" href="#713">713</a> line + <span class="string">", publicId "</span> + publicId + <span class="string">", systemId "</span> + systemId);<a name="714" href="#714">714</a> }<a name="715" href="#715">715</a> <a name="716" href="#716">716</a> <strong>return</strong> result;<a name="717" href="#717">717</a> }<a name="718" href="#718">718</a> }</pre><hr/><div id="footer">This page was automatically generated by <a href="http://maven.apache.org/">Maven</a></div></body></html>
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?