📄 arcwriter.html
字号:
<a name="160" href="#160">160</a> <em> * Constructor.</em><a name="161" href="#161">161</a> <em> *</em><a name="162" href="#162">162</a> <em> * @param serialNo used to generate unique file name sequences</em><a name="163" href="#163">163</a> <em> * @param dirs Where to drop the ARC files.</em><a name="164" href="#164">164</a> <em> * @param prefix ARC file prefix to use. If null, we use</em><a name="165" href="#165">165</a> <em> * DEFAULT_ARC_FILE_PREFIX.</em><a name="166" href="#166">166</a> <em> * @param cmprs Compress the ARC files written. The compression is done</em><a name="167" href="#167">167</a> <em> * by individually gzipping each record added to the ARC file: i.e. the</em><a name="168" href="#168">168</a> <em> * ARC file is a bunch of gzipped records concatenated together.</em><a name="169" href="#169">169</a> <em> * @param maxSize Maximum size for ARC files written.</em><a name="170" href="#170">170</a> <em> */</em><a name="171" href="#171">171</a> <strong>public</strong> <a href="../../../../org/archive/io/arc/ARCWriter.html">ARCWriter</a>(<strong>final</strong> AtomicInteger serialNo, <strong>final</strong> List<File> dirs,<a name="172" href="#172">172</a> <strong>final</strong> String prefix, <strong>final</strong> <strong>boolean</strong> cmprs, <strong>final</strong> <strong>int</strong> maxSize) {<a name="173" href="#173">173</a> <strong>this</strong>(serialNo, dirs, prefix, <span class="string">""</span>, cmprs, maxSize, <strong>null</strong>);<a name="174" href="#174">174</a> }<a name="175" href="#175">175</a> <a name="176" href="#176">176</a> <em>/**<em>*</em></em><a name="177" href="#177">177</a> <em> * Constructor.</em><a name="178" href="#178">178</a> <em> *</em><a name="179" href="#179">179</a> <em> * @param serialNo used to generate unique file name sequences</em><a name="180" href="#180">180</a> <em> * @param dirs Where to drop files.</em><a name="181" href="#181">181</a> <em> * @param prefix File prefix to use.</em><a name="182" href="#182">182</a> <em> * @param cmprs Compress the records written. </em><a name="183" href="#183">183</a> <em> * @param maxSize Maximum size for ARC files written.</em><a name="184" href="#184">184</a> <em> * @param suffix File tail to use. If null, unused.</em><a name="185" href="#185">185</a> <em> * @param meta File meta data. Can be null. Is list of File and/or</em><a name="186" href="#186">186</a> <em> * String objects.</em><a name="187" href="#187">187</a> <em> */</em><a name="188" href="#188">188</a> <strong>public</strong> <a href="../../../../org/archive/io/arc/ARCWriter.html">ARCWriter</a>(<strong>final</strong> AtomicInteger serialNo, <strong>final</strong> List<File> dirs,<a name="189" href="#189">189</a> <strong>final</strong> String prefix, <strong>final</strong> String suffix, <strong>final</strong> <strong>boolean</strong> cmprs,<a name="190" href="#190">190</a> <strong>final</strong> <strong>int</strong> maxSize, <strong>final</strong> List meta) {<a name="191" href="#191">191</a> <strong>super</strong>(serialNo, dirs, prefix, suffix, cmprs, maxSize,<a name="192" href="#192">192</a> ARC_FILE_EXTENSION);<a name="193" href="#193">193</a> <strong>this</strong>.metadata = meta;<a name="194" href="#194">194</a> }<a name="195" href="#195">195</a> <a name="196" href="#196">196</a> <strong>protected</strong> String createFile()<a name="197" href="#197">197</a> throws IOException {<a name="198" href="#198">198</a> String name = <strong>super</strong>.createFile();<a name="199" href="#199">199</a> writeFirstRecord(getCreateTimestamp());<a name="200" href="#200">200</a> <strong>return</strong> name;<a name="201" href="#201">201</a> }<a name="202" href="#202">202</a> <a name="203" href="#203">203</a> <strong>private</strong> <strong>void</strong> writeFirstRecord(<strong>final</strong> String ts)<a name="204" href="#204">204</a> throws IOException {<a name="205" href="#205">205</a> write(generateARCFileMetaData(ts));<a name="206" href="#206">206</a> }<a name="207" href="#207">207</a> <a name="208" href="#208">208</a> <em>/**<em>*</em></em><a name="209" href="#209">209</a> <em> * Write out the ARCMetaData.</em><a name="210" href="#210">210</a> <em> *</em><a name="211" href="#211">211</a> <em> * <p>Generate ARC file meta data. Currently we only do version 1 of the</em><a name="212" href="#212">212</a> <em> * ARC file formats or version 1.1 when metadata has been supplied (We</em><a name="213" href="#213">213</a> <em> * write it into the body of the first record in the arc file).</em><a name="214" href="#214">214</a> <em> *</em><a name="215" href="#215">215</a> <em> * <p>Version 1 metadata looks roughly like this:</em><a name="216" href="#216">216</a> <em> *</em><a name="217" href="#217">217</a> <em> * <pre>filedesc://testWriteRecord-JunitIAH20040110013326-2.arc 0.0.0.0 //</em><a name="218" href="#218">218</a> <em> * 20040110013326 text/plain 77</em><a name="219" href="#219">219</a> <em> * 1 0 InternetArchive</em><a name="220" href="#220">220</a> <em> * URL IP-address Archive-date Content-type Archive-length</em><a name="221" href="#221">221</a> <em> * </pre></em><a name="222" href="#222">222</a> <em> *</em><a name="223" href="#223">223</a> <em> * <p>If compress is set, then we generate a header that has been gzipped</em><a name="224" href="#224">224</a> <em> * in the Internet Archive manner. Such a gzipping enables the FEXTRA</em><a name="225" href="#225">225</a> <em> * flag in the FLG field of the gzip header. It then appends an extra</em><a name="226" href="#226">226</a> <em> * header field: '8', '0', 'L', 'X', '0', '0', '0', '0'. The first two</em><a name="227" href="#227">227</a> <em> * bytes are the length of the field and the last 6 bytes the Internet</em><a name="228" href="#228">228</a> <em> * Archive header. To learn about GZIP format, see RFC1952. To learn</em><a name="229" href="#229">229</a> <em> * about the Internet Archive extra header field, read the source for</em><a name="230" href="#230">230</a> <em> * av_ziparc which can be found at</em><a name="231" href="#231">231</a> <em> * <code>alexa/vista/alexa-tools-1.2/src/av_ziparc.cc</code>.</em><a name="232" href="#232">232</a> <em> *</em><a name="233" href="#233">233</a> <em> * <p>We do things in this roundabout manner because the java</em><a name="234" href="#234">234</a> <em> * GZIPOutputStream does not give access to GZIP header fields.</em><a name="235" href="#235">235</a> <em> *</em><a name="236" href="#236">236</a> <em> * @param date Date to put into the ARC metadata.</em><a name="237" href="#237">237</a> <em> *</em><a name="238" href="#238">238</a> <em> * @return Byte array filled w/ the arc header.</em><a name="239" href="#239">239</a> <em> * @throws IOException</em><a name="240" href="#240">240</a> <em> */</em><a name="241" href="#241">241</a> <strong>private</strong> byte [] generateARCFileMetaData(String date)<a name="242" href="#242">242</a> throws IOException {<a name="243" href="#243">243</a> <strong>int</strong> metadataBodyLength = getMetadataLength();<a name="244" href="#244">244</a> <em class="comment">// If metadata body, then the minor part of the version is '1' rather</em><a name="245" href="#245">245</a> <em class="comment">// than '0'.</em><a name="246" href="#246">246</a> String metadataHeaderLinesTwoAndThree =<a name="247" href="#247">247</a> getMetadataHeaderLinesTwoAndThree(<span class="string">"1 "</span> +<a name="248" href="#248">248</a> ((metadataBodyLength > 0)? <span class="string">"1"</span>: <span class="string">"0"</span>));<a name="249" href="#249">249</a> <strong>int</strong> recordLength = metadataBodyLength +<a name="250" href="#250">250</a> metadataHeaderLinesTwoAndThree.getBytes(DEFAULT_ENCODING).length;<a name="251" href="#251">251</a> String metadataHeaderStr = ARC_MAGIC_NUMBER + getBaseFilename() +<a name="252" href="#252">252</a> <span class="string">" 0.0.0.0 "</span> + date + <span class="string">" text/plain "</span> + recordLength +<a name="253" href="#253">253</a> metadataHeaderLinesTwoAndThree;<a name="254" href="#254">254</a> ByteArrayOutputStream metabaos =<a name="255" href="#255">255</a> <strong>new</strong> ByteArrayOutputStream(recordLength);<a name="256" href="#256">256</a> <em class="comment">// Write the metadata header.</em><a name="257" href="#257">257</a> metabaos.write(metadataHeaderStr.getBytes(DEFAULT_ENCODING));<a name="258" href="#258">258</a> <em class="comment">// Write the metadata body, if anything to write.</em><a name="259" href="#259">259</a> <strong>if</strong> (metadataBodyLength > 0) {<a name="260" href="#260">260</a> writeMetaData(metabaos);<a name="261" href="#261">261</a> }<a name="262" href="#262">262</a> <a name="263" href="#263">263</a> <em class="comment">// Write out a LINE_SEPARATORs to end this record.</em><a name="264" href="#264">264</a> metabaos.write(LINE_SEPARATOR);<a name="265" href="#265">265</a> <a name="266" href="#266">266</a> <em class="comment">// Now get bytes of all just written and compress if flag set.</em><a name="267" href="#267">267</a> byte [] bytes = metabaos.toByteArray();<a name="268" href="#268">268</a> <a name="269" href="#269">269</a> <strong>if</strong>(isCompressed()) {<a name="270" href="#270">270</a> <em class="comment">// GZIP the header but catch the gzipping into a byte array so we</em><a name="271" href="#271">271</a> <em class="comment">// can add the special IA GZIP header to the product. After</em><a name="272" href="#272">272</a> <em class="comment">// manipulations, write to the output stream (The JAVA GZIP</em><a name="273" href="#273">273</a> <em class="comment">// implementation does not give access to GZIP header. It</em><a name="274" href="#274">274</a> <em class="comment">// produces a 'default' header only). We can get away w/ these</em><a name="275" href="#275">275</a> <em class="comment">// maniupulations because the GZIP 'default' header doesn't</em><a name="276" href="#276">276</a> <em class="comment">// do the 'optional' CRC'ing of the header.</em><a name="277" href="#277">277</a> byte [] gzippedMetaData = GzippedInputStream.gzip(bytes);<a name="278" href="#278">278</a> <strong>if</strong> (gzippedMetaData[3] != 0) {<a name="279" href="#279">279</a> <strong>throw</strong> <strong>new</strong> IOException(<span class="string">"The GZIP FLG header is unexpectedly "</span> +<a name="280" href="#280">280</a> <span class="string">" non-zero. Need to add smarter code that can deal "</span> +<a name="281" href="#281">281</a> <span class="string">" when already extant extra GZIP header fields."</span>);<a name="282" href="#282">282</a> }<a name="283" href="#283">283</a> <em class="comment">// Set the GZIP FLG header to '4' which says that the GZIP header</em><a name="284" href="#284">284</a> <em class="comment">// has extra fields. Then insert the alex {'L', 'X', '0', '0', '0,</em><a name="285" href="#285">285</a> <em class="comment">// '0'} 'extra' field. The IA GZIP header will also set byte</em><a name="286" href="#286">286</a> <em class="comment">// 9 (zero-based), the OS byte, to 3 (Unix). We'll do the same.</em><a name="287" href="#287">287</a> gzippedMetaData[3] = 4;<a name="288" href="#288">288</a> gzippedMetaData[9] = 3;<a name="289" href="#289">289</a> byte [] assemblyBuffer = <strong>new</strong> byte[gzippedMetaData.length +<a name="290" href="#290">290</a> ARC_GZIP_EXTRA_FIELD.length];<a name="291" href="#291">291</a> <em class="comment">// '10' in the below is a pointer past the following bytes of the</em><a name="292" href="#292">292</a> <em class="comment">// GZIP header: ID1 ID2 CM FLG + MTIME(4-bytes) XFL OS. See</em><a name="293" href="#293">293</a> <em class="comment">// RFC1952 for explaination of the abbreviations just used.</em><a name="294" href="#294">294</a> System.arraycopy(gzippedMetaData, 0, assemblyBuffer, 0, 10);<a name="295" href="#295">295</a> System.arraycopy(ARC_GZIP_EXTRA_FIELD, 0, assemblyBuffer, 10,<a name="296" href="#296">296</a> ARC_GZIP_EXTRA_FIELD.length);<a name="297" href="#297">297</a> System.arraycopy(gzippedMetaData, 10, assemblyBuffer,<a name="298" href="#298">298</a> 10 + ARC_GZIP_EXTRA_FIELD.length, gzippedMetaData.length - 10);<a name="299" href="#299">299</a> bytes = assemblyBuffer;<a name="300" href="#300">300</a> }<a name="301" href="#301">301</a> <strong>return</strong> bytes;<a name="302" href="#302">302</a> }<a name="303" href="#303">303</a> <a name="304" href="#304">304</a> <strong>public</strong> String getMetadataHeaderLinesTwoAndThree(String version) {<a name="305" href="#305">305</a> StringBuffer buffer = <strong>new</strong> StringBuffer();<a name="306" href="#306">306</a> buffer.append(LINE_SEPARATOR);<a name="307" href="#307">307</a> buffer.append(version);<a name="308" href="#308">308</a> buffer.append(<span class="string">" InternetArchive"</span>);<a name="309" href="#309">309</a> buffer.append(LINE_SEPARATOR);<a name="310" href="#310">310</a> buffer.append(<span class="string">"URL IP-address Archive-date Content-type Archive-length"</span>);<a name="311" href="#311">311</a> buffer.append(LINE_SEPARATOR);<a name="312" href="#312">312</a> <strong>return</strong> buffer.toString();<a name="313" href="#313">313</a> }<a name="314" href="#314">314</a> <a name="315" href="#315">315</a> <em>/**<em>*</em></em><a name="316" href="#316">316</a> <em> * Write all metadata to passed <code>baos</code>.</em><a name="317" href="#317">317</a> <em> *</em><a name="318" href="#318">318</a> <em> * @param baos Byte array to write to.</em><a name="319" href="#319">319</a> <em> * @throws UnsupportedEncodingException</em><a name="320" href="#320">320</a> <em> * @throws IOException</em><a name="321" href="#321">321</a> <em> */</em><a name="322" href="#322">322</a> <strong>private</strong> <strong>void</strong> writeMetaData(ByteArrayOutputStream baos)<a name="323" href="#323">323</a> throws UnsupportedEncodingException, IOException {<a name="324" href="#324">324</a> <strong>if</strong> (<strong>this</strong>.metadata == <strong>null</strong>) {<a name="325" href="#325">325</a> <strong>return</strong>;<a name="326" href="#326">326</a> }<a name="327" href="#327">327</a>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -