📄 experimentalwarcwriter.java
字号:
protected void writeRecord(final String type, final String url, final String create14DigitDate, final String mimetype, final URI recordId, ANVLRecord xtraHeaders, final InputStream contentStream, final long contentLength) throws IOException { if (!TYPES_LIST.contains(type)) { throw new IllegalArgumentException("Unknown record type: " + type); } if (contentLength == 0 && (xtraHeaders == null || xtraHeaders.size() <= 0)) { throw new IllegalArgumentException("Cannot write record " + "of content-length zero and base headers only."); } preWriteRecordTasks(); try { final String header = createRecordHeader(type, url, create14DigitDate, mimetype, recordId, xtraHeaders, contentLength); // TODO: Revisit endcoding of header. write(header.getBytes(WARC_HEADER_ENCODING)); if (contentStream != null && contentLength > 0) { // Write out the header/body separator. write(CRLF_BYTES); // TODO: should this be written even for zero-length? readToLimitFrom(contentStream, contentLength, this.readbuffer); } // Write out the two blank lines at end of all records. // TODO: Why? Messes up skipping through file. Also not in grammar. write(CRLF_BYTES); write(CRLF_BYTES); } finally { postWriteRecordTasks(); } } protected URI generateRecordId(final Map<String, String> qualifiers) throws IOException { URI rid = null; try { rid = GeneratorFactory.getFactory(). getQualifiedRecordID(qualifiers); } catch (URISyntaxException e) { // Convert to IOE so can let it out. throw new IOException(e.getMessage()); } return rid; } protected URI generateRecordId(final String key, final String value) throws IOException { URI rid = null; try { rid = GeneratorFactory.getFactory(). getQualifiedRecordID(key, value); } catch (URISyntaxException e) { // Convert to IOE so can let it out. throw new IOException(e.getMessage()); } return rid; } public URI writeWarcinfoRecord(String filename) throws IOException { return writeWarcinfoRecord(filename, null); } public URI writeWarcinfoRecord(String filename, final String description) throws IOException { // Strip .open suffix if present. if (filename.endsWith(WriterPoolMember.OCCUPIED_SUFFIX)) { filename = filename.substring(0, filename.length() - WriterPoolMember.OCCUPIED_SUFFIX.length()); } ANVLRecord record = new ANVLRecord(2); record.addLabelValue(HEADER_KEY_FILENAME, filename); if (description != null && description.length() > 0) { record.addLabelValue(CONTENT_DESCRIPTION, description); } // Add warcinfo body. byte [] warcinfoBody = null; if (this.fileMetadata == null) { // TODO: What to write into a warcinfo? What to associate? warcinfoBody = "TODO: Unimplemented".getBytes(); } else { ByteArrayOutputStream baos = new ByteArrayOutputStream(); for (final Iterator i = this.fileMetadata.iterator(); i.hasNext();) { baos.write(i.next().toString().getBytes(UTF8Bytes.UTF8)); } warcinfoBody = baos.toByteArray(); } URI uri = writeWarcinfoRecord("text/xml", record, new ByteArrayInputStream(warcinfoBody), warcinfoBody.length); // TODO: If at start of file, and we're writing compressed, // write out our distinctive GZIP extensions. return uri; } /** * Write a warcinfo to current file. * TODO: Write crawl metadata or pointers to crawl description. * @param mimetype Mimetype of the <code>fileMetadata</code> block. * @param namedFields Named fields. Pass <code>null</code> if none. * @param fileMetadata Metadata about this WARC as RDF, ANVL, etc. * @param fileMetadataLength Length of <code>fileMetadata</code>. * @throws IOException * @return Generated record-id made with * <a href="http://en.wikipedia.org/wiki/Data:_URL">data: scheme</a> and * the current filename. */ public URI writeWarcinfoRecord(final String mimetype, final ANVLRecord namedFields, final InputStream fileMetadata, final long fileMetadataLength) throws IOException { final URI recordid = generateRecordId(TYPE, WARCINFO); writeWarcinfoRecord(ArchiveUtils.getLog14Date(), mimetype, recordid, namedFields, fileMetadata, fileMetadataLength); return recordid; } /** * Write a <code>warcinfo</code> to current file. * The <code>warcinfo</code> type uses its <code>recordId</code> as its URL. * @param recordId URI to use for this warcinfo. * @param create14DigitDate Record creation date as 14 digit date. * @param mimetype Mimetype of the <code>fileMetadata</code>. * @param namedFields Named fields. * @param fileMetadata Metadata about this WARC as RDF, ANVL, etc. * @param fileMetadataLength Length of <code>fileMetadata</code>. * @throws IOException */ public void writeWarcinfoRecord(final String create14DigitDate, final String mimetype, final URI recordId, final ANVLRecord namedFields, final InputStream fileMetadata, final long fileMetadataLength) throws IOException { writeRecord(WARCINFO, null, create14DigitDate, mimetype, recordId, namedFields, fileMetadata, fileMetadataLength); } public void writeRequestRecord(final String url, final String create14DigitDate, final String mimetype, final URI recordId, final ANVLRecord namedFields, final InputStream request, final long requestLength) throws IOException { writeRecord(REQUEST, url, create14DigitDate, mimetype, recordId, namedFields, request, requestLength); } public void writeResourceRecord(final String url, final String create14DigitDate, final String mimetype, final ANVLRecord namedFields, final InputStream response, final long responseLength) throws IOException { writeResourceRecord(url, create14DigitDate, mimetype, getRecordID(), namedFields, response, responseLength); } public void writeResourceRecord(final String url, final String create14DigitDate, final String mimetype, final URI recordId, final ANVLRecord namedFields, final InputStream response, final long responseLength) throws IOException { writeRecord(RESOURCE, url, create14DigitDate, mimetype, recordId, namedFields, response, responseLength); } public void writeResponseRecord(final String url, final String create14DigitDate, final String mimetype, final URI recordId, final ANVLRecord namedFields, final InputStream response, final long responseLength) throws IOException { writeRecord(RESPONSE, url, create14DigitDate, mimetype, recordId, namedFields, response, responseLength); } public void writeRevisitRecord(final String url, final String create14DigitDate, final String mimetype, final URI recordId, final ANVLRecord namedFields, final InputStream response, final long responseLength) throws IOException { writeRecord(REVISIT, url, create14DigitDate, mimetype, recordId, namedFields, response, responseLength); } public void writeMetadataRecord(final String url, final String create14DigitDate, final String mimetype, final URI recordId, final ANVLRecord namedFields, final InputStream metadata, final long metadataLength) throws IOException { writeRecord(METADATA, url, create14DigitDate, mimetype, recordId, namedFields, metadata, metadataLength); } /** * Convenience method for getting Record-Ids. * @return A record ID. * @throws IOException */ public static URI getRecordID() throws IOException { URI result; try { result = GeneratorFactory.getFactory().getRecordID(); } catch (URISyntaxException e) { throw new IOException(e.toString()); } return result; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -