📄 warcwriter.java
字号:
final URI recordId, ANVLRecord xtraHeaders, final InputStream contentStream, final long contentLength, boolean enforceLength) throws IOException { if (!TYPES_LIST.contains(type)) { throw new IllegalArgumentException("Unknown record type: " + type); } if (contentLength == 0 && (xtraHeaders == null || xtraHeaders.size() <= 0)) { throw new IllegalArgumentException("Cannot write record " + "of content-length zero and base headers only."); } preWriteRecordTasks(); try { final String header = createRecordHeader(type, url, create14DigitDate, mimetype, recordId, xtraHeaders, contentLength); // TODO: Revisit endcoding of header. write(header.getBytes(WARC_HEADER_ENCODING)); if (contentStream != null && contentLength > 0) { // Write out the header/body separator. write(CRLF_BYTES); // TODO: should this be written even for zero-length? copyFrom(contentStream, contentLength, enforceLength); } // Write out the two blank lines at end of all records, per spec write(CRLF_BYTES); write(CRLF_BYTES); } finally { postWriteRecordTasks(); } } protected URI generateRecordId(final Map<String, String> qualifiers) throws IOException { URI rid = null; try { rid = GeneratorFactory.getFactory(). getQualifiedRecordID(qualifiers); } catch (URISyntaxException e) { // Convert to IOE so can let it out. throw new IOException(e.getMessage()); } return rid; } protected URI generateRecordId(final String key, final String value) throws IOException { URI rid = null; try { rid = GeneratorFactory.getFactory(). getQualifiedRecordID(key, value); } catch (URISyntaxException e) { // Convert to IOE so can let it out. throw new IOException(e.getMessage()); } return rid; } public URI writeWarcinfoRecord(String filename) throws IOException { return writeWarcinfoRecord(filename, null); } public URI writeWarcinfoRecord(String filename, final String description) throws IOException { // Strip .open suffix if present. if (filename.endsWith(WriterPoolMember.OCCUPIED_SUFFIX)) { filename = filename.substring(0, filename.length() - WriterPoolMember.OCCUPIED_SUFFIX.length()); } ANVLRecord headerrecord = new ANVLRecord(1); headerrecord.addLabelValue(HEADER_KEY_FILENAME, filename); // Ugh, hate doing this but barring larger refactoring per-WARC // 'metadata' is coming back as List<String> (?!?) String blockfields = ""; if (this.fileMetadata == null) { // only encountered in unit tests? blockfields = "dummy: value"; } else { for (String s : (List<String>) fileMetadata) { blockfields += s; } } byte[] warcinfoBody; if (description != null && description.length() > 0) { // reconstitute and add new description ANVLRecord blockrecord = ANVLRecord.load(blockfields); blockrecord.addLabelValue(CONTENT_DESCRIPTION, description); warcinfoBody = blockrecord.toString().getBytes("UTF-8"); } else { // just use in already rendered form warcinfoBody = blockfields.getBytes("UTF-8"); } URI uri = writeWarcinfoRecord("application/warc-fields", headerrecord, new ByteArrayInputStream(warcinfoBody), warcinfoBody.length); return uri; } /** * Write a warcinfo to current file. * TODO: Write crawl metadata or pointers to crawl description. * @param mimetype Mimetype of the <code>fileMetadata</code> block. * @param namedFields Named fields. Pass <code>null</code> if none. * @param fileMetadata Metadata about this WARC as RDF, ANVL, etc. * @param fileMetadataLength Length of <code>fileMetadata</code>. * @throws IOException * @return Generated record-id made with * <a href="http://en.wikipedia.org/wiki/Data:_URL">data: scheme</a> and * the current filename. */ public URI writeWarcinfoRecord(final String mimetype, final ANVLRecord namedFields, final InputStream fileMetadata, final long fileMetadataLength) throws IOException { final URI recordid = generateRecordId(TYPE, WARCINFO); writeWarcinfoRecord(ArchiveUtils.getLog14Date(), mimetype, recordid, namedFields, fileMetadata, fileMetadataLength); return recordid; } /** * Write a <code>warcinfo</code> to current file. * The <code>warcinfo</code> type uses its <code>recordId</code> as its URL. * @param recordId URI to use for this warcinfo. * @param create14DigitDate Record creation date as 14 digit date. * @param mimetype Mimetype of the <code>fileMetadata</code>. * @param namedFields Named fields. * @param fileMetadata Metadata about this WARC as RDF, ANVL, etc. * @param fileMetadataLength Length of <code>fileMetadata</code>. * @throws IOException */ public void writeWarcinfoRecord(final String create14DigitDate, final String mimetype, final URI recordId, final ANVLRecord namedFields, final InputStream fileMetadata, final long fileMetadataLength) throws IOException { writeRecord(WARCINFO, null, create14DigitDate, mimetype, recordId, namedFields, fileMetadata, fileMetadataLength, true); } public void writeRequestRecord(final String url, final String create14DigitDate, final String mimetype, final URI recordId, final ANVLRecord namedFields, final InputStream request, final long requestLength) throws IOException { writeRecord(REQUEST, url, create14DigitDate, mimetype, recordId, namedFields, request, requestLength, true); } public void writeResourceRecord(final String url, final String create14DigitDate, final String mimetype, final ANVLRecord namedFields, final InputStream response, final long responseLength) throws IOException { writeResourceRecord(url, create14DigitDate, mimetype, getRecordID(), namedFields, response, responseLength); } public void writeResourceRecord(final String url, final String create14DigitDate, final String mimetype, final URI recordId, final ANVLRecord namedFields, final InputStream response, final long responseLength) throws IOException { writeRecord(RESOURCE, url, create14DigitDate, mimetype, recordId, namedFields, response, responseLength, true); } public void writeResponseRecord(final String url, final String create14DigitDate, final String mimetype, final URI recordId, final ANVLRecord namedFields, final InputStream response, final long responseLength) throws IOException { writeRecord(RESPONSE, url, create14DigitDate, mimetype, recordId, namedFields, response, responseLength, true); } public void writeRevisitRecord(final String url, final String create14DigitDate, final String mimetype, final URI recordId, final ANVLRecord namedFields, final InputStream response, final long responseLength) throws IOException { writeRecord(REVISIT, url, create14DigitDate, mimetype, recordId, namedFields, response, responseLength, false); } public void writeMetadataRecord(final String url, final String create14DigitDate, final String mimetype, final URI recordId, final ANVLRecord namedFields, final InputStream metadata, final long metadataLength) throws IOException { writeRecord(METADATA, url, create14DigitDate, mimetype, recordId, namedFields, metadata, metadataLength, true); } /** * Convenience method for getting Record-Ids. * @return A record ID. * @throws IOException */ public static URI getRecordID() throws IOException { URI result; try { result = GeneratorFactory.getFactory().getRecordID(); } catch (URISyntaxException e) { throw new IOException(e.toString()); } return result; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -