⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 warcwriter.java

📁 高性能分词算法
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
    		final URI recordId, ANVLRecord xtraHeaders,            final InputStream contentStream, final long contentLength, boolean enforceLength)    throws IOException {    	if (!TYPES_LIST.contains(type)) {    		throw new IllegalArgumentException("Unknown record type: " + type);    	}    	if (contentLength == 0 &&                (xtraHeaders == null || xtraHeaders.size() <= 0)) {    		throw new IllegalArgumentException("Cannot write record " +    		    "of content-length zero and base headers only.");    	}    	        preWriteRecordTasks();        try {            final String header = createRecordHeader(type, url,            	create14DigitDate, mimetype, recordId, xtraHeaders,            	contentLength);            // TODO: Revisit endcoding of header.            write(header.getBytes(WARC_HEADER_ENCODING));                        if (contentStream != null && contentLength > 0) {                // Write out the header/body separator.                write(CRLF_BYTES); // TODO: should this be written even for zero-length?            	copyFrom(contentStream, contentLength, enforceLength);            }                        // Write out the two blank lines at end of all records, per spec            write(CRLF_BYTES);            write(CRLF_BYTES);        } finally {            postWriteRecordTasks();        }    }        protected URI generateRecordId(final Map<String, String> qualifiers)    throws IOException {    	URI rid = null;    	try {    		rid = GeneratorFactory.getFactory().    			getQualifiedRecordID(qualifiers);    	} catch (URISyntaxException e) {    		// Convert to IOE so can let it out.    		throw new IOException(e.getMessage());    	}    	return rid;    }        protected URI generateRecordId(final String key, final String value)    throws IOException {    	URI rid = null;    	try {    		rid = GeneratorFactory.getFactory().    			getQualifiedRecordID(key, value);    	} catch (URISyntaxException e) {    		// Convert to IOE so can let it out.    		throw new IOException(e.getMessage());    	}    	return rid;    }        public URI writeWarcinfoRecord(String filename)	throws IOException {    	return writeWarcinfoRecord(filename, null);    }        public URI writeWarcinfoRecord(String filename, final String description)        	throws IOException {        // Strip .open suffix if present.        if (filename.endsWith(WriterPoolMember.OCCUPIED_SUFFIX)) {        	filename = filename.substring(0,        		filename.length() - WriterPoolMember.OCCUPIED_SUFFIX.length());        }                ANVLRecord headerrecord = new ANVLRecord(1);        headerrecord.addLabelValue(HEADER_KEY_FILENAME, filename);                // Ugh, hate doing this but barring larger refactoring per-WARC        // 'metadata' is coming back as List<String> (?!?)        String blockfields = "";        if (this.fileMetadata == null) {            // only encountered in unit tests?            blockfields = "dummy: value";        } else {            for (String s :  (List<String>) fileMetadata) {                blockfields += s;            }        }        byte[] warcinfoBody;        if (description != null && description.length() > 0) {            // reconstitute and add new description            ANVLRecord blockrecord = ANVLRecord.load(blockfields);            blockrecord.addLabelValue(CONTENT_DESCRIPTION, description);            warcinfoBody = blockrecord.toString().getBytes("UTF-8");        } else {            // just use in already rendered form            warcinfoBody = blockfields.getBytes("UTF-8");        }        URI uri = writeWarcinfoRecord("application/warc-fields", headerrecord,            new ByteArrayInputStream(warcinfoBody), warcinfoBody.length);        return uri;    }        /**     * Write a warcinfo to current file.     * TODO: Write crawl metadata or pointers to crawl description.     * @param mimetype Mimetype of the <code>fileMetadata</code> block.     * @param namedFields Named fields. Pass <code>null</code> if none.     * @param fileMetadata Metadata about this WARC as RDF, ANVL, etc.     * @param fileMetadataLength Length of <code>fileMetadata</code>.     * @throws IOException     * @return Generated record-id made with     * <a href="http://en.wikipedia.org/wiki/Data:_URL">data: scheme</a> and     * the current filename.     */    public URI writeWarcinfoRecord(final String mimetype,    	final ANVLRecord namedFields, final InputStream fileMetadata,    	final long fileMetadataLength)    throws IOException {    	final URI recordid = generateRecordId(TYPE, WARCINFO);    	writeWarcinfoRecord(ArchiveUtils.getLog14Date(), mimetype, recordid,            namedFields, fileMetadata, fileMetadataLength);    	return recordid;    }        /**     * Write a <code>warcinfo</code> to current file.     * The <code>warcinfo</code> type uses its <code>recordId</code> as its URL.     * @param recordId URI to use for this warcinfo.     * @param create14DigitDate Record creation date as 14 digit date.     * @param mimetype Mimetype of the <code>fileMetadata</code>.     * @param namedFields Named fields.     * @param fileMetadata Metadata about this WARC as RDF, ANVL, etc.     * @param fileMetadataLength Length of <code>fileMetadata</code>.     * @throws IOException     */    public void writeWarcinfoRecord(final String create14DigitDate,        final String mimetype, final URI recordId, final ANVLRecord namedFields,    	final InputStream fileMetadata, final long fileMetadataLength)    throws IOException {    	writeRecord(WARCINFO, null, create14DigitDate, mimetype,        	recordId, namedFields, fileMetadata, fileMetadataLength, true);    }        public void writeRequestRecord(final String url,        final String create14DigitDate, final String mimetype,        final URI recordId,        final ANVLRecord namedFields, final InputStream request,        final long requestLength)    throws IOException {        writeRecord(REQUEST, url, create14DigitDate,            mimetype, recordId, namedFields, request,            requestLength, true);    }        public void writeResourceRecord(final String url,            final String create14DigitDate, final String mimetype,            final ANVLRecord namedFields, final InputStream response,            final long responseLength)    throws IOException {    	writeResourceRecord(url, create14DigitDate, mimetype, getRecordID(),    			namedFields, response, responseLength);    }        public void writeResourceRecord(final String url,            final String create14DigitDate, final String mimetype,            final URI recordId,            final ANVLRecord namedFields, final InputStream response,            final long responseLength)    throws IOException {        writeRecord(RESOURCE, url, create14DigitDate,            mimetype, recordId, namedFields, response,            responseLength, true);    }    public void writeResponseRecord(final String url,            final String create14DigitDate, final String mimetype,            final URI recordId,            final ANVLRecord namedFields, final InputStream response,            final long responseLength)    throws IOException {        writeRecord(RESPONSE, url, create14DigitDate,            mimetype, recordId, namedFields, response,            responseLength, true);    }        public void writeRevisitRecord(final String url,            final String create14DigitDate, final String mimetype,            final URI recordId,            final ANVLRecord namedFields, final InputStream response,            final long responseLength)    throws IOException {        writeRecord(REVISIT, url, create14DigitDate,            mimetype, recordId, namedFields, response,            responseLength, false);    }        public void writeMetadataRecord(final String url,            final String create14DigitDate, final String mimetype,            final URI recordId,            final ANVLRecord namedFields, final InputStream metadata,            final long metadataLength)    throws IOException {        writeRecord(METADATA, url, create14DigitDate,            mimetype, recordId, namedFields, metadata,            metadataLength, true);    }        /**     * Convenience method for getting Record-Ids.     * @return A record ID.     * @throws IOException     */    public static URI getRecordID() throws IOException {        URI result;        try {            result = GeneratorFactory.getFactory().getRecordID();        } catch (URISyntaxException e) {            throw new IOException(e.toString());        }        return result;    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -