⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 arcwriter.java

📁 一个基于lucene&heritrix的搜索引擎
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
            getMetadataHeaderLinesTwoAndThree("1 " +                ((metadataBodyLength > 0)? "1": "0"));        int recordLength = metadataBodyLength +            metadataHeaderLinesTwoAndThree.getBytes(DEFAULT_ENCODING).length;        String metadataHeaderStr = ARC_MAGIC_NUMBER + getBaseFilename() +            " 0.0.0.0 " + date + " text/plain " + recordLength +            metadataHeaderLinesTwoAndThree;        ByteArrayOutputStream metabaos =            new ByteArrayOutputStream(recordLength);        // Write the metadata header.        metabaos.write(metadataHeaderStr.getBytes(DEFAULT_ENCODING));        // Write the metadata body, if anything to write.        if (metadataBodyLength > 0) {            writeMetaData(metabaos);        }                // Write out a LINE_SEPARATORs to end this record.        metabaos.write(LINE_SEPARATOR);                // Now get bytes of all just written and compress if flag set.        byte [] bytes = metabaos.toByteArray();                if(isCompressed()) {            // GZIP the header but catch the gzipping into a byte array so we            // can add the special IA GZIP header to the product.  After            // manipulations, write to the output stream (The JAVA GZIP            // implementation does not give access to GZIP header. It            // produces a 'default' header only).  We can get away w/ these            // maniupulations because the GZIP 'default' header doesn't            // do the 'optional' CRC'ing of the header.            byte [] gzippedMetaData = GzippedInputStream.gzip(bytes);            if (gzippedMetaData[3] != 0) {                throw new IOException("The GZIP FLG header is unexpectedly " +                    " non-zero.  Need to add smarter code that can deal " +                    " when already extant extra GZIP header fields.");            }            // Set the GZIP FLG header to '4' which says that the GZIP header            // has extra fields.  
Then insert the alex {'L', 'X', '0', '0', '0,            // '0'} 'extra' field.  The IA GZIP header will also set byte            // 9 (zero-based), the OS byte, to 3 (Unix).  We'll do the same.            gzippedMetaData[3] = 4;            gzippedMetaData[9] = 3;            byte [] assemblyBuffer = new byte[gzippedMetaData.length +                ARC_GZIP_EXTRA_FIELD.length];            // '10' in the below is a pointer past the following bytes of the            // GZIP header: ID1 ID2 CM FLG + MTIME(4-bytes) XFL OS.  See            // RFC1952 for explaination of the abbreviations just used.            System.arraycopy(gzippedMetaData, 0, assemblyBuffer, 0, 10);            System.arraycopy(ARC_GZIP_EXTRA_FIELD, 0, assemblyBuffer, 10,                ARC_GZIP_EXTRA_FIELD.length);            System.arraycopy(gzippedMetaData, 10, assemblyBuffer,                10 + ARC_GZIP_EXTRA_FIELD.length, gzippedMetaData.length - 10);            bytes = assemblyBuffer;        }        return bytes;    }        public String getMetadataHeaderLinesTwoAndThree(String version) {        StringBuffer buffer = new StringBuffer();        buffer.append(LINE_SEPARATOR);        buffer.append(version);        buffer.append(" InternetArchive");        buffer.append(LINE_SEPARATOR);        buffer.append("URL IP-address Archive-date Content-type Archive-length");        buffer.append(LINE_SEPARATOR);        return buffer.toString();    }    /**     * Write all metadata to passed <code>baos</code>.     *     * @param baos Byte array to write to.     
* @throws UnsupportedEncodingException     * @throws IOException     */    private void writeMetaData(ByteArrayOutputStream baos)            throws UnsupportedEncodingException, IOException {        if (this.metadata == null) {            return;        }        for (Iterator i = this.metadata.iterator();                i.hasNext();) {            Object obj = i.next();            if (obj instanceof String) {                baos.write(((String)obj).getBytes(DEFAULT_ENCODING));            } else if (obj instanceof File) {                InputStream is = null;                try {                    is = new BufferedInputStream(                        new FileInputStream((File)obj));                    byte [] buffer = new byte[4096];                    for (int read = -1; (read = is.read(buffer)) != -1;) {                        baos.write(buffer, 0, read);                    }                } finally {                    if (is != null) {                        is.close();                    }                }            } else if (obj != null) {                logger.severe("Unsupported metadata type: " + obj);            }        }        return;    }    /**     * @return Total length of metadata.     
* @throws UnsupportedEncodingException     */    private int getMetadataLength()    throws UnsupportedEncodingException {        int result = -1;        if (this.metadata == null) {            result = 0;        } else {            for (Iterator i = this.metadata.iterator();                    i.hasNext();) {                Object obj = i.next();                if (obj instanceof String) {                    result += ((String)obj).getBytes(DEFAULT_ENCODING).length;                } else if (obj instanceof File) {                    result += ((File)obj).length();                } else {                    logger.severe("Unsupported metadata type: " + obj);                }            }        }        return result;    }    public void write(String uri, String contentType, String hostIP,            long fetchBeginTimeStamp, int recordLength,            ByteArrayOutputStream baos)    throws IOException {        preWriteRecordTasks();        try {            write(getMetaLine(uri, contentType, hostIP,                fetchBeginTimeStamp, recordLength).getBytes(UTF8));            baos.writeTo(getOutputStream());            write(LINE_SEPARATOR);        } finally {            postWriteRecordTasks();        }    }    public void write(String uri, String contentType, String hostIP,            long fetchBeginTimeStamp, int recordLength, InputStream in)    throws IOException {        preWriteRecordTasks();        try {            write(getMetaLine(uri, contentType, hostIP,                    fetchBeginTimeStamp, recordLength).getBytes(UTF8));            readFullyFrom(in, recordLength, this.readbuffer);            write(LINE_SEPARATOR);        } finally {            postWriteRecordTasks();        }    }    public void write(String uri, String contentType, String hostIP,            long fetchBeginTimeStamp, int recordLength,            ReplayInputStream ris)    throws IOException {        preWriteRecordTasks();        try {            write(getMetaLine(uri, contentType, 
hostIP,                    fetchBeginTimeStamp, recordLength).getBytes(UTF8));            try {                ris.readFullyTo(getOutputStream());                long remaining = ris.remaining();                // Should be zero at this stage.  If not, something is                // wrong.                if (remaining != 0) {                    String message = "Gap between expected and actual: " +                        remaining + LINE_SEPARATOR + DevUtils.extraInfo() +                        " writing arc " + this.getFile().getAbsolutePath();                    DevUtils.warnHandle(new Throwable(message), message);                    throw new IOException(message);                }            } finally {                ris.close();            }                         // Write out trailing newline            write(LINE_SEPARATOR);        } finally {            postWriteRecordTasks();        }    }        /**     * @param uri     * @param contentType     * @param hostIP     * @param fetchBeginTimeStamp     * @param recordLength     * @return Metadata line for an ARCRecord made of passed components.     
* @exception IOException If <code>fetchBeginTimeStamp</code> is not
     * positive or the assembled line fails validation.
     */
    protected String getMetaLine(String uri, String contentType, String hostIP,
        long fetchBeginTimeStamp, int recordLength)
    throws IOException {
        if (fetchBeginTimeStamp <= 0) {
            throw new IOException("Bogus fetchBeginTimestamp: " +
                Long.toString(fetchBeginTimeStamp));
        }
        // Convert each component to its textual form before assembling.
        String timeStamp = ArchiveUtils.get14DigitDate(fetchBeginTimeStamp);
        String mimetype = MimetypeUtils.truncate(contentType);
        String length = Integer.toString(recordLength);
        String metaLine =
            createMetaline(uri, hostIP, timeStamp, mimetype, length);
        return validateMetaLine(metaLine);
    }

    /**
     * Join the passed fields into a single metadata line.
     *
     * Fields are emitted in the order uri, hostIP, timeStamp, mimetype,
     * recordLength, separated by <code>HEADER_FIELD_SEPARATOR</code> and
     * terminated by <code>LINE_SEPARATOR</code>.  No validation is done here.
     *
     * @param uri
     * @param hostIP
     * @param timeStamp
     * @param mimetype
     * @param recordLength
     * @return The assembled metadata line.
     */
    public String createMetaline(String uri, String hostIP,
            String timeStamp, String mimetype, String recordLength) {
        String fields = uri + HEADER_FIELD_SEPARATOR + hostIP +
            HEADER_FIELD_SEPARATOR + timeStamp +
            HEADER_FIELD_SEPARATOR + mimetype +
            HEADER_FIELD_SEPARATOR + recordLength;
        return fields + LINE_SEPARATOR;
    }

    /**
     * Check a metadata line before it is written.
     *
     * The line must be no longer than <code>MAX_METADATA_LINE_LENGTH</code>
     * and must match <code>METADATA_LINE_PATTERN</code> in full.
     *
     * @param metaLineStr Line to check.
     * @throws IOException If the line is too long or does not match the
     * expected pattern.
     * @return The passed in metaline.
     */
    protected String validateMetaLine(String metaLineStr)
    throws IOException {
        int length = metaLineStr.length();
        if (length > MAX_METADATA_LINE_LENGTH) {
            throw new IOException("Metadata line length is " +
                length + " which is > than maximum " +
                MAX_METADATA_LINE_LENGTH);
        }
        if (METADATA_LINE_PATTERN.matcher(metaLineStr).matches()) {
            return metaLineStr;
        }
        throw new IOException("Metadata line doesn't match expected" +
            " pattern: " + metaLineStr);
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -