⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 recordingoutputstream.java

📁 这是个爬虫和Lucene相结合最好了
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* ReplayableOutputStream * * $Id: RecordingOutputStream.java 5080 2007-04-13 20:30:49Z gojomo $ * * Created on Sep 23, 2003 * * Copyright (C) 2003 Internet Archive. * * This file is part of the Heritrix web crawler (crawler.archive.org). * * Heritrix is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * Heritrix is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with Heritrix; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */package org.archive.io;import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;import java.io.FileOutputStream;import java.io.IOException;import java.io.OutputStream;import java.security.MessageDigest;import java.security.NoSuchAlgorithmException;import java.util.logging.Level;import java.util.logging.Logger;import org.archive.util.IoUtils;/** * An output stream that records all writes to wrapped output * stream. * * A RecordingOutputStream can be wrapped around any other * OutputStream to record all bytes written to it.  You can * then request a ReplayInputStream to read those bytes. * * <p>The RecordingOutputStream uses an in-memory buffer and * backing disk file to allow it to record streams of * arbitrary length limited only by available disk space. * * <p>As long as the stream recorded is smaller than the * in-memory buffer, no disk access will occur. 
* * <p>Recorded content can be recovered as a ReplayInputStream * (via getReplayInputStream() or, for only the content after * the content-begin-mark is set, getContentReplayInputStream() ) * or as a ReplayCharSequence (via getReplayCharSequence()). * * <p>This class is also used as a straight output stream * by {@link RecordingInputStream} to which it records all reads. * {@link RecordingInputStream} is exploiting the file backed buffer * facility of this class passing <code>null</code> for the stream * to wrap.  TODO: Make a FileBackedOutputStream class that is * subclassed by RecordingInputStream. * * @author gojomo * */public class RecordingOutputStream extends OutputStream {    protected static Logger logger =        Logger.getLogger(RecordingOutputStream.class.getName());        /**     * Size of recording.     *     * Later passed to ReplayInputStream on creation.  It uses it to know when     * EOS.     */    private long size = 0;    private String backingFilename;    private OutputStream diskStream = null;    /**     * Buffer we write recordings to.     *     * We write all recordings here first till its full.  Thereafter we     * write the backing file.     */    private byte[] buffer;    /** current virtual position in the recording */    private long position;        /** flag to disable recording */    private boolean recording;        /**     * Reusable buffer for FastBufferedOutputStream     */    protected byte[] bufStreamBuf =         new byte [ FastBufferedOutputStream.DEFAULT_BUFFER_SIZE ];        /**     * True if we're to digest content.     */    private boolean shouldDigest = false;     /**     * Digest instance.     */    private MessageDigest digest = null;    /**     * Define for SHA1 alogarithm.     */    private static final String SHA1 = "SHA1";    /**     * Maximum amount of header material to accept without the content     * body beginning -- if more, throw a RecorderTooMuchHeaderException.     * TODO: make configurable? make smaller?  
   */    protected static final long MAX_HEADER_MATERIAL = 1024*1024; // 1MB        // configurable max length, max time limits    /** maximum length of material to record before throwing exception */     protected long maxLength = Long.MAX_VALUE;    /** maximum time to record before throwing exception */     protected long timeoutMs = Long.MAX_VALUE;    /** maximum rate to record (adds delays to hit target rate) */     protected long maxRateBytesPerMs = Long.MAX_VALUE;    /** time recording begins for timeout, rate calculations */     protected long startTime = Long.MAX_VALUE;        /**     * When recording HTTP, where the content-body starts.     */    private long contentBeginMark;    /**     * Stream to record.     */    private OutputStream out = null;    // mark/reset support     /** furthest position reached before any reset()s */    private long maxPosition = 0;    /** remembered position to reset() to */     private long markPosition = 0;     /**     * Create a new RecordingOutputStream.     *     * @param bufferSize Buffer size to use.     * @param backingFilename Name of backing file to use.     */    public RecordingOutputStream(int bufferSize, String backingFilename) {        this.buffer = new byte[bufferSize];        this.backingFilename = backingFilename;        recording = true;    }    /**     * Wrap the given stream, both recording and passing along any data written     * to this RecordingOutputStream.     *     * @throws IOException If failed creation of backing file.     */    public void open() throws IOException {        this.open(null);    }    /**     * Wrap the given stream, both recording and passing along any data written     * to this RecordingOutputStream.     *     * @param wrappedStream Stream to wrap.  May be null for case where we     * want to write to a file backed stream only.     *     * @throws IOException If failed creation of backing file.     
*/    public void open(OutputStream wrappedStream) throws IOException {        if(isOpen()) {            // error; should not be opening/wrapping in an unclosed             // stream remains open            throw new IOException("ROS already open for "                    +Thread.currentThread().getName());        }        this.out = wrappedStream;        this.position = 0;        this.markPosition = 0;        this.maxPosition = 0;         this.size = 0;        this.contentBeginMark = -1;        // ensure recording turned on        this.recording = true;        // Always begins false; must use startDigest() to begin        this.shouldDigest = false;        if (this.diskStream != null) {            closeDiskStream();        }        if (this.diskStream == null) {            // TODO: Fix so we only make file when its actually needed.            FileOutputStream fis = new FileOutputStream(this.backingFilename);                        this.diskStream = new RecyclingFastBufferedOutputStream(fis, bufStreamBuf);        }        startTime = System.currentTimeMillis();    }    public void write(int b) throws IOException {        if(position<maxPosition) {            // revisiting previous content; do nothing but advance position            position++;            return;         }        if(recording) {            record(b);        }        if (this.out != null) {            this.out.write(b);        }        checkLimits();    }    public void write(byte[] b, int off, int len) throws IOException {        if(position < maxPosition) {            if(position+len<=maxPosition) {                // revisiting; do nothing but advance position                position += len;                return;            }            // consume part of the array doing nothing but advancing position            long consumeRange = maxPosition - position;             position += consumeRange;            off += consumeRange;            len -= consumeRange;         }        if(recording) {            
record(b, off, len);        }        if (this.out != null) {            this.out.write(b, off, len);        }        checkLimits();    }        /**     * Check any enforced limits.      */    protected void checkLimits() throws RecorderIOException {        // too much material before finding end of headers?         if (contentBeginMark<0) {            // no mark yet            if(position>MAX_HEADER_MATERIAL) {                throw new RecorderTooMuchHeaderException();            }        }        // overlong?        if(position>maxLength) {            throw new RecorderLengthExceededException();         }        // taking too long?         long duration = System.currentTimeMillis() - startTime + 1; // !divzero        if(duration>timeoutMs) {            throw new RecorderTimeoutException();         }        // need to throttle reading to hit max configured rate?         if(position/duration > maxRateBytesPerMs) {            long desiredDuration = position / maxRateBytesPerMs;            try {                Thread.sleep(desiredDuration-duration);            } catch (InterruptedException e) {                logger.log(Level.WARNING,                        "bandwidth throttling sleep interrupted", e);            }         }    }    /**     * Record the given byte for later recovery     *     * @param b Int to record.     *     * @exception IOException Failed write to backing file.     */    private void record(int b) throws IOException {        if (this.shouldDigest) {            this.digest.update((byte)b);        }        if (this.position >= this.buffer.length) {            // TODO: Its possible to call write w/o having first opened a            // stream.  Protect ourselves against this.            
assert this.diskStream != null: "Diskstream is null";            this.diskStream.write(b);        } else {            this.buffer[(int) this.position] = (byte) b;        }        this.position++;    }    /**     * Record the given byte-array range for recovery later     *     * @param b Buffer to record.     * @param off Offset into buffer at which to start recording.     * @param len Length of buffer to record.     *     * @exception IOException Failed write to backing file.     */    private void record(byte[] b, int off, int len) throws IOException {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -