⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gzippedinputstream.java

📁 这是个爬虫和lucece相结合最好了
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
                try {                    gotoEOR();                } catch (IOException e) {                    if ((e instanceof ZipException) ||                        (e.getMessage() != null &&                         e.getMessage().startsWith("Corrupt GZIP trailer"))) {                        // Try skipping end of bad record; try moving to next.                        logger.info("Skipping exception " + e.getMessage());                    } else {                        throw new RuntimeException(e);                    }                }                return moveToNextGzipMember();            }                        /**             * @return An InputStream onto a GZIP Member.             */            public Object next() {                try {                    gzipMemberSeek();                } catch (IOException e) {                    throw new RuntimeException("Failed move to EOR or " +                        "failed header read: " + e.getMessage());                }                return this.compressedStream;            }                        public void remove() {                throw new UnsupportedOperationException();            }        };       }        /**     * @return True if we found another record in the stream.     */    protected boolean moveToNextGzipMember() {        boolean result = false;        // Move to the next gzip member, if there is one, positioning        // ourselves by backing up the stream so we reread any inflater        // remaining bytes. Then add 8 bytes to get us past the GZIP        // CRC trailer block that ends all gzip members.        try {            RepositionableStream ps = (RepositionableStream)getInputStream();            // 8 is sizeof gzip CRC block thats on tail of gzipped            // record. If remaining is < 8 then experience indicates            // we're seeking past the gzip header -- don't backup the            // stream.            if (getInflater().getRemaining() > GZIP_TRAILER_LENGTH) {                ps.position(position() - getInflater().getRemaining() +                    GZIP_TRAILER_LENGTH);            }            for (int read = -1, headerRead = 0; true; headerRead = 0) {                // Give a hint to underlying stream that we're going to want to                // do some backing up.                getInputStream().mark(3);                if ((read = getInputStream().read()) == -1) {                    break;                }                if(compareBytes(read, GZIPInputStream.GZIP_MAGIC)) {                    headerRead++;                    if ((read = getInputStream().read()) == -1) {                    	break;                    }                    if(compareBytes(read, GZIPInputStream.GZIP_MAGIC >> 8)) {                        headerRead++;                        if ((read = getInputStream().read()) == -1) {                        	break;                        }                        if (compareBytes(read, Deflater.DEFLATED)) {                            headerRead++;                            // Found gzip header. Backup the stream the                            // bytes we just found and set result true.                            getInputStream().reset();                            result = true;                            break;                        }                    }                    // Didn't find gzip header.  Reset stream but one byte                    // futher on then redo header tests.                    ps.position(ps.position() - headerRead);                }            }        } catch (IOException e) {            throw new RuntimeException("Failed i/o: " + e.getMessage());        }        return result;    }        protected boolean compareBytes(final int a, final int b) {    	return ((byte)(a & 0xff)) == ((byte)(b & 0xff));    }      protected Inflater getInflater() {        return this.inf;    }        protected InputStream getInputStream() {        return this.in;    }        protected GzipHeader getGzipHeader() {        return this.gzipHeader;    }        /**     * Move to next gzip member in the file.     */    protected void resetInflater() {        this.eos = false;        this.inf.reset();    }        /**     * Read in the gzip header.     * @throws IOException     */    protected void readHeader() throws IOException {        new GzipHeader(this.in);        // Reset the crc for subsequent reads.        this.crc.reset();    }    /**     * Seek to passed offset.     *      * After positioning the stream, it resets the inflater.     * Assumption is that public use of this method is only     * to position stream at start of a gzip member.     *      * @param position Absolute position of a gzip member start.     * @throws IOException     */    public void position(long position) throws IOException {        ((RepositionableStream)this.in).position(position);        resetInflater();    }    public long position() throws IOException {       return  ((RepositionableStream)this.in).position();    }        /**     * Seek to a gzip member.     *      * Moves stream to new position, resets inflater and reads in the gzip     * header ready for subsequent calls to read.     *      * @param position Absolute position of a gzip member start.     * @throws IOException     */    public void gzipMemberSeek(long position) throws IOException {        position(position);        readHeader();    }        public void gzipMemberSeek() throws IOException {        gzipMemberSeek(position());    }        /**     * Gzip passed bytes.     * Use only when bytes is small.     * @param bytes What to gzip.     * @return A gzip member of bytes.     * @throws IOException     */    public static byte [] gzip(byte [] bytes) throws IOException {        ByteArrayOutputStream baos = new ByteArrayOutputStream();        GZIPOutputStream gzipOS = new GZIPOutputStream(baos);        gzipOS.write(bytes, 0, bytes.length);        gzipOS.close();        return baos.toByteArray();    }        /**     * Tests passed stream is GZIP stream by reading in the HEAD.     * Does reposition of stream when done.     * @param rs An InputStream that is Repositionable.     * @return True if compressed stream.     * @throws IOException     */    public static boolean isCompressedRepositionableStream(            final RepositionableStream rs)    throws IOException {        boolean result = false;        long p = rs.position();        try {            result = isCompressedStream((InputStream)rs);        } finally {            rs.position(p);        }        return result;     }        /**     * Tests passed stream is gzip stream by reading in the HEAD.     * Does not reposition stream when done.     * @param is An InputStream.     * @return True if compressed stream.     * @throws IOException     */    public static boolean isCompressedStream(final InputStream is)    throws IOException {        try {            new GzipHeader(is);        } catch (NoGzipMagicException e) {            return false;        }        return true;    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -