📄 gzipheader.java
字号:
/* GzipHeader
 *
 * $Id: GzipHeader.java 4064 2005-12-20 18:11:33Z stack-sf $
 *
 * Created on July 5, 2004
 *
 * Copyright (C) 2004 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
package org.archive.io;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.CRC32;
import java.util.zip.Deflater;
import java.util.zip.GZIPInputStream;

/**
 * Read in the GZIP header.
 *
 * See RFC1952 for specification on what the header looks like.
 * Assumption is that stream is cued-up with the gzip header as the
 * next thing to be read.
 *
 * <p>Of <a href="http://jguru.com/faq/view.jsp?EID=13647">Java
 * and unsigned bytes</a>: every value read from the stream is handled as an
 * <code>int</code> in the range 0-255 so that no sign extension creeps in.
 *
 * <p>TODO: Add accessors for optional filename and comment (both are
 * currently skipped, not retained).
 *
 * @author stack
 */
public class GzipHeader {
    /**
     * Length of minimal GZIP header.
     *
     * See RFC1952 for explanation of value of 10.
     */
    public static final int MINIMAL_GZIP_HEADER_LENGTH = 10;

    /** FLG bit: a 16-bit header CRC follows the optional fields. */
    private static final int FLG_FHCRC = 2;

    /** FLG bit: an extra field (2-byte length plus payload) is present. */
    private static final int FLG_FEXTRA = 4;

    /** FLG bit: a zero-terminated original file name is present. */
    private static final int FLG_FNAME = 8;

    /** FLG bit: a zero-terminated file comment is present. */
    private static final int FLG_FCOMMENT = 16;

    /**
     * Total length of the gzip header, i.e. the number of bytes
     * {@link #readHeader(InputStream)} consumed from the stream.
     */
    protected int length = 0;

    /**
     * The GZIP header FLG byte.
     */
    protected int flg;

    /**
     * GZIP header XFL byte.
     */
    private int xfl;

    /**
     * GZIP header OS byte.
     */
    private int os;

    /**
     * Extra header field content. Null if the header had no extra field.
     */
    private byte [] fextra = null;

    /**
     * GZIP header MTIME field.
     */
    private int mtime;

    /**
     * No-argument constructor.
     *
     * Creates an empty instance; nothing is read until
     * {@link #readHeader(InputStream)} is called.
     */
    public GzipHeader() {
        super();
    }

    /**
     * Constructor.
     *
     * This constructor advances the stream past any gzip header found.
     *
     * @param in InputStream to read from.
     * @throws IOException If the header cannot be read or is not valid.
     */
    public GzipHeader(InputStream in) throws IOException {
        super();
        readHeader(in);
    }

    /**
     * Read in gzip header.
     *
     * Advances the stream past the gzip header and records the FLG, MTIME,
     * XFL, OS and optional extra field. Any state left over from a previous
     * call on this instance is discarded first, so {@link #getLength()}
     * always describes the most recently read header.
     *
     * @param in InputStream.
     *
     * @throws IOException Throws if does not start with GZIP Header, if the
     * compression method is not deflate, if the optional header CRC does not
     * match, or (as EOFException) if the stream ends inside the header.
     */
    public void readHeader(InputStream in) throws IOException {
        // Reset per-header state; without this a re-used instance would
        // report an accumulated length and possibly a stale extra field.
        this.length = 0;
        this.fextra = null;

        CRC32 crc = new CRC32();

        if (!testGzipMagic(in, crc)) {
            throw new NoGzipMagicException();
        }
        this.length += 2;

        if (readByte(in, crc) != Deflater.DEFLATED) {
            throw new IOException("Unknown compression");
        }
        this.length++;

        // Get gzip header flag.
        this.flg = readByte(in, crc);
        this.length++;

        // Get MTIME.
        this.mtime = readInt(in, crc);
        this.length += 4;

        // Read XFL and OS.
        this.xfl = readByte(in, crc);
        this.length++;
        this.os = readByte(in, crc);
        this.length++;

        // Optional extra field -- stuff w/ alexa stuff in it.
        if ((this.flg & FLG_FEXTRA) == FLG_FEXTRA) {
            int count = readShort(in, crc);
            this.length += 2;
            this.fextra = new byte[count];
            readByte(in, crc, this.fextra, 0, count);
            this.length += count;
        }

        // Skip file name.  It ends in null.
        if ((this.flg & FLG_FNAME) == FLG_FNAME) {
            skipZeroTerminated(in, crc);
        }

        // Skip file comment.  It ends in null.
        if ((this.flg & FLG_FCOMMENT) == FLG_FCOMMENT) {
            skipZeroTerminated(in, crc);
        }

        // Check optional CRC.
        if ((this.flg & FLG_FHCRC) == FLG_FHCRC) {
            // Snapshot the running CRC before reading the stored value:
            // the read below updates the CRC with the stored bytes too.
            int calcCrc = (int)(crc.getValue() & 0xffff);
            if (readShort(in, crc) != calcCrc) {
                throw new IOException("Bad header CRC");
            }
            this.length += 2;
        }
    }

    /**
     * Skip a zero-terminated field, counting every byte consumed
     * (terminator included) into the header length.
     *
     * @param in InputStream to read.
     * @param crc CRC to update.
     * @throws IOException If the stream ends before the terminator.
     */
    private void skipZeroTerminated(InputStream in, CRC32 crc)
    throws IOException {
        int b;
        do {
            b = readByte(in, crc);
            this.length++;
        } while (b != 0);
    }

    /**
     * Test gzip magic is next in the stream.
     * Reads two bytes.  Caller needs to manage resetting stream.
     * @param in InputStream to read.
     * @return true if found gzip magic.  False otherwise.
     * @throws IOException On read failure (EOFException if the stream ends
     * before two bytes are available).
     */
    public boolean testGzipMagic(InputStream in) throws IOException {
        return testGzipMagic(in, null);
    }

    /**
     * Test gzip magic is next in the stream.
     * Reads two bytes.  Caller needs to manage resetting stream.
     * @param in InputStream to read.
     * @param crc CRC to update. May be null, in which case no CRC is kept.
     * @return true if found gzip magic.  False otherwise.
     * @throws IOException On read failure (EOFException if the stream ends
     * before two bytes are available).
     */
    public boolean testGzipMagic(InputStream in, CRC32 crc)
    throws IOException {
        return readShort(in, crc) == GZIPInputStream.GZIP_MAGIC;
    }

    /**
     * Read an int.
     *
     * Reads four bytes, least significant byte first.
     * We do not expect to get a -1 reading.  If we do, we throw exception.
     * Update the crc as we go.
     *
     * @param in InputStream to read.
     * @param crc CRC to update.
     * @return int read.
     *
     * @throws IOException
     */
    private int readInt(InputStream in, CRC32 crc) throws IOException {
        int low = readShort(in, crc);
        int high = readShort(in, crc);
        return ((high << 16) & 0xffff0000) | low;
    }

    /**
     * Read a short.
     *
     * Reads two bytes, least significant byte first, and returns them as a
     * value in the range 0-65535.
     * We do not expect to get a -1 reading.  If we do, we throw exception.
     * Update the crc as we go.
     *
     * @param in InputStream to read.
     * @param crc CRC to update.
     * @return Short read.
     *
     * @throws IOException
     */
    private int readShort(InputStream in, CRC32 crc) throws IOException {
        int low = readByte(in, crc);
        int high = readByte(in, crc);
        return ((high << 8) & 0x00ff00) | low;
    }

    /**
     * Read a byte.
     *
     * We do not expect to get a -1 reading.  If we do, we throw exception.
     *
     * @param in InputStream to read.
     * @return Byte read, as a value in the range 0-255.
     *
     * @throws IOException
     */
    protected int readByte(InputStream in) throws IOException {
        return readByte(in, null);
    }

    /**
     * Read a byte.
     *
     * We do not expect to get a -1 reading.  If we do, we throw exception.
     * Update the crc as we go.
     *
     * @param in InputStream to read.
     * @param crc CRC to update. May be null, in which case no CRC is kept.
     * @return Byte read, as a value in the range 0-255.
     *
     * @throws IOException EOFException if the stream is exhausted.
     */
    protected int readByte(InputStream in, CRC32 crc) throws IOException {
        int b = in.read();
        if (b == -1) {
            throw new EOFException();
        }
        if (crc != null) {
            crc.update(b);
        }
        return b & 0xff;
    }

    /**
     * Read bytes into a buffer.
     *
     * Reads exactly <code>length</code> bytes and stores them at
     * <code>buffer[offset]</code> through
     * <code>buffer[offset + length - 1]</code>.
     * We do not expect to get a -1 reading.  If we do, we throw exception.
     * Update the crc as we go.
     *
     * @param in InputStream to read.
     * @param crc CRC to update.
     * @param buffer Buffer to read into.
     * @param offset Offset to start filling buffer at.
     * @param length How much to read.
     * @return Bytes read.
     *
     * @throws IOException EOFException if the stream is exhausted first.
     */
    protected int readByte(InputStream in, CRC32 crc, byte [] buffer,
            int offset, int length)
    throws IOException {
        // Count from zero: the destination index is offset + i, so starting
        // i at offset would both skip bytes and write past the intended slot.
        for (int i = 0; i < length; i++) {
            buffer[offset + i] = (byte)readByte(in, crc);
        }
        return length;
    }

    /**
     * @return Returns the fextra (null if the header had no extra field).
     */
    public byte[] getFextra() {
        return this.fextra;
    }

    /**
     * @return Returns the flg.
     */
    public int getFlg() {
        return this.flg;
    }

    /**
     * @return Returns the os.
     */
    public int getOs() {
        return this.os;
    }

    /**
     * @return Returns the xfl.
     */
    public int getXfl() {
        return this.xfl;
    }

    /**
     * @return Returns the mtime.
     */
    public int getMtime() {
        return this.mtime;
    }

    /**
     * @return Returns the length.
     */
    public int getLength() {
        return length;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -