⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 trees.c

📁 给出了 zip 压缩算法的完整实现过程。
💻 C
📖 第 1 页 / 共 4 页
字号:
        cmpr_len_bits &= 7L;    }    Assert(((cmpr_bytelen << 3) + cmpr_len_bits) == bits_sent,            "bad compressed size");    init_block();    if (eof) {#if defined(PGP) && !defined(MMAP)        /* Wipe out sensitive data for pgp */# ifdef DYN_ALLOC        extern uch *window;# else        extern uch window[];# endif        memset(window, 0, (unsigned)(2*WSIZE-1)); /* -1 needed if WSIZE=32K */#else /* !PGP */        Assert(input_len == isize, "bad input size");#endif        bi_windup();        cmpr_len_bits += 7;  /* align on byte boundary */    }    Tracev((stderr,"\ncomprlen %lu(%lu) ", cmpr_bytelen + (cmpr_len_bits>>3),           (cmpr_bytelen << 3) + cmpr_len_bits - 7*eof));    Trace((stderr, "\n"));    return cmpr_bytelen + (cmpr_len_bits >> 3);}/* =========================================================================== * Save the match info and tally the frequency counts. Return true if * the current block must be flushed. */int ct_tally (dist, lc)    int dist;  /* distance of matched string */    int lc;    /* match length-MIN_MATCH or unmatched char (if dist==0) */{    l_buf[last_lit++] = (uch)lc;    if (dist == 0) {        /* lc is the unmatched char */        dyn_ltree[lc].Freq++;    } else {        /* Here, lc is the match length - MIN_MATCH */        dist--;             /* dist = match distance - 1 */        Assert((ush)dist < (ush)MAX_DIST &&               (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&               (ush)d_code(dist) < (ush)D_CODES,  "ct_tally: bad match");        dyn_ltree[length_code[lc]+LITERALS+1].Freq++;        dyn_dtree[d_code(dist)].Freq++;        d_buf[last_dist++] = (ush)dist;        flags |= flag_bit;    }    flag_bit <<= 1;    /* Output the flags if they fill a byte: */    if ((last_lit & 7) == 0) {        flag_buf[last_flags++] = flags;        flags = 0, flag_bit = 1;    }    /* Try to guess if it is profitable to stop the current block here */    if (level > 2 && (last_lit & 0xfff) == 0) {        /* Compute an upper bound for the compressed length */        ulg out_length = (ulg)last_lit*8L;        ulg in_length = (ulg)strstart-block_start;        int dcode;        for (dcode = 0; dcode < D_CODES; dcode++) {            out_length += (ulg)dyn_dtree[dcode].Freq*(5L+extra_dbits[dcode]);        }        out_length >>= 3;        Trace((stderr,"\nlast_lit %u, last_dist %u, in %ld, out ~%ld(%ld%%) ",               last_lit, last_dist, in_length, out_length,               100L - out_length*100L/in_length));        if (last_dist < last_lit/2 && out_length < in_length/2) return 1;    }    return (last_lit == LIT_BUFSIZE-1 || last_dist == DIST_BUFSIZE);    /* We avoid equality with LIT_BUFSIZE because of wraparound at 64K     * on 16 bit machines and because stored blocks are restricted to     * 64K-1 bytes.     */}/* =========================================================================== * Send the block data compressed using the given Huffman trees */local void compress_block(ltree, dtree)    ct_data near *ltree; /* literal tree */    ct_data near *dtree; /* distance tree */{    unsigned dist;      /* distance of matched string */    int lc;             /* match length or unmatched char (if dist == 0) */    unsigned lx = 0;    /* running index in l_buf */    unsigned dx = 0;    /* running index in d_buf */    unsigned fx = 0;    /* running index in flag_buf */    uch flag = 0;       /* current flags */    unsigned code;      /* the code to send */    int extra;          /* number of extra bits to send */    if (last_lit != 0) do {        if ((lx & 7) == 0) flag = flag_buf[fx++];        lc = l_buf[lx++];        if ((flag & 1) == 0) {            send_code(lc, ltree); /* send a literal byte */            Tracecv(isgraph(lc), (stderr," '%c' ", lc));        } else {            /* Here, lc is the match length - MIN_MATCH */            code = length_code[lc];            send_code(code+LITERALS+1, ltree); /* send the length code */            extra = extra_lbits[code];            if (extra != 0) {                lc -= base_length[code];                send_bits(lc, extra);        /* send the extra length bits */            }            dist = d_buf[dx++];            /* Here, dist is the match distance - 1 */            code = d_code(dist);            Assert(code < D_CODES, "bad d_code");            send_code(code, dtree);       /* send the distance code */            extra = extra_dbits[code];            if (extra != 0) {                dist -= base_dist[code];                send_bits(dist, extra);   /* send the extra distance bits */            }        } /* literal or match pair ? */        flag >>= 1;    } while (lx < last_lit);    send_code(END_BLOCK, ltree);}/* =========================================================================== * Set the file type to TEXT (ASCII) or BINARY, using following algorithm: * - TEXT, either ASCII or an ASCII-compatible extension such as ISO-8859, *   UTF-8, etc., when the following two conditions are satisfied: *    a) There are no non-portable control characters belonging to the *       "black list" (0..6, 14..25, 28..31). *    b) There is at least one printable character belonging to the *       "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). * - BINARY otherwise. * * Note that the following partially-portable control characters form a * "gray list" that is ignored in this detection algorithm: * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}). * * Also note that, unlike in the previous 20% binary detection algorithm, * any control characters in the black list will set the file type to * BINARY.  If a text file contains a single accidental black character, * the file will be flagged as BINARY in the archive. * * IN assertion: the fields freq of dyn_ltree are set. */local void set_file_type(){    /* bit-mask of black-listed bytes     * bit is set if byte is black-listed     * set bits 0..6, 14..25, and 28..31     * 0xf3ffc07f = binary 11110011111111111100000001111111     */    unsigned long mask = 0xf3ffc07fUL;    int n;    /* Check for non-textual ("black-listed") bytes. */    for (n = 0; n <= 31; n++, mask >>= 1)        if ((mask & 1) && (dyn_ltree[n].Freq != 0))        {            *file_type = BINARY;            return;        }    /* Check for textual ("white-listed") bytes. */    *file_type = ASCII;    if (dyn_ltree[9].Freq != 0 || dyn_ltree[10].Freq != 0            || dyn_ltree[13].Freq != 0)        return;    for (n = 32; n < LITERALS; n++)        if (dyn_ltree[n].Freq != 0)            return;    /* This deflate stream is either empty, or     * it has tolerated ("gray-listed") bytes only.     */    *file_type = BINARY;}/* =========================================================================== * Initialize the bit string routines. */void bi_init (tgt_buf, tgt_size, flsh_allowed)    char *tgt_buf;    unsigned tgt_size;    int flsh_allowed;{    out_buf = tgt_buf;    out_size = tgt_size;    out_offset = 0;    flush_flg = flsh_allowed;    bi_buf = 0;    bi_valid = 0;#ifdef DEBUG    bits_sent = 0L;#endif}#if (!defined(ASMV) || !defined(RISCOS))/* =========================================================================== * Send a value on a given number of bits. * IN assertion: length <= 16 and value fits in length bits. */local void send_bits(value, length)    int value;  /* value to send */    int length; /* number of bits */{#ifdef DEBUG    Tracevv((stderr," l %2d v %4x ", length, value));    Assert(length > 0 && length <= 15, "invalid length");    bits_sent += (ulg)length;#endif    /* If not enough room in bi_buf, use (bi_valid) bits from bi_buf and     * (Buf_size - bi_valid) bits from value to flush the filled bi_buf,     * then fill in the rest of (value), leaving (length - (Buf_size-bi_valid))     * unused bits in bi_buf.     */    bi_buf |= (value << bi_valid);    bi_valid += length;    if (bi_valid > (int)Buf_size) {        PUTSHORT(bi_buf);        bi_valid -= Buf_size;        bi_buf = (unsigned)value >> (length - bi_valid);    }}/* =========================================================================== * Reverse the first len bits of a code, using straightforward code (a faster * method would use a table) * IN assertion: 1 <= len <= 15 */local unsigned bi_reverse(code, len)    unsigned code; /* the value to invert */    int len;       /* its bit length */{    register unsigned res = 0;    do {        res |= code & 1;        code >>= 1, res <<= 1;    } while (--len > 0);    return res >> 1;}#endif /* !ASMV || !RISCOS *//* =========================================================================== * Write out any remaining bits in an incomplete byte. */local void bi_windup(){    if (bi_valid > 8) {        PUTSHORT(bi_buf);    } else if (bi_valid > 0) {        PUTBYTE(bi_buf);    }    if (flush_flg) {        flush_outbuf(out_buf, &out_offset);    }    bi_buf = 0;    bi_valid = 0;#ifdef DEBUG    bits_sent = (bits_sent+7) & ~7;#endif}/* =========================================================================== * Copy a stored block to the zip file, storing first the length and its * one's complement if requested. * * Buffer Overwrite fix * * A buffer flush has been added to fix a bug when encrypting deflated files * with embedded "copied blocks".  When encrypting, the flush_out() routine * modifies its data buffer because encryption is done "in-place" in * zfwrite(), whereas without encryption, the flush_out() data buffer is * left unaltered.  This can be a problem as noted below by the submitter. * * "But an exception comes when a block of stored data (data that could not * be compressed) is being encrypted. In this case, the data that is passed * to zfwrite (and is therefore encrypted-in-place) is actually a block of * data from within the sliding input window that is being managed by * deflate.c. * * "Since part of the sliding input window has now been overwritten by * encrypted (and essentially random) data, deflate.c's search for previous * text that matches the current text will usually fail but on rare * occasions will find a match with something in the encrypted data. This * incorrect match then causes incorrect information to be placed in the * ZIP file." * * The problem results in the zip file having bad data and so a bad CRC. * This does not happen often and to recreate the problem a large file * with non-compressable data is needed so that deflate chooses to store the * data.  A test file of 400 MB seems large enough to recreate the problem * using a command such as *     zip -1 -e crcerror.zip testfile.dat * maybe half the time. * * This problem has been fixed by copying the data into the deflate output * buffer before calling flush_outbuf(), when encryption is enabled. * * Thanks to the nice people at WinZip for identifying the problem and * passing it on.  Also see Changes. * * 2006-03-05 EG, CS */local void copy_block(block, len, header)    char *block;  /* the input data */    unsigned len; /* its length */    int header;   /* true if block header must be written */{    bi_windup();              /* align on byte boundary */    if (header) {        PUTSHORT((ush)len);        PUTSHORT((ush)~len);#ifdef DEBUG        bits_sent += 2*16;#endif    }    if (flush_flg) {        flush_outbuf(out_buf, &out_offset);        if (key != (char *)NULL) {  /* key is the global password pointer */            /* Encryption modifies the data in the output buffer. But the             * copied input data must remain intact for further deflate             * string matching lookups.  Therefore, the input data is             * copied into the compression output buffer for flushing             * to the compressed/encrypted output stream.             */            while(len > 0) {                out_offset = (len < out_size ? len : out_size);                memcpy(out_buf, block, out_offset);                block += out_offset;                len -= out_offset;                flush_outbuf(out_buf, &out_offset);            }        } else {            /* Without encryption, the output routines do not touch the             * written data, so there is no need for an additional copy             * operation.             */            out_offset = len;            flush_outbuf(block, &out_offset);        }    } else if (out_offset + len > out_size) {        error("output buffer too small for in-memory compression");    } else {        memcpy(out_buf + out_offset, block, len);        out_offset += len;    }#ifdef DEBUG    bits_sent += (ulg)len<<3;#endif}#endif /* !USE_ZLIB */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -