📄 blast.c
字号:
/* blast.c * Copyright (C) 2003 Mark Adler * For conditions of distribution and use, see copyright notice in blast.h * version 1.1, 16 Feb 2003 * * blast.c decompresses data compressed by the PKWare Compression Library. * This function provides functionality similar to the explode() function of * the PKWare library, hence the name "blast". * * This decompressor is based on the excellent format description provided by * Ben Rudiak-Gould in comp.compression on August 13, 2001. Interestingly, the * example Ben provided in the post is incorrect. The distance 110001 should * instead be 111000. When corrected, the example byte stream becomes: * * 00 04 82 24 25 8f 80 7f * * which decompresses to "AIAIAIAIAIAIA" (without the quotes). *//* * Change history: * * 1.0 12 Feb 2003 - First version * 1.1 16 Feb 2003 - Fixed distance check for > 4 GB uncompressed data */#include <setjmp.h> /* for setjmp(), longjmp(), and jmp_buf */#include "blast.h" /* prototype for blast() */#define local static /* for local function definitions */#define MAXBITS 13 /* maximum code length */#define MAXWIN 4096 /* maximum window size *//* input and output state */struct state { /* input state */ blast_in infun; /* input function provided by user */ void *inhow; /* opaque information passed to infun() */ unsigned char *in; /* next input location */ unsigned left; /* available input at in */ int bitbuf; /* bit buffer */ int bitcnt; /* number of bits in bit buffer */ /* input limit error return state for bits() and decode() */ jmp_buf env; /* output state */ blast_out outfun; /* output function provided by user */ void *outhow; /* opaque information passed to outfun() */ unsigned next; /* index of next write location in out[] */ int first; /* true to check distances (for first 4K) */ unsigned char out[MAXWIN]; /* output buffer and sliding window */};/* * Return need bits from the input stream. This always leaves less than * eight bits in the buffer. bits() works properly for need == 0. * * Format notes: * * - Bits are stored in bytes from the least significant bit to the most * significant bit. Therefore bits are dropped from the bottom of the bit * buffer, using shift right, and new bytes are appended to the top of the * bit buffer, using shift left. */local int bits(struct state *s, int need){ int val; /* bit accumulator */ /* load at least need bits into val */ val = s->bitbuf; while (s->bitcnt < need) { if (s->left == 0) { s->left = s->infun(s->inhow, &(s->in)); if (s->left == 0) longjmp(s->env, 1); /* out of input */ } val |= (int)(*(s->in)++) << s->bitcnt; /* load eight bits */ s->left--; s->bitcnt += 8; } /* drop need bits and update buffer, always zero to seven bits left */ s->bitbuf = val >> need; s->bitcnt -= need; /* return need bits, zeroing the bits above that */ return val & ((1 << need) - 1);}/* * Huffman code decoding tables. count[1..MAXBITS] is the number of symbols of * each length, which for a canonical code are stepped through in order. * symbol[] are the symbol values in canonical order, where the number of * entries is the sum of the counts in count[]. The decoding process can be * seen in the function decode() below. */struct huffman { short *count; /* number of symbols of each length */ short *symbol; /* canonically ordered symbols */};/* * Decode a code from the stream s using huffman table h. Return the symbol or * a negative value if there is an error. If all of the lengths are zero, i.e. * an empty code, or if the code is incomplete and an invalid code is received, * then -9 is returned after reading MAXBITS bits. * * Format notes: * * - The codes as stored in the compressed data are bit-reversed relative to * a simple integer ordering of codes of the same lengths. Hence below the * bits are pulled from the compressed data one at a time and used to * build the code value reversed from what is in the stream in order to * permit simple integer comparisons for decoding. * * - The first code for the shortest length is all ones. Subsequent codes of * the same length are simply integer decrements of the previous code. When * moving up a length, a one bit is appended to the code. For a complete * code, the last code of the longest length will be all zeros. To support * this ordering, the bits pulled during decoding are inverted to apply the * more "natural" ordering starting with all zeros and incrementing. */local int decode(struct state *s, struct huffman *h){ int len; /* current number of bits in code */ int code; /* len bits being decoded */ int first; /* first code of length len */ int count; /* number of codes of length len */ int index; /* index of first code of length len in symbol table */ int bitbuf; /* bits from stream */ int left; /* bits left in next or left to process */ short *next; /* next number of codes */ bitbuf = s->bitbuf; left = s->bitcnt; code = first = index = 0; len = 1; next = h->count + 1; while (1) { while (left--) { code |= (bitbuf & 1) ^ 1; /* invert code */ bitbuf >>= 1; count = *next++; if (code < first + count) { /* if length len, return symbol */ s->bitbuf = bitbuf; s->bitcnt = (s->bitcnt - len) & 7; return h->symbol[index + (code - first)]; } index += count; /* else update for next length */ first += count; first <<= 1; code <<= 1; len++; } left = (MAXBITS+1) - len; if (left == 0) break; if (s->left == 0) { s->left = s->infun(s->inhow, &(s->in)); if (s->left == 0) longjmp(s->env, 1); /* out of input */ } bitbuf = *(s->in)++; s->left--; if (left > 8) left = 8; } return -9; /* ran out of codes */}/* * Given a list of repeated code lengths rep[0..n-1], where each byte is a * count (high four bits + 1) and a code length (low four bits), generate the * list of code lengths. This compaction reduces the size of the object code. * Then given the list of code lengths length[0..n-1] representing a canonical * Huffman code for n symbols, construct the tables required to decode those * codes. Those tables are the number of codes of each length, and the symbols * sorted by length, retaining their original order within each length. The * return value is zero for a complete code set, negative for an over- * subscribed code set, and positive for an incomplete code set. The tables * can be used if the return value is zero or positive, but they cannot be used * if the return value is negative. If the return value is zero, it is not * possible for decode() using that table to return an error--any stream of * enough bits will resolve to a symbol. If the return value is positive, then * it is possible for decode() using that table to return an error for received * codes past the end of the incomplete lengths. */local int construct(struct huffman *h, const unsigned char *rep, int n){ int symbol; /* current symbol when stepping through length[] */ int len; /* current length when stepping through h->count[] */ int left; /* number of possible codes left of current length */ short offs[MAXBITS+1]; /* offsets in symbol table for each length */ short length[256]; /* code lengths */ /* convert compact repeat counts into symbol bit length list */ symbol = 0; do { len = *rep++; left = (len >> 4) + 1; len &= 15; do { length[symbol++] = len; } while (--left); } while (--n); n = symbol; /* count number of codes of each length */ for (len = 0; len <= MAXBITS; len++) h->count[len] = 0; for (symbol = 0; symbol < n; symbol++) (h->count[length[symbol]])++; /* assumes lengths are within bounds */ if (h->count[0] == n) /* no codes! */ return 0; /* complete, but decode() will fail */ /* check for an over-subscribed or incomplete set of lengths */ left = 1; /* one possible code of zero length */ for (len = 1; len <= MAXBITS; len++) { left <<= 1; /* one more bit, double codes left */ left -= h->count[len]; /* deduct count from possible codes */ if (left < 0) return left; /* over-subscribed--return negative */ } /* left > 0 means incomplete */ /* generate offsets into symbol table for each length for sorting */ offs[1] = 0; for (len = 1; len < MAXBITS; len++)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -