📄 puff.c
字号:
if (left > 8) left = 8;
}
return -9; /* ran out of codes */
}
#endif /* SLOW */
/*
* Given the list of code lengths length[0..n-1] representing a canonical
* Huffman code for n symbols, construct the tables required to decode those
* codes. Those tables are the number of codes of each length, and the symbols
* sorted by length, retaining their original order within each length. The
* return value is zero for a complete code set, negative for an over-
* subscribed code set, and positive for an incomplete code set. The tables
* can be used if the return value is zero or positive, but they cannot be used
* if the return value is negative. If the return value is zero, it is not
* possible for decode() using that table to return an error--any stream of
* enough bits will resolve to a symbol. If the return value is positive, then
* it is possible for decode() using that table to return an error for received
* codes past the end of the incomplete lengths.
*
* Not used by decode(), but used for error checking, h->count[0] is the number
* of the n symbols not in the code. So n - h->count[0] is the number of
* codes. This is useful for checking for incomplete codes that have more than
* one symbol, which is an error in a dynamic block.
*
* Assumption: for all i in 0..n-1, 0 <= length[i] <= MAXBITS
* This is assured by the construction of the length arrays in dynamic() and
* fixed() and is not verified by construct().
*
* Format notes:
*
* - Permitted and expected examples of incomplete codes are one of the fixed
* codes and any code with a single symbol which in deflate is coded as one
* bit instead of zero bits. See the format notes for fixed() and dynamic().
*
* - Within a given code length, the symbols are kept in ascending order for
* the code bits definition.
*/
local int construct(struct huffman *h, short *length, int n)
{
int symbol; /* current symbol when stepping through length[] */
int len; /* current length when stepping through h->count[] */
int left; /* number of possible codes left of current length */
short offs[MAXBITS+1]; /* offsets in symbol table for each length */
/* count number of codes of each length */
for (len = 0; len <= MAXBITS; len++)
h->count[len] = 0;
for (symbol = 0; symbol < n; symbol++)
(h->count[length[symbol]])++; /* assumes lengths are within bounds */
if (h->count[0] == n) /* no codes! */
return 0; /* complete, but decode() will fail */
/* check for an over-subscribed or incomplete set of lengths */
left = 1; /* one possible code of zero length */
for (len = 1; len <= MAXBITS; len++) {
left <<= 1; /* one more bit, double codes left */
left -= h->count[len]; /* deduct count from possible codes */
if (left < 0) return left; /* over-subscribed--return negative */
} /* left > 0 means incomplete */
/* generate offsets into symbol table for each length for sorting */
offs[1] = 0;
for (len = 1; len < MAXBITS; len++)
offs[len + 1] = offs[len] + h->count[len];
/*
* put symbols in table sorted by length, by symbol order within each
* length
*/
for (symbol = 0; symbol < n; symbol++)
if (length[symbol] != 0)
h->symbol[offs[length[symbol]]++] = symbol;
/* return zero for complete set, positive for incomplete set */
return left;
}
/*
* Decode literal/length and distance codes until an end-of-block code.
*
* Format notes:
*
* - Compressed data that is after the block type if fixed or after the code
* description if dynamic is a combination of literals and length/distance
* pairs terminated by and end-of-block code. Literals are simply Huffman
* coded bytes. A length/distance pair is a coded length followed by a
* coded distance to represent a string that occurs earlier in the
* uncompressed data that occurs again at the current location.
*
* - Literals, lengths, and the end-of-block code are combined into a single
* code of up to 286 symbols. They are 256 literals (0..255), 29 length
* symbols (257..285), and the end-of-block symbol (256).
*
* - There are 256 possible lengths (3..258), and so 29 symbols are not enough
* to represent all of those. Lengths 3..10 and 258 are in fact represented
* by just a length symbol. Lengths 11..257 are represented as a symbol and
* some number of extra bits that are added as an integer to the base length
* of the length symbol. The number of extra bits is determined by the base
* length symbol. These are in the static arrays below, lens[] for the base
* lengths and lext[] for the corresponding number of extra bits.
*
* - The reason that 258 gets its own symbol is that the longest length is used
* often in highly redundant files. Note that 258 can also be coded as the
* base value 227 plus the maximum extra value of 31. While a good deflate
* should never do this, it is not an error, and should be decoded properly.
*
* - If a length is decoded, including its extra bits if any, then it is
* followed a distance code. There are up to 30 distance symbols. Again
* there are many more possible distances (1..32768), so extra bits are added
* to a base value represented by the symbol. The distances 1..4 get their
* own symbol, but the rest require extra bits. The base distances and
* corresponding number of extra bits are below in the static arrays dist[]
* and dext[].
*
* - Literal bytes are simply written to the output. A length/distance pair is
* an instruction to copy previously uncompressed bytes to the output. The
* copy is from distance bytes back in the output stream, copying for length
* bytes.
*
* - Distances pointing before the beginning of the output data are not
* permitted.
*
* - Overlapped copies, where the length is greater than the distance, are
* allowed and common. For example, a distance of one and a length of 258
* simply copies the last byte 258 times. A distance of four and a length of
* twelve copies the last four bytes three times. A simple forward copy
* ignoring whether the length is greater than the distance or not implements
* this correctly. You should not use memcpy() since its behavior is not
* defined for overlapped arrays. You should not use memmove() or bcopy()
* since though their behavior -is- defined for overlapping arrays, it is
* defined to do the wrong thing in this case.
*/
local int codes(struct state *s,
struct huffman *lencode,
struct huffman *distcode)
{
int symbol; /* decoded symbol */
int len; /* length for copy */
unsigned dist; /* distance for copy */
static const short lens[29] = { /* Size base for length codes 257..285 */
3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258};
static const short lext[29] = { /* Extra bits for length codes 257..285 */
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};
static const short dists[30] = { /* Offset base for distance codes 0..29 */
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
8193, 12289, 16385, 24577};
static const short dext[30] = { /* Extra bits for distance codes 0..29 */
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
12, 12, 13, 13};
/* decode literals and length/distance pairs */
do {
symbol = decode(s, lencode);
if (symbol < 0) return symbol; /* invalid symbol */
if (symbol < 256) { /* literal: symbol is the byte */
/* write out the literal */
if (s->out != NIL) {
if (s->outcnt == s->outlen) return 1;
s->out[s->outcnt] = symbol;
}
s->outcnt++;
}
else if (symbol > 256) { /* length */
/* get and compute length */
symbol -= 257;
if (symbol >= 29) return -9; /* invalid fixed code */
len = lens[symbol] + bits(s, lext[symbol]);
/* get and check distance */
symbol = decode(s, distcode);
if (symbol < 0) return symbol; /* invalid symbol */
dist = dists[symbol] + bits(s, dext[symbol]);
if (dist > s->outcnt)
return -10; /* distance too far back */
/* copy length bytes from distance bytes back */
if (s->out != NIL) {
if (s->outcnt + len > s->outlen) return 1;
while (len--) {
s->out[s->outcnt] = s->out[s->outcnt - dist];
s->outcnt++;
}
}
else
s->outcnt += len;
}
} while (symbol != 256); /* end of block symbol */
/* done with a valid fixed or dynamic block */
return 0;
}
/*
* Process a fixed codes block.
*
* Format notes:
*
* - This block type can be useful for compressing small amounts of data for
* which the size of the code descriptions in a dynamic block exceeds the
* benefit of custom codes for that block. For fixed codes, no bits are
* spent on code descriptions. Instead the code lengths for literal/length
* codes and distance codes are fixed. The specific lengths for each symbol
* can be seen in the "for" loops below.
*
* - The literal/length code is complete, but has two symbols that are invalid
* and should result in an error if received. This cannot be implemented
* simply as an incomplete code since those two symbols are in the "middle"
* of the code. They are eight bits long and the longest literal/length\
* code is nine bits. Therefore the code must be constructed with those
* symbols, and the invalid symbols must be detected after decoding.
*
* - The fixed distance codes also have two invalid symbols that should result
* in an error if received. Since all of the distance codes are the same
* length, this can be implemented as an incomplete code. Then the invalid
* codes are detected while decoding.
*/
local int fixed(struct state *s)
{
static int virgin = 1;
static short lencnt[MAXBITS+1], lensym[FIXLCODES];
static short distcnt[MAXBITS+1], distsym[MAXDCODES];
static struct huffman lencode = {lencnt, lensym};
static struct huffman distcode = {distcnt, distsym};
/* build fixed huffman tables if first call (may not be thread safe) */
if (virgin) {
int symbol;
short lengths[FIXLCODES];
/* literal/length table */
for (symbol = 0; symbol < 144; symbol++)
lengths[symbol] = 8;
for (; symbol < 256; symbol++)
lengths[symbol] = 9;
for (; symbol < 280; symbol++)
lengths[symbol] = 7;
for (; symbol < FIXLCODES; symbol++)
lengths[symbol] = 8;
construct(&lencode, lengths, FIXLCODES);
/* distance table */
for (symbol = 0; symbol < MAXDCODES; symbol++)
lengths[symbol] = 5;
construct(&distcode, lengths, MAXDCODES);
/* do this just once */
virgin = 0;
}
/* decode data until end-of-block code */
return codes(s, &lencode, &distcode);
}
/*
* Process a dynamic codes block.
*
* Format notes:
*
* - A dynamic block starts with a description of the literal/length and
* distance codes for that block. New dynamic blocks allow the compressor to
* rapidly adapt to changing data with new codes optimized for that data.
*
* - The codes used by the deflate format are "canonical", which means that
* the actual bits of the codes are generated in an unambiguous way simply
* from the number of bits in each code. Therefore the code descriptions
* are simply a list of code lengths for each symbol.
*
* - The code lengths are stored in order for the symbols, so lengths are
* provided for each of the literal/length symbols, and for each of the
* distance symbols.
*
* - If a symbol is not used in the block, this is represented by a zero as
* as the code length. This does not mean a zero-length code, but rather
* that no code should be created for this symbol. There is no way in the
* deflate format to represent a zero-length code.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -