📄 puff.c
字号:
if (left > 8) left = 8; } return -9; /* ran out of codes */}#endif /* SLOW *//* * Given the list of code lengths length[0..n-1] representing a canonical * Huffman code for n symbols, construct the tables required to decode those * codes. Those tables are the number of codes of each length, and the symbols * sorted by length, retaining their original order within each length. The * return value is zero for a complete code set, negative for an over- * subscribed code set, and positive for an incomplete code set. The tables * can be used if the return value is zero or positive, but they cannot be used * if the return value is negative. If the return value is zero, it is not * possible for decode() using that table to return an error--any stream of * enough bits will resolve to a symbol. If the return value is positive, then * it is possible for decode() using that table to return an error for received * codes past the end of the incomplete lengths. * * Not used by decode(), but used for error checking, h->count[0] is the number * of the n symbols not in the code. So n - h->count[0] is the number of * codes. This is useful for checking for incomplete codes that have more than * one symbol, which is an error in a dynamic block. * * Assumption: for all i in 0..n-1, 0 <= length[i] <= MAXBITS * This is assured by the construction of the length arrays in dynamic() and * fixed() and is not verified by construct(). * * Format notes: * * - Permitted and expected examples of incomplete codes are one of the fixed * codes and any code with a single symbol which in deflate is coded as one * bit instead of zero bits. See the format notes for fixed() and dynamic(). * * - Within a given code length, the symbols are kept in ascending order for * the code bits definition. */local int construct(struct huffman *h, short *length, int n){ int symbol; /* current symbol when stepping through length[] */ int len; /* current length when stepping through h->count[] */ int left; /* number of possible codes left of current length */ short offs[MAXBITS+1]; /* offsets in symbol table for each length */ /* count number of codes of each length */ for (len = 0; len <= MAXBITS; len++) h->count[len] = 0; for (symbol = 0; symbol < n; symbol++) (h->count[length[symbol]])++; /* assumes lengths are within bounds */ if (h->count[0] == n) /* no codes! */ return 0; /* complete, but decode() will fail */ /* check for an over-subscribed or incomplete set of lengths */ left = 1; /* one possible code of zero length */ for (len = 1; len <= MAXBITS; len++) { left <<= 1; /* one more bit, double codes left */ left -= h->count[len]; /* deduct count from possible codes */ if (left < 0) return left; /* over-subscribed--return negative */ } /* left > 0 means incomplete */ /* generate offsets into symbol table for each length for sorting */ offs[1] = 0; for (len = 1; len < MAXBITS; len++) offs[len + 1] = offs[len] + h->count[len]; /* * put symbols in table sorted by length, by symbol order within each * length */ for (symbol = 0; symbol < n; symbol++) if (length[symbol] != 0) h->symbol[offs[length[symbol]]++] = symbol; /* return zero for complete set, positive for incomplete set */ return left;}/* * Decode literal/length and distance codes until an end-of-block code. * * Format notes: * * - Compressed data that is after the block type if fixed or after the code * description if dynamic is a combination of literals and length/distance * pairs terminated by and end-of-block code. Literals are simply Huffman * coded bytes. A length/distance pair is a coded length followed by a * coded distance to represent a string that occurs earlier in the * uncompressed data that occurs again at the current location. * * - Literals, lengths, and the end-of-block code are combined into a single * code of up to 286 symbols. They are 256 literals (0..255), 29 length * symbols (257..285), and the end-of-block symbol (256). * * - There are 256 possible lengths (3..258), and so 29 symbols are not enough * to represent all of those. Lengths 3..10 and 258 are in fact represented * by just a length symbol. Lengths 11..257 are represented as a symbol and * some number of extra bits that are added as an integer to the base length * of the length symbol. The number of extra bits is determined by the base * length symbol. These are in the static arrays below, lens[] for the base * lengths and lext[] for the corresponding number of extra bits. * * - The reason that 258 gets its own symbol is that the longest length is used * often in highly redundant files. Note that 258 can also be coded as the * base value 227 plus the maximum extra value of 31. While a good deflate * should never do this, it is not an error, and should be decoded properly. * * - If a length is decoded, including its extra bits if any, then it is * followed a distance code. There are up to 30 distance symbols. Again * there are many more possible distances (1..32768), so extra bits are added * to a base value represented by the symbol. The distances 1..4 get their * own symbol, but the rest require extra bits. The base distances and * corresponding number of extra bits are below in the static arrays dist[] * and dext[]. * * - Literal bytes are simply written to the output. A length/distance pair is * an instruction to copy previously uncompressed bytes to the output. The * copy is from distance bytes back in the output stream, copying for length * bytes. * * - Distances pointing before the beginning of the output data are not * permitted. * * - Overlapped copies, where the length is greater than the distance, are * allowed and common. For example, a distance of one and a length of 258 * simply copies the last byte 258 times. A distance of four and a length of * twelve copies the last four bytes three times. A simple forward copy * ignoring whether the length is greater than the distance or not implements * this correctly. You should not use memcpy() since its behavior is not * defined for overlapped arrays. You should not use memmove() or bcopy() * since though their behavior -is- defined for overlapping arrays, it is * defined to do the wrong thing in this case. */local int codes(struct state *s, struct huffman *lencode, struct huffman *distcode){ int symbol; /* decoded symbol */ int len; /* length for copy */ unsigned dist; /* distance for copy */ static const short lens[29] = { /* Size base for length codes 257..285 */ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258}; static const short lext[29] = { /* Extra bits for length codes 257..285 */ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0}; static const short dists[30] = { /* Offset base for distance codes 0..29 */ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577}; static const short dext[30] = { /* Extra bits for distance codes 0..29 */ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; /* decode literals and length/distance pairs */ do { symbol = decode(s, lencode); if (symbol < 0) return symbol; /* invalid symbol */ if (symbol < 256) { /* literal: symbol is the byte */ /* write out the literal */ if (s->out != NIL) { if (s->outcnt == s->outlen) return 1; s->out[s->outcnt] = symbol; } s->outcnt++; } else if (symbol > 256) { /* length */ /* get and compute length */ symbol -= 257; if (symbol >= 29) return -9; /* invalid fixed code */ len = lens[symbol] + bits(s, lext[symbol]); /* get and check distance */ symbol = decode(s, distcode); if (symbol < 0) return symbol; /* invalid symbol */ dist = dists[symbol] + bits(s, dext[symbol]); if (dist > s->outcnt) return -10; /* distance too far back */ /* copy length bytes from distance bytes back */ if (s->out != NIL) { if (s->outcnt + len > s->outlen) return 1; while (len--) { s->out[s->outcnt] = s->out[s->outcnt - dist]; s->outcnt++; } } else s->outcnt += len; } } while (symbol != 256); /* end of block symbol */ /* done with a valid fixed or dynamic block */ return 0;}/* * Process a fixed codes block. * * Format notes: * * - This block type can be useful for compressing small amounts of data for * which the size of the code descriptions in a dynamic block exceeds the * benefit of custom codes for that block. For fixed codes, no bits are * spent on code descriptions. Instead the code lengths for literal/length * codes and distance codes are fixed. The specific lengths for each symbol * can be seen in the "for" loops below. * * - The literal/length code is complete, but has two symbols that are invalid * and should result in an error if received. This cannot be implemented * simply as an incomplete code since those two symbols are in the "middle" * of the code. They are eight bits long and the longest literal/length\ * code is nine bits. Therefore the code must be constructed with those * symbols, and the invalid symbols must be detected after decoding. * * - The fixed distance codes also have two invalid symbols that should result * in an error if received. Since all of the distance codes are the same * length, this can be implemented as an incomplete code. Then the invalid * codes are detected while decoding. */local int fixed(struct state *s){ static int virgin = 1; static short lencnt[MAXBITS+1], lensym[FIXLCODES]; static short distcnt[MAXBITS+1], distsym[MAXDCODES]; static struct huffman lencode = {lencnt, lensym}; static struct huffman distcode = {distcnt, distsym}; /* build fixed huffman tables if first call (may not be thread safe) */ if (virgin) { int symbol; short lengths[FIXLCODES]; /* literal/length table */ for (symbol = 0; symbol < 144; symbol++) lengths[symbol] = 8; for (; symbol < 256; symbol++) lengths[symbol] = 9; for (; symbol < 280; symbol++) lengths[symbol] = 7; for (; symbol < FIXLCODES; symbol++) lengths[symbol] = 8; construct(&lencode, lengths, FIXLCODES); /* distance table */ for (symbol = 0; symbol < MAXDCODES; symbol++) lengths[symbol] = 5; construct(&distcode, lengths, MAXDCODES); /* do this just once */ virgin = 0; } /* decode data until end-of-block code */ return codes(s, &lencode, &distcode);}/* * Process a dynamic codes block. * * Format notes: * * - A dynamic block starts with a description of the literal/length and * distance codes for that block. New dynamic blocks allow the compressor to * rapidly adapt to changing data with new codes optimized for that data. * * - The codes used by the deflate format are "canonical", which means that * the actual bits of the codes are generated in an unambiguous way simply * from the number of bits in each code. Therefore the code descriptions * are simply a list of code lengths for each symbol. * * - The code lengths are stored in order for the symbols, so lengths are * provided for each of the literal/length symbols, and for each of the * distance symbols. * * - If a symbol is not used in the block, this is represented by a zero as * as the code length. This does not mean a zero-length code, but rather * that no code should be created for this symbol. There is no way in the * deflate format to represent a zero-length code.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -