📄 puff.c
字号:
* * - The maximum number of bits in a code is 15, so the possible lengths for * any code are 1..15. * * - The fact that a length of zero is not permitted for a code has an * interesting consequence. Normally if only one symbol is used for a given * code, then in fact that code could be represented with zero bits. However * in deflate, that code has to be at least one bit. So for example, if * only a single distance base symbol appears in a block, then it will be * represented by a single code of length one, in particular one 0 bit. This * is an incomplete code, since if a 1 bit is received, it has no meaning, * and should result in an error. So incomplete distance codes of one symbol * should be permitted, and the receipt of invalid codes should be handled. * * - It is also possible to have a single literal/length code, but that code * must be the end-of-block code, since every dynamic block has one. This * is not the most efficient way to create an empty block (an empty fixed * block is fewer bits), but it is allowed by the format. So incomplete * literal/length codes of one symbol should also be permitted. * * - If there are only literal codes and no lengths, then there are no distance * codes. This is represented by one distance code with zero bits. * * - The list of up to 286 length/literal lengths and up to 30 distance lengths * are themselves compressed using Huffman codes and run-length encoding. In * the list of code lengths, a 0 symbol means no code, a 1..15 symbol means * that length, and the symbols 16, 17, and 18 are run-length instructions. * Each of 16, 17, and 18 are follwed by extra bits to define the length of * the run. 16 copies the last length 3 to 6 times. 17 represents 3 to 10 * zero lengths, and 18 represents 11 to 138 zero lengths. Unused symbols * are common, hence the special coding for zero lengths. * * - The symbols for 0..18 are Huffman coded, and so that code must be * described first. This is simply a sequence of up to 19 three-bit values * representing no code (0) or the code length for that symbol (1..7). * * - A dynamic block starts with three fixed-size counts from which is computed * the number of literal/length code lengths, the number of distance code * lengths, and the number of code length code lengths (ok, you come up with * a better name!) in the code descriptions. For the literal/length and * distance codes, lengths after those provided are considered zero, i.e. no * code. The code length code lengths are received in a permuted order (see * the order[] array below) to make a short code length code length list more * likely. As it turns out, very short and very long codes are less likely * to be seen in a dynamic code description, hence what may appear initially * to be a peculiar ordering. * * - Given the number of literal/length code lengths (nlen) and distance code * lengths (ndist), then they are treated as one long list of nlen + ndist * code lengths. Therefore run-length coding can and often does cross the * boundary between the two sets of lengths. * * - So to summarize, the code description at the start of a dynamic block is * three counts for the number of code lengths for the literal/length codes, * the distance codes, and the code length codes. This is followed by the * code length code lengths, three bits each. This is used to construct the * code length code which is used to read the remainder of the lengths. Then * the literal/length code lengths and distance lengths are read as a single * set of lengths using the code length codes. Codes are constructed from * the resulting two sets of lengths, and then finally you can start * decoding actual compressed data in the block. * * - For reference, a "typical" size for the code description in a dynamic * block is around 80 bytes. */local int dynamic(struct state *s){ int nlen, ndist, ncode; /* number of lengths in descriptor */ int index; /* index of lengths[] */ int err; /* construct() return value */ short lengths[MAXCODES]; /* descriptor code lengths */ short lencnt[MAXBITS+1], lensym[MAXLCODES]; /* lencode memory */ short distcnt[MAXBITS+1], distsym[MAXDCODES]; /* distcode memory */ struct huffman lencode = {lencnt, lensym}; /* length code */ struct huffman distcode = {distcnt, distsym}; /* distance code */ static const short order[19] = /* permutation of code length codes */ {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; /* get number of lengths in each table, check lengths */ nlen = bits(s, 5) + 257; ndist = bits(s, 5) + 1; ncode = bits(s, 4) + 4; if (nlen > MAXLCODES || ndist > MAXDCODES) return -3; /* bad counts */ /* read code length code lengths (really), missing lengths are zero */ for (index = 0; index < ncode; index++) lengths[order[index]] = bits(s, 3); for (; index < 19; index++) lengths[order[index]] = 0; /* build huffman table for code lengths codes (use lencode temporarily) */ err = construct(&lencode, lengths, 19); if (err != 0) return -4; /* require complete code set here */ /* read length/literal and distance code length tables */ index = 0; while (index < nlen + ndist) { int symbol; /* decoded value */ int len; /* last length to repeat */ symbol = decode(s, &lencode); if (symbol < 16) /* length in 0..15 */ lengths[index++] = symbol; else { /* repeat instruction */ len = 0; /* assume repeating zeros */ if (symbol == 16) { /* repeat last length 3..6 times */ if (index == 0) return -5; /* no last length! */ len = lengths[index - 1]; /* last length */ symbol = 3 + bits(s, 2); } else if (symbol == 17) /* repeat zero 3..10 times */ symbol = 3 + bits(s, 3); else /* == 18, repeat zero 11..138 times */ symbol = 11 + bits(s, 7); if (index + symbol > nlen + ndist) return -6; /* too many lengths! */ while (symbol--) /* repeat last or zero symbol times */ lengths[index++] = len; } } /* build huffman table for literal/length codes */ err = construct(&lencode, lengths, nlen); if (err < 0 || (err > 0 && nlen - lencode.count[0] != 1)) return -7; /* only allow incomplete codes if just one code */ /* build huffman table for distance codes */ err = construct(&distcode, lengths + nlen, ndist); if (err < 0 || (err > 0 && ndist - distcode.count[0] != 1)) return -8; /* only allow incomplete codes if just one code */ /* decode data until end-of-block code */ return codes(s, &lencode, &distcode);}/* * Inflate source to dest. On return, destlen and sourcelen are updated to the * size of the uncompressed data and the size of the deflate data respectively. * On success, the return value of puff() is zero. If there is an error in the * source data, i.e. it is not in the deflate format, then a negative value is * returned. If there is not enough input available or there is not enough * output space, then a positive error is returned. In that case, destlen and * sourcelen are not updated to facilitate retrying from the beginning with the * provision of more input data or more output space. In the case of invalid * inflate data (a negative error), the dest and source pointers are updated to * facilitate the debugging of deflators. * * puff() also has a mode to determine the size of the uncompressed output with * no output written. For this dest must be (unsigned char *)0. In this case, * the input value of *destlen is ignored, and on return *destlen is set to the * size of the uncompressed output. * * The return codes are: * * 2: available inflate data did not terminate * 1: output space exhausted before completing inflate * 0: successful inflate * -1: invalid block type (type == 3) * -2: stored block length did not match one's complement * -3: dynamic block code description: too many length or distance codes * -4: dynamic block code description: code lengths codes incomplete * -5: dynamic block code description: repeat lengths with no first length * -6: dynamic block code description: repeat more than specified lengths * -7: dynamic block code description: invalid literal/length code lengths * -8: dynamic block code description: invalid distance code lengths * -9: invalid literal/length or distance code in fixed or dynamic block * -10: distance is too far back in fixed or dynamic block * * Format notes: * * - Three bits are read for each block to determine the kind of block and * whether or not it is the last block. Then the block is decoded and the * process repeated if it was not the last block. * * - The leftover bits in the last byte of the deflate data after the last * block (if it was a fixed or dynamic block) are undefined and have no * expected values to check. */int puff(unsigned char *dest, /* pointer to destination pointer */ unsigned long *destlen, /* amount of output space */ unsigned char *source, /* pointer to source data pointer */ unsigned long *sourcelen) /* amount of input available */{ struct state s; /* input/output state */ int last, type; /* block information */ int err; /* return value */ /* initialize output state */ s.out = dest; s.outlen = *destlen; /* ignored if dest is NIL */ s.outcnt = 0; /* initialize input state */ s.in = source; s.inlen = *sourcelen; s.incnt = 0; s.bitbuf = 0; s.bitcnt = 0; /* return if bits() or decode() tries to read past available input */ if (setjmp(s.env) != 0) /* if came back here via longjmp() */ err = 2; /* then skip do-loop, return error */ else { /* process blocks until last block or error */ do { last = bits(&s, 1); /* one if last block */ type = bits(&s, 2); /* block type 0..3 */ err = type == 0 ? stored(&s) : (type == 1 ? fixed(&s) : (type == 2 ? dynamic(&s) : -1)); /* type == 3, invalid */ if (err != 0) break; /* return with error */ } while (!last); } /* update the lengths and return */ if (err <= 0) { *destlen = s.outcnt; *sourcelen = s.incnt; } return err;}#ifdef TEST/* Example of how to use puff() */#include <stdio.h>#include <stdlib.h>#include <sys/types.h>#include <sys/stat.h>local unsigned char *yank(char *name, unsigned long *len){ unsigned long size; unsigned char *buf; FILE *in; struct stat s; *len = 0; if (stat(name, &s)) return NULL; if ((s.st_mode & S_IFMT) != S_IFREG) return NULL; size = (unsigned long)(s.st_size); if (size == 0 || (off_t)size != s.st_size) return NULL; in = fopen(name, "r"); if (in == NULL) return NULL; buf = malloc(size); if (buf != NULL && fread(buf, 1, size, in) != size) { free(buf); buf = NULL; } fclose(in); *len = size; return buf;}int main(int argc, char **argv){ int ret; unsigned char *source; unsigned long len, sourcelen, destlen; if (argc < 2) return 2; source = yank(argv[1], &len); if (source == NULL) return 2; sourcelen = len; ret = puff(NIL, &destlen, source, &sourcelen); if (ret) printf("puff() failed with return code %d\n", ret); else { printf("puff() succeeded uncompressing %lu bytes\n", destlen); if (sourcelen < len) printf("%lu compressed bytes unused\n", len - sourcelen); } free(source); return ret;}#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -