📄 puff.c
字号:
/* * puff.c * Copyright (C) 2002-2004 Mark Adler * For conditions of distribution and use, see copyright notice in puff.h * version 1.8, 9 Jan 2004 * * puff.c is a simple inflate written to be an unambiguous way to specify the * deflate format. It is not written for speed but rather simplicity. As a * side benefit, this code might actually be useful when small code is more * important than speed, such as bootstrap applications. For typical deflate * data, zlib's inflate() is about four times as fast as puff(). zlib's * inflate compiles to around 20K on my machine, whereas puff.c compiles to * around 4K on my machine (a PowerPC using GNU cc). If the faster decode() * function here is used, then puff() is only twice as slow as zlib's * inflate(). * * All dynamically allocated memory comes from the stack. The stack required * is less than 2K bytes. This code is compatible with 16-bit int's and * assumes that long's are at least 32 bits. puff.c uses the short data type, * assumed to be 16 bits, for arrays in order to to conserve memory. The code * works whether integers are stored big endian or little endian. * * In the comments below are "Format notes" that describe the inflate process * and document some of the less obvious aspects of the format. This source * code is meant to supplement RFC 1951, which formally describes the deflate * format: * * http://www.zlib.org/rfc-deflate.html *//* * Change history: * * 1.0 10 Feb 2002 - First version * 1.1 17 Feb 2002 - Clarifications of some comments and notes * - Update puff() dest and source pointers on negative * errors to facilitate debugging deflators * - Remove longest from struct huffman -- not needed * - Simplify offs[] index in construct() * - Add input size and checking, using longjmp() to * maintain easy readability * - Use short data type for large arrays * - Use pointers instead of long to specify source and * destination sizes to avoid arbitrary 4 GB limits * 1.2 17 Mar 2002 - Add faster version of decode(), doubles speed (!), * but leave simple version for readabilty * - Make sure invalid distances detected if pointers * are 16 bits * - Fix fixed codes table error * - Provide a scanning mode for determining size of * uncompressed data * 1.3 20 Mar 2002 - Go back to lengths for puff() parameters [Jean-loup] * - Add a puff.h file for the interface * - Add braces in puff() for else do [Jean-loup] * - Use indexes instead of pointers for readability * 1.4 31 Mar 2002 - Simplify construct() code set check * - Fix some comments * - Add FIXLCODES #define * 1.5 6 Apr 2002 - Minor comment fixes * 1.6 7 Aug 2002 - Minor format changes * 1.7 3 Mar 2003 - Added test code for distribution * - Added zlib-like license * 1.8 9 Jan 2004 - Added some comments on no distance codes case */#include <setjmp.h> /* for setjmp(), longjmp(), and jmp_buf */#include "puff.h" /* prototype for puff() */#define local static /* for local function definitions */#define NIL ((unsigned char *)0) /* for no output option *//* * Maximums for allocations and loops. It is not useful to change these -- * they are fixed by the deflate format. */#define MAXBITS 15 /* maximum bits in a code */#define MAXLCODES 286 /* maximum number of literal/length codes */#define MAXDCODES 30 /* maximum number of distance codes */#define MAXCODES (MAXLCODES+MAXDCODES) /* maximum codes lengths to read */#define FIXLCODES 288 /* number of fixed literal/length codes *//* input and output state */struct state { /* output state */ unsigned char *out; /* output buffer */ unsigned long outlen; /* available space at out */ unsigned long outcnt; /* bytes written to out so far */ /* input state */ unsigned char *in; /* input buffer */ unsigned long inlen; /* available input at in */ unsigned long incnt; /* bytes read so far */ int bitbuf; /* bit buffer */ int bitcnt; /* number of bits in bit buffer */ /* input limit error return state for bits() and decode() */ jmp_buf env;};/* * Return need bits from the input stream. This always leaves less than * eight bits in the buffer. bits() works properly for need == 0. * * Format notes: * * - Bits are stored in bytes from the least significant bit to the most * significant bit. Therefore bits are dropped from the bottom of the bit * buffer, using shift right, and new bytes are appended to the top of the * bit buffer, using shift left. */local int bits(struct state *s, int need){ long val; /* bit accumulator (can use up to 20 bits) */ /* load at least need bits into val */ val = s->bitbuf; while (s->bitcnt < need) { if (s->incnt == s->inlen) longjmp(s->env, 1); /* out of input */ val |= (long)(s->in[s->incnt++]) << s->bitcnt; /* load eight bits */ s->bitcnt += 8; } /* drop need bits and update buffer, always zero to seven bits left */ s->bitbuf = (int)(val >> need); s->bitcnt -= need; /* return need bits, zeroing the bits above that */ return (int)(val & ((1L << need) - 1));}/* * Process a stored block. * * Format notes: * * - After the two-bit stored block type (00), the stored block length and * stored bytes are byte-aligned for fast copying. Therefore any leftover * bits in the byte that has the last bit of the type, as many as seven, are * discarded. The value of the discarded bits are not defined and should not * be checked against any expectation. * * - The second inverted copy of the stored block length does not have to be * checked, but it's probably a good idea to do so anyway. * * - A stored block can have zero length. This is sometimes used to byte-align * subsets of the compressed data for random access or partial recovery. */local int stored(struct state *s){ unsigned len; /* length of stored block */ /* discard leftover bits from current byte (assumes s->bitcnt < 8) */ s->bitbuf = 0; s->bitcnt = 0; /* get length and check against its one's complement */ if (s->incnt + 4 > s->inlen) return 2; /* not enough input */ len = s->in[s->incnt++]; len |= s->in[s->incnt++] << 8; if (s->in[s->incnt++] != (~len & 0xff) || s->in[s->incnt++] != ((~len >> 8) & 0xff)) return -2; /* didn't match complement! */ /* copy len bytes from in to out */ if (s->incnt + len > s->inlen) return 2; /* not enough input */ if (s->out != NIL) { if (s->outcnt + len > s->outlen) return 1; /* not enough output space */ while (len--) s->out[s->outcnt++] = s->in[s->incnt++]; } else { /* just scanning */ s->outcnt += len; s->incnt += len; } /* done with a valid stored block */ return 0;}/* * Huffman code decoding tables. count[1..MAXBITS] is the number of symbols of * each length, which for a canonical code are stepped through in order. * symbol[] are the symbol values in canonical order, where the number of * entries is the sum of the counts in count[]. The decoding process can be * seen in the function decode() below. */struct huffman { short *count; /* number of symbols of each length */ short *symbol; /* canonically ordered symbols */};/* * Decode a code from the stream s using huffman table h. Return the symbol or * a negative value if there is an error. If all of the lengths are zero, i.e. * an empty code, or if the code is incomplete and an invalid code is received, * then -9 is returned after reading MAXBITS bits. * * Format notes: * * - The codes as stored in the compressed data are bit-reversed relative to * a simple integer ordering of codes of the same lengths. Hence below the * bits are pulled from the compressed data one at a time and used to * build the code value reversed from what is in the stream in order to * permit simple integer comparisons for decoding. A table-based decoding * scheme (as used in zlib) does not need to do this reversal. * * - The first code for the shortest length is all zeros. Subsequent codes of * the same length are simply integer increments of the previous code. When * moving up a length, a zero bit is appended to the code. For a complete * code, the last code of the longest length will be all ones. * * - Incomplete codes are handled by this decoder, since they are permitted * in the deflate format. See the format notes for fixed() and dynamic(). */#ifdef SLOWlocal int decode(struct state *s, struct huffman *h){ int len; /* current number of bits in code */ int code; /* len bits being decoded */ int first; /* first code of length len */ int count; /* number of codes of length len */ int index; /* index of first code of length len in symbol table */ code = first = index = 0; for (len = 1; len <= MAXBITS; len++) { code |= bits(s, 1); /* get next bit */ count = h->count[len]; if (code < first + count) /* if length len, return symbol */ return h->symbol[index + (code - first)]; index += count; /* else update for next length */ first += count; first <<= 1; code <<= 1; } return -9; /* ran out of codes */}/* * A faster version of decode() for real applications of this code. It's not * as readable, but it makes puff() twice as fast. And it only makes the code * a few percent larger. */#else /* !SLOW */local int decode(struct state *s, struct huffman *h){ int len; /* current number of bits in code */ int code; /* len bits being decoded */ int first; /* first code of length len */ int count; /* number of codes of length len */ int index; /* index of first code of length len in symbol table */ int bitbuf; /* bits from stream */ int left; /* bits left in next or left to process */ short *next; /* next number of codes */ bitbuf = s->bitbuf; left = s->bitcnt; code = first = index = 0; len = 1; next = h->count + 1; while (1) { while (left--) { code |= bitbuf & 1; bitbuf >>= 1; count = *next++; if (code < first + count) { /* if length len, return symbol */ s->bitbuf = bitbuf; s->bitcnt = (s->bitcnt - len) & 7; return h->symbol[index + (code - first)]; } index += count; /* else update for next length */ first += count; first <<= 1; code <<= 1; len++; } left = (MAXBITS+1) - len; if (left == 0) break; if (s->incnt == s->inlen) longjmp(s->env, 1); /* out of input */ bitbuf = s->in[s->incnt++];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -