📄 deflate.c
字号:
#include <u.h>#include <libc.h>#include <flate.h>typedef struct Chain Chain;typedef struct Chains Chains;typedef struct Dyncode Dyncode;typedef struct Huff Huff;typedef struct LZblock LZblock;typedef struct LZstate LZstate;enum{ /* * deflate format paramaters */ DeflateUnc = 0, /* uncompressed block */ DeflateFix = 1, /* fixed huffman codes */ DeflateDyn = 2, /* dynamic huffman codes */ DeflateEob = 256, /* end of block code in lit/len book */ DeflateMaxBlock = 64*1024-1, /* maximum size of uncompressed block */ DeflateMaxExp = 10, /* maximum expansion for a block */ LenStart = 257, /* start of length codes in litlen */ Nlitlen = 288, /* number of litlen codes */ Noff = 30, /* number of offset codes */ Nclen = 19, /* number of codelen codes */ MaxOff = 32*1024, MinMatch = 3, /* shortest match possible */ MaxMatch = 258, /* longest match possible */ /* * huffman code paramaters */ MaxLeaf = Nlitlen, MaxHuffBits = 16, /* max bits in a huffman code */ ChainMem = 2 * (MaxHuffBits - 1) * MaxHuffBits, /* * coding of the lz parse */ LenFlag = 1 << 3, LenShift = 4, /* leaves enough space for MinMatchMaxOff */ MaxLitRun = LenFlag - 1, /* * internal lz paramaters */ DeflateOut = 4096, /* output buffer size */ BlockSize = 8192, /* attempted input read quanta */ DeflateBlock = DeflateMaxBlock & ~(BlockSize - 1), MinMatchMaxOff = 4096, /* max profitable offset for small match; * assumes 8 bits for len, 5+10 for offset * DONT CHANGE WITHOUT CHANGING LZPARSE CONSTANTS */ HistSlop = 512, /* must be at lead MaxMatch */ HistBlock = 64*1024, HistSize = HistBlock + HistSlop, HashLog = 13, HashSize = 1<<HashLog, MaxOffCode = 256, /* biggest offset looked up in direct table */ EstLitBits = 8, EstLenBits = 4, EstOffBits = 5,};/* * knuth vol. 3 multiplicative hashing * each byte x chosen according to rules * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4 * with reasonable spread between the bytes & their complements * * the 3 byte value appears to be as almost good as the 4 byte value, * and might be faster on some machines *//*#define hashit(c) (((ulong)(c) * 0x6b43a9) >> (24 - HashLog))*/#define hashit(c) ((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog))/* * lempel-ziv style compression state */struct LZstate{ uchar hist[HistSize]; ulong pos; /* current location in history buffer */ ulong avail; /* data available after pos */ int eof; ushort hash[HashSize]; /* hash chains */ ushort nexts[MaxOff]; int now; /* pos in hash chains */ int dot; /* dawn of time in history */ int prevlen; /* lazy matching state */ int prevoff; int maxcheck; /* compressor tuning */ uchar obuf[DeflateOut]; uchar *out; /* current position in the output buffer */ uchar *eout; ulong bits; /* bit shift register */ int nbits; int rbad; /* got an error reading the buffer */ int wbad; /* got an error writing the buffer */ int (*w)(void*, void*, int); void *wr; ulong totr; /* total input size */ ulong totw; /* total output size */ int debug;};struct LZblock{ ushort parse[DeflateMaxBlock / 2 + 1]; int lastv; /* value being constucted for parse */ ulong litlencount[Nlitlen]; ulong offcount[Noff]; ushort *eparse; /* limit for parse table */ int bytes; /* consumed from the input */ int excost; /* cost of encoding extra len & off bits */};/* * huffman code table */struct Huff{ short bits; /* length of the code */ ushort encode; /* the code */};/* * encoding of dynamic huffman trees */struct Dyncode{ int nlit; int noff; int nclen; int ncode; Huff codetab[Nclen]; uchar codes[Nlitlen+Noff]; uchar codeaux[Nlitlen+Noff];};static int deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int));static int lzcomp(LZstate*, LZblock*, uchar*, ushort*, int finish);static void wrblock(LZstate*, int, ushort*, ushort*, Huff*, Huff*);static int bitcost(Huff*, ulong*, int);static int huffcodes(Dyncode*, Huff*, Huff*);static void wrdyncode(LZstate*, Dyncode*);static void lzput(LZstate*, ulong bits, int nbits);static void lzflushbits(LZstate*);static void lzflush(LZstate *lz);static void lzwrite(LZstate *lz, void *buf, int n);static int hufftabinit(Huff*, int, ulong*, int);static int mkgzprecode(Huff*, ulong *, int, int);static int mkprecode(Huff*, ulong *, int, int, ulong*);static void nextchain(Chains*, int);static void leafsort(ulong*, ushort*, int, int);/* conversion from len to code word */static int lencode[MaxMatch];/* * conversion from off to code word * off <= MaxOffCode ? offcode[off] : bigoffcode[off >> 7]*/static int offcode[MaxOffCode];static int bigoffcode[256];/* litlen code words LenStart-285 extra bits */static int litlenbase[Nlitlen-LenStart];static int litlenextra[Nlitlen-LenStart] ={/* 257 */ 0, 0, 0,/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,/* 280 */ 4, 5, 5, 5, 5, 0, 0, 0};/* offset code word extra bits */static int offbase[Noff];static int offextra[] ={ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 0, 0,};/* order code lengths */static int clenorder[Nclen] ={ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};/* static huffman tables */static Huff litlentab[Nlitlen];static Huff offtab[Noff];static Huff hofftab[Noff];/* bit reversal for brain dead endian swap in huffman codes */static uchar revtab[256];static ulong nlits;static ulong nmatches;intdeflateinit(void){ ulong bitcount[MaxHuffBits]; int i, j, ci, n; /* byte reverse table */ for(i=0; i<256; i++) for(j=0; j<8; j++) if(i & (1<<j)) revtab[i] |= 0x80 >> j; /* static Litlen bit lengths */ for(i=0; i<144; i++) litlentab[i].bits = 8; for(i=144; i<256; i++) litlentab[i].bits = 9; for(i=256; i<280; i++) litlentab[i].bits = 7; for(i=280; i<Nlitlen; i++) litlentab[i].bits = 8; memset(bitcount, 0, sizeof(bitcount)); bitcount[8] += 144 - 0; bitcount[9] += 256 - 144; bitcount[7] += 280 - 256; bitcount[8] += Nlitlen - 280; if(!hufftabinit(litlentab, Nlitlen, bitcount, 9)) return FlateInternal; /* static offset bit lengths */ for(i = 0; i < Noff; i++) offtab[i].bits = 5; memset(bitcount, 0, sizeof(bitcount)); bitcount[5] = Noff; if(!hufftabinit(offtab, Noff, bitcount, 5)) return FlateInternal; bitcount[0] = 0; bitcount[1] = 0; if(!mkgzprecode(hofftab, bitcount, 2, MaxHuffBits)) return FlateInternal; /* conversion tables for lens & offs to codes */ ci = 0; for(i = LenStart; i < 286; i++){ n = ci + (1 << litlenextra[i - LenStart]); litlenbase[i - LenStart] = ci; for(; ci < n; ci++) lencode[ci] = i; } /* patch up special case for len MaxMatch */ lencode[MaxMatch-MinMatch] = 285; litlenbase[285-LenStart] = MaxMatch-MinMatch; ci = 0; for(i = 0; i < 16; i++){ n = ci + (1 << offextra[i]); offbase[i] = ci; for(; ci < n; ci++) offcode[ci] = i; } ci = ci >> 7; for(; i < 30; i++){ n = ci + (1 << (offextra[i] - 7)); offbase[i] = ci << 7; for(; ci < n; ci++) bigoffcode[ci] = i; } return FlateOk;}static voiddeflatereset(LZstate *lz, int level, int debug){ memset(lz->nexts, 0, sizeof lz->nexts); memset(lz->hash, 0, sizeof lz->hash); lz->totr = 0; lz->totw = 0; lz->pos = 0; lz->avail = 0; lz->out = lz->obuf; lz->eout = &lz->obuf[DeflateOut]; lz->prevlen = MinMatch - 1; lz->prevoff = 0; lz->now = MaxOff + 1; lz->dot = lz->now; lz->bits = 0; lz->nbits = 0; lz->maxcheck = (1 << level); lz->maxcheck -= lz->maxcheck >> 2; if(lz->maxcheck < 2) lz->maxcheck = 2; else if(lz->maxcheck > 1024) lz->maxcheck = 1024; lz->debug = debug;}intdeflate(void *wr, int (*w)(void*, void*, int), void *rr, int (*r)(void*, void*, int), int level, int debug){ LZstate *lz; LZblock *lzb; int ok; lz = malloc(sizeof *lz + sizeof *lzb); if(lz == nil) return FlateNoMem; lzb = (LZblock*)&lz[1]; deflatereset(lz, level, debug); lz->w = w; lz->wr = wr; lz->wbad = 0; lz->rbad = 0; lz->eof = 0; ok = FlateOk; while(!lz->eof || lz->avail){ ok = deflateb(lz, lzb, rr, r); if(ok != FlateOk) break; } if(ok == FlateOk && lz->rbad) ok = FlateInputFail; if(ok == FlateOk && lz->wbad) ok = FlateOutputFail; free(lz); return ok;}static intdeflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int)){ Dyncode dyncode, hdyncode; Huff dlitlentab[Nlitlen], dofftab[Noff], hlitlentab[Nlitlen]; ulong litcount[Nlitlen]; long nunc, ndyn, nfix, nhuff; uchar *slop, *hslop; ulong ep; int i, n, m, mm, nslop; memset(lzb->litlencount, 0, sizeof lzb->litlencount); memset(lzb->offcount, 0, sizeof lzb->offcount); lzb->litlencount[DeflateEob]++; lzb->bytes = 0; lzb->eparse = lzb->parse; lzb->lastv = 0; lzb->excost = 0; slop = &lz->hist[lz->pos]; n = lz->avail; while(n < DeflateBlock && (!lz->eof || lz->avail)){ /* * fill the buffer as much as possible, * while leaving room for MaxOff history behind lz->pos, * and not reading more than we can handle. * * make sure we read at least HistSlop bytes. */ if(!lz->eof){ ep = lz->pos + lz->avail; if(ep >= HistBlock) ep -= HistBlock; m = HistBlock - MaxOff - lz->avail; if(m > HistBlock - n) m = HistBlock - n; if(m > (HistBlock + HistSlop) - ep) m = (HistBlock + HistSlop) - ep; if(m & ~(BlockSize - 1)) m &= ~(BlockSize - 1); /* * be nice to the caller: stop reads that are too small. * can only get here when we've already filled the buffer some */ if(m < HistSlop){ if(!m || !lzb->bytes) return FlateInternal; break; } mm = (*r)(rr, &lz->hist[ep], m); if(mm > 0){ /* * wrap data to end if we're read it from the beginning * this way, we don't have to wrap searches. * * wrap reads past the end to the beginning. * this way, we can guarantee minimum size reads. */ if(ep < HistSlop) memmove(&lz->hist[ep + HistBlock], &lz->hist[ep], HistSlop - ep); else if(ep + mm > HistBlock) memmove(&lz->hist[0], &lz->hist[HistBlock], ep + mm - HistBlock); lz->totr += mm; n += mm; lz->avail += mm; }else{ if(mm < 0) lz->rbad = 1; lz->eof = 1; } } ep = lz->pos + lz->avail; if(ep > HistSize) ep = HistSize; if(lzb->bytes + ep - lz->pos > DeflateMaxBlock) ep = lz->pos + DeflateMaxBlock - lzb->bytes; m = lzcomp(lz, lzb, &lz->hist[ep], lzb->eparse, lz->eof); lzb->bytes += m; lz->pos = (lz->pos + m) & (HistBlock - 1); lz->avail -= m; } if(lzb->lastv) *lzb->eparse++ = lzb->lastv; if(lzb->eparse > lzb->parse + nelem(lzb->parse)) return FlateInternal; nunc = lzb->bytes; if(!mkgzprecode(dlitlentab, lzb->litlencount, Nlitlen, MaxHuffBits) || !mkgzprecode(dofftab, lzb->offcount, Noff, MaxHuffBits)) return FlateInternal; ndyn = huffcodes(&dyncode, dlitlentab, dofftab); if(ndyn < 0) return FlateInternal; ndyn += bitcost(dlitlentab, lzb->litlencount, Nlitlen) + bitcost(dofftab, lzb->offcount, Noff) + lzb->excost; memset(litcount, 0, sizeof litcount); nslop = nunc; if(nslop > &lz->hist[HistSize] - slop) nslop = &lz->hist[HistSize] - slop; for(i = 0; i < nslop; i++) litcount[slop[i]]++; hslop = &lz->hist[HistSlop - nslop]; for(; i < nunc; i++) litcount[hslop[i]]++; litcount[DeflateEob]++; if(!mkgzprecode(hlitlentab, litcount, Nlitlen, MaxHuffBits)) return FlateInternal; nhuff = huffcodes(&hdyncode, hlitlentab, hofftab); if(nhuff < 0) return FlateInternal; nhuff += bitcost(hlitlentab, litcount, Nlitlen); nfix = bitcost(litlentab, lzb->litlencount, Nlitlen) + bitcost(offtab, lzb->offcount, Noff) + lzb->excost; lzput(lz, lz->eof && !lz->avail, 1); if(lz->debug){ fprint(2, "block: bytes=%lud entries=%ld extra bits=%d\n\tuncompressed=%lud fixed=%lud dynamic=%lud huffman=%lud\n", nunc, lzb->eparse - lzb->parse, lzb->excost, (nunc + 4) * 8, nfix, ndyn, nhuff); fprint(2, "\tnlit=%lud matches=%lud eof=%d\n", nlits, nmatches, lz->eof && !lz->avail); } if((nunc + 4) * 8 < ndyn && (nunc + 4) * 8 < nfix && (nunc + 4) * 8 < nhuff){ lzput(lz, DeflateUnc, 2); lzflushbits(lz); lzput(lz, nunc & 0xff, 8); lzput(lz, (nunc >> 8) & 0xff, 8); lzput(lz, ~nunc & 0xff, 8); lzput(lz, (~nunc >> 8) & 0xff, 8); lzflush(lz); lzwrite(lz, slop, nslop); lzwrite(lz, &lz->hist[HistSlop], nunc - nslop); }else if(ndyn < nfix && ndyn < nhuff){ lzput(lz, DeflateDyn, 2); wrdyncode(lz, &dyncode); wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, dlitlentab, dofftab); lzput(lz, dlitlentab[DeflateEob].encode, dlitlentab[DeflateEob].bits); }else if(nhuff < nfix){ lzput(lz, DeflateDyn, 2); wrdyncode(lz, &hdyncode); m = 0; for(i = nunc; i > MaxLitRun; i -= MaxLitRun) lzb->parse[m++] = MaxLitRun; lzb->parse[m++] = i; wrblock(lz, slop - lz->hist, lzb->parse, lzb->parse + m, hlitlentab, hofftab); lzput(lz, hlitlentab[DeflateEob].encode, hlitlentab[DeflateEob].bits); }else{ lzput(lz, DeflateFix, 2); wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, litlentab, offtab); lzput(lz, litlentab[DeflateEob].encode, litlentab[DeflateEob].bits); } if(lz->eof && !lz->avail){ lzflushbits(lz); lzflush(lz); } return FlateOk;}static voidlzwrite(LZstate *lz, void *buf, int n){ int nw; if(n && lz->w){ nw = (*lz->w)(lz->wr, buf, n); if(nw != n){ lz->w = nil; lz->wbad = 1; }else lz->totw += n; }}static voidlzflush(LZstate *lz){ lzwrite(lz, lz->obuf, lz->out - lz->obuf); lz->out = lz->obuf;}static voidlzput(LZstate *lz, ulong bits, int nbits){ bits = (bits << lz->nbits) | lz->bits; for(nbits += lz->nbits; nbits >= 8; nbits -= 8){ *lz->out++ = bits; if(lz->out == lz->eout) lzflush(lz); bits >>= 8; } lz->bits = bits; lz->nbits = nbits;}static voidlzflushbits(LZstate *lz){ if(lz->nbits) lzput(lz, 0, 8 - (lz->nbits & 7));}/* * write out a block of n samples, * given lz encoding and counts for huffman tables */static voidwrblock(LZstate *out, int litoff, ushort *soff, ushort *eoff, Huff *litlentab, Huff *offtab){ ushort *off; int i, run, offset, lit, len, c; if(out->debug > 2){ for(off = soff; off < eoff; ){ offset = *off++; run = offset & MaxLitRun; if(run){ for(i = 0; i < run; i++){ lit = out->hist[litoff & (HistBlock - 1)]; litoff++; fprint(2, "\tlit %.2ux %c\n", lit, lit); } if(!(offset & LenFlag)) continue; len = offset >> LenShift; offset = *off++; }else if(offset & LenFlag){ len = offset >> LenShift; offset = *off++; }else{ len = 0; offset >>= LenShift; } litoff += len + MinMatch; fprint(2, "\t<%d, %d>\n", offset + 1, len + MinMatch); } } for(off = soff; off < eoff; ){ offset = *off++; run = offset & MaxLitRun; if(run){ for(i = 0; i < run; i++){ lit = out->hist[litoff & (HistBlock - 1)]; litoff++; lzput(out, litlentab[lit].encode, litlentab[lit].bits); } if(!(offset & LenFlag)) continue; len = offset >> LenShift; offset = *off++; }else if(offset & LenFlag){ len = offset >> LenShift; offset = *off++; }else{ len = 0; offset >>= LenShift; } litoff += len + MinMatch; c = lencode[len]; lzput(out, litlentab[c].encode, litlentab[c].bits); c -= LenStart; if(litlenextra[c]) lzput(out, len - litlenbase[c], litlenextra[c]); if(offset < MaxOffCode) c = offcode[offset]; else c = bigoffcode[offset >> 7]; lzput(out, offtab[c].encode, offtab[c].bits); if(offextra[c]) lzput(out, offset - offbase[c], offextra[c]); }}/* * look for the longest, closest string which matches * the next prefix. the clever part here is looking for * a string 1 longer than the previous best match. * * follows the recommendation of limiting number of chains * which are checked. this appears to be the best heuristic. */static intlzmatch(int now, int then, uchar *p, uchar *es, ushort *nexts, uchar *hist, int runlen, int check, int *m){ uchar *s, *t; int ml, off, last; ml = check; if(runlen >= 8) check >>= 2; *m = 0; if(p + runlen >= es) return runlen; last = 0; for(; check-- > 0; then = nexts[then & (MaxOff-1)]){ off = (ushort)(now - then); if(off <= last || off > MaxOff) break; s = p + runlen; t = hist + (((p - hist) - off) & (HistBlock-1)); t += runlen; for(; s >= p; s--){ if(*s != *t) goto matchloop; t--; } /*
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -