deflate.c

来自「这是一个同样来自贝尔实验室的和UNIX有着渊源的操作系统, 其简洁的设计和实现易」· C语言代码 · 共 1,360 行 · 第 1/2 页
1,360 行
#include <u.h>#include <libc.h>#include <flate.h>typedef struct Chain	Chain;typedef struct Chains	Chains;typedef struct Dyncode	Dyncode;typedef struct Huff	Huff;typedef struct LZblock	LZblock;typedef struct LZstate	LZstate;enum{	/*	 * deflate format paramaters	 */	DeflateUnc	= 0,			/* uncompressed block */	DeflateFix	= 1,			/* fixed huffman codes */	DeflateDyn	= 2,			/* dynamic huffman codes */	DeflateEob	= 256,			/* end of block code in lit/len book */	DeflateMaxBlock	= 64*1024-1,		/* maximum size of uncompressed block */	DeflateMaxExp	= 10,			/* maximum expansion for a block */	LenStart	= 257,			/* start of length codes in litlen */	Nlitlen		= 288,			/* number of litlen codes */	Noff		= 30,			/* number of offset codes */	Nclen		= 19,			/* number of codelen codes */	MaxOff		= 32*1024,	MinMatch	= 3,			/* shortest match possible */	MaxMatch	= 258,			/* longest match possible */	/*	 * huffman code paramaters	 */	MaxLeaf		= Nlitlen,	MaxHuffBits	= 16,			/* max bits in a huffman code */	ChainMem	= 2 * (MaxHuffBits - 1) * MaxHuffBits,	/*	 * coding of the lz parse	 */	LenFlag		= 1 << 3,	LenShift	= 4,			/* leaves enough space for MinMatchMaxOff */	MaxLitRun	= LenFlag - 1,	/*	 * internal lz paramaters	 */	DeflateOut	= 4096,			/* output buffer size */	BlockSize	= 8192,			/* attempted input read quanta */	DeflateBlock	= DeflateMaxBlock & ~(BlockSize - 1),	MinMatchMaxOff	= 4096,			/* max profitable offset for small match;						 * assumes 8 bits for len, 5+10 for offset						 * DONT CHANGE WITHOUT CHANGING LZPARSE CONSTANTS						 */	HistSlop	= 512,			/* must be at lead MaxMatch */	HistBlock	= 64*1024,	HistSize	= HistBlock + HistSlop,	HashLog		= 13,	HashSize	= 1<<HashLog,	MaxOffCode	= 256,			/* biggest offset looked up in direct table */	EstLitBits	= 8,	EstLenBits	= 4,	EstOffBits	= 5,};/* * knuth vol. 3 multiplicative hashing * each byte x chosen according to rules * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4 * with reasonable spread between the bytes & their complements * * the 3 byte value appears to be as almost good as the 4 byte value, * and might be faster on some machines *//*#define hashit(c)	(((ulong)(c) * 0x6b43a9) >> (24 - HashLog))*/#define hashit(c)	((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog))/* * lempel-ziv style compression state */struct LZstate{	uchar	hist[HistSize];	ulong	pos;				/* current location in history buffer */	ulong	avail;				/* data available after pos */	int	eof;	ushort	hash[HashSize];			/* hash chains */	ushort	nexts[MaxOff];	int	now;				/* pos in hash chains */	int	dot;				/* dawn of time in history */	int	prevlen;			/* lazy matching state */	int	prevoff;	int	maxcheck;			/* compressor tuning */	uchar	obuf[DeflateOut];	uchar	*out;				/* current position in the output buffer */	uchar	*eout;	ulong	bits;				/* bit shift register */	int	nbits;	int	rbad;				/* got an error reading the buffer */	int	wbad;				/* got an error writing the buffer */	int	(*w)(void*, void*, int);	void	*wr;	ulong	totr;				/* total input size */	ulong	totw;				/* total output size */	int	debug;};struct LZblock{	ushort	parse[DeflateMaxBlock / 2 + 1];	int	lastv;				/* value being constucted for parse */	ulong	litlencount[Nlitlen];	ulong	offcount[Noff];	ushort	*eparse;			/* limit for parse table */	int	bytes;				/* consumed from the input */	int	excost;				/* cost of encoding extra len & off bits */};/* * huffman code table */struct Huff{	short	bits;				/* length of the code */	ushort	encode;				/* the code */};/* * encoding of dynamic huffman trees */struct Dyncode{	int	nlit;	int	noff;	int	nclen;	int	ncode;	Huff	codetab[Nclen];	uchar	codes[Nlitlen+Noff];	uchar	codeaux[Nlitlen+Noff];};static	int	deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int));static	int	lzcomp(LZstate*, LZblock*, uchar*, ushort*, int finish);static	void	wrblock(LZstate*, int, ushort*, ushort*, Huff*, Huff*);static	int	bitcost(Huff*, ulong*, int);static	int	huffcodes(Dyncode*, Huff*, Huff*);static	void	wrdyncode(LZstate*, Dyncode*);static	void	lzput(LZstate*, ulong bits, int nbits);static	void	lzflushbits(LZstate*);static	void	lzflush(LZstate *lz);static	void	lzwrite(LZstate *lz, void *buf, int n);static	int	hufftabinit(Huff*, int, ulong*, int);static	int	mkgzprecode(Huff*, ulong *, int, int);static	int	mkprecode(Huff*, ulong *, int, int, ulong*);static	void	nextchain(Chains*, int);static	void	leafsort(ulong*, ushort*, int, int);/* conversion from len to code word */static int lencode[MaxMatch];/* * conversion from off to code word * off <= MaxOffCode ? offcode[off] : bigoffcode[off >> 7]*/static int offcode[MaxOffCode];static int bigoffcode[256];/* litlen code words LenStart-285 extra bits */static int litlenbase[Nlitlen-LenStart];static int litlenextra[Nlitlen-LenStart] ={/* 257 */	0, 0, 0,/* 260 */	0, 0, 0, 0, 0, 1, 1, 1, 1, 2,/* 270 */	2, 2, 2, 3, 3, 3, 3, 4, 4, 4,/* 280 */	4, 5, 5, 5, 5, 0, 0, 0};/* offset code word extra bits */static int offbase[Noff];static int offextra[] ={	0,  0,  0,  0,  1,  1,  2,  2,  3,  3,	4,  4,  5,  5,  6,  6,  7,  7,  8,  8,	9,  9,  10, 10, 11, 11, 12, 12, 13, 13,	0,  0,};/* order code lengths */static int clenorder[Nclen] ={        16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};/* static huffman tables */static	Huff	litlentab[Nlitlen];static	Huff	offtab[Noff];static	Huff	hofftab[Noff];/* bit reversal for brain dead endian swap in huffman codes */static	uchar	revtab[256];static	ulong	nlits;static	ulong	nmatches;intdeflateinit(void){	ulong bitcount[MaxHuffBits];	int i, j, ci, n;	/* byte reverse table */	for(i=0; i<256; i++)		for(j=0; j<8; j++)			if(i & (1<<j))				revtab[i] |= 0x80 >> j;	/* static Litlen bit lengths */	for(i=0; i<144; i++)		litlentab[i].bits = 8;	for(i=144; i<256; i++)		litlentab[i].bits = 9;	for(i=256; i<280; i++)		litlentab[i].bits = 7;	for(i=280; i<Nlitlen; i++)		litlentab[i].bits = 8;	memset(bitcount, 0, sizeof(bitcount));	bitcount[8] += 144 - 0;	bitcount[9] += 256 - 144;	bitcount[7] += 280 - 256;	bitcount[8] += Nlitlen - 280;	if(!hufftabinit(litlentab, Nlitlen, bitcount, 9))		return FlateInternal;	/* static offset bit lengths */	for(i = 0; i < Noff; i++)		offtab[i].bits = 5;	memset(bitcount, 0, sizeof(bitcount));	bitcount[5] = Noff;	if(!hufftabinit(offtab, Noff, bitcount, 5))		return FlateInternal;	bitcount[0] = 0;	bitcount[1] = 0;	if(!mkgzprecode(hofftab, bitcount, 2, MaxHuffBits))		return FlateInternal;	/* conversion tables for lens & offs to codes */	ci = 0;	for(i = LenStart; i < 286; i++){		n = ci + (1 << litlenextra[i - LenStart]);		litlenbase[i - LenStart] = ci;		for(; ci < n; ci++)			lencode[ci] = i;	}	/* patch up special case for len MaxMatch */	lencode[MaxMatch-MinMatch] = 285;	litlenbase[285-LenStart] = MaxMatch-MinMatch;	ci = 0;	for(i = 0; i < 16; i++){		n = ci + (1 << offextra[i]);		offbase[i] = ci;		for(; ci < n; ci++)			offcode[ci] = i;	}	ci = ci >> 7;	for(; i < 30; i++){		n = ci + (1 << (offextra[i] - 7));		offbase[i] = ci << 7;		for(; ci < n; ci++)			bigoffcode[ci] = i;	}	return FlateOk;}static voiddeflatereset(LZstate *lz, int level, int debug){	memset(lz->nexts, 0, sizeof lz->nexts);	memset(lz->hash, 0, sizeof lz->hash);	lz->totr = 0;	lz->totw = 0;	lz->pos = 0;	lz->avail = 0;	lz->out = lz->obuf;	lz->eout = &lz->obuf[DeflateOut];	lz->prevlen = MinMatch - 1;	lz->prevoff = 0;	lz->now = MaxOff + 1;	lz->dot = lz->now;	lz->bits = 0;	lz->nbits = 0;	lz->maxcheck = (1 << level);	lz->maxcheck -= lz->maxcheck >> 2;	if(lz->maxcheck < 2)		lz->maxcheck = 2;	else if(lz->maxcheck > 1024)		lz->maxcheck = 1024;	lz->debug = debug;}intdeflate(void *wr, int (*w)(void*, void*, int), void *rr, int (*r)(void*, void*, int), int level, int debug){	LZstate *lz;	LZblock *lzb;	int ok;	lz = malloc(sizeof *lz + sizeof *lzb);	if(lz == nil)		return FlateNoMem;	lzb = (LZblock*)&lz[1];	deflatereset(lz, level, debug);	lz->w = w;	lz->wr = wr;	lz->wbad = 0;	lz->rbad = 0;	lz->eof = 0;	ok = FlateOk;	while(!lz->eof || lz->avail){		ok = deflateb(lz, lzb, rr, r);		if(ok != FlateOk)			break;	}	if(ok == FlateOk && lz->rbad)		ok = FlateInputFail;	if(ok == FlateOk && lz->wbad)		ok = FlateOutputFail;	free(lz);	return ok;}static intdeflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int)){	Dyncode dyncode, hdyncode;	Huff dlitlentab[Nlitlen], dofftab[Noff], hlitlentab[Nlitlen];	ulong litcount[Nlitlen];	long nunc, ndyn, nfix, nhuff;	uchar *slop, *hslop;	ulong ep;	int i, n, m, mm, nslop;	memset(lzb->litlencount, 0, sizeof lzb->litlencount);	memset(lzb->offcount, 0, sizeof lzb->offcount);	lzb->litlencount[DeflateEob]++;	lzb->bytes = 0;	lzb->eparse = lzb->parse;	lzb->lastv = 0;	lzb->excost = 0;	slop = &lz->hist[lz->pos];	n = lz->avail;	while(n < DeflateBlock && (!lz->eof || lz->avail)){		/*		 * fill the buffer as much as possible,		 * while leaving room for MaxOff history behind lz->pos,		 * and not reading more than we can handle.		 *		 * make sure we read at least HistSlop bytes.		 */		if(!lz->eof){			ep = lz->pos + lz->avail;			if(ep >= HistBlock)				ep -= HistBlock;			m = HistBlock - MaxOff - lz->avail;			if(m > HistBlock - n)				m = HistBlock - n;			if(m > (HistBlock + HistSlop) - ep)				m = (HistBlock + HistSlop) - ep;			if(m & ~(BlockSize - 1))				m &= ~(BlockSize - 1);			/*			 * be nice to the caller: stop reads that are too small.			 * can only get here when we've already filled the buffer some			 */			if(m < HistSlop){				if(!m || !lzb->bytes)					return FlateInternal;				break;			}			mm = (*r)(rr, &lz->hist[ep], m);			if(mm > 0){				/*				 * wrap data to end if we're read it from the beginning				 * this way, we don't have to wrap searches.				 *				 * wrap reads past the end to the beginning.				 * this way, we can guarantee minimum size reads.				 */				if(ep < HistSlop)					memmove(&lz->hist[ep + HistBlock], &lz->hist[ep], HistSlop - ep);				else if(ep + mm > HistBlock)					memmove(&lz->hist[0], &lz->hist[HistBlock], ep + mm - HistBlock);				lz->totr += mm;				n += mm;				lz->avail += mm;			}else{				if(mm < 0)					lz->rbad = 1;				lz->eof = 1;			}		}		ep = lz->pos + lz->avail;		if(ep > HistSize)			ep = HistSize;		if(lzb->bytes + ep - lz->pos > DeflateMaxBlock)			ep = lz->pos + DeflateMaxBlock - lzb->bytes;		m = lzcomp(lz, lzb, &lz->hist[ep], lzb->eparse, lz->eof);		lzb->bytes += m;		lz->pos = (lz->pos + m) & (HistBlock - 1);		lz->avail -= m;	}	if(lzb->lastv)		*lzb->eparse++ = lzb->lastv;	if(lzb->eparse > lzb->parse + nelem(lzb->parse))		return FlateInternal;	nunc = lzb->bytes;	if(!mkgzprecode(dlitlentab, lzb->litlencount, Nlitlen, MaxHuffBits)	|| !mkgzprecode(dofftab, lzb->offcount, Noff, MaxHuffBits))		return FlateInternal;	ndyn = huffcodes(&dyncode, dlitlentab, dofftab);	if(ndyn < 0)		return FlateInternal;	ndyn += bitcost(dlitlentab, lzb->litlencount, Nlitlen)		+ bitcost(dofftab, lzb->offcount, Noff)		+ lzb->excost;	memset(litcount, 0, sizeof litcount);	nslop = nunc;	if(nslop > &lz->hist[HistSize] - slop)		nslop = &lz->hist[HistSize] - slop;	for(i = 0; i < nslop; i++)		litcount[slop[i]]++;	hslop = &lz->hist[HistSlop - nslop];	for(; i < nunc; i++)		litcount[hslop[i]]++;	litcount[DeflateEob]++;	if(!mkgzprecode(hlitlentab, litcount, Nlitlen, MaxHuffBits))		return FlateInternal;	nhuff = huffcodes(&hdyncode, hlitlentab, hofftab);	if(nhuff < 0)		return FlateInternal;	nhuff += bitcost(hlitlentab, litcount, Nlitlen);	nfix = bitcost(litlentab, lzb->litlencount, Nlitlen)		+ bitcost(offtab, lzb->offcount, Noff)		+ lzb->excost;	lzput(lz, lz->eof && !lz->avail, 1);	if(lz->debug){		fprint(2, "block: bytes=%lud entries=%ld extra bits=%d\n\tuncompressed=%lud fixed=%lud dynamic=%lud huffman=%lud\n",			nunc, lzb->eparse - lzb->parse, lzb->excost, (nunc + 4) * 8, nfix, ndyn, nhuff);		fprint(2, "\tnlit=%lud matches=%lud eof=%d\n", nlits, nmatches, lz->eof && !lz->avail);	}	if((nunc + 4) * 8 < ndyn && (nunc + 4) * 8 < nfix && (nunc + 4) * 8 < nhuff){		lzput(lz, DeflateUnc, 2);		lzflushbits(lz);		lzput(lz, nunc & 0xff, 8);		lzput(lz, (nunc >> 8) & 0xff, 8);		lzput(lz, ~nunc & 0xff, 8);		lzput(lz, (~nunc >> 8) & 0xff, 8);		lzflush(lz);		lzwrite(lz, slop, nslop);		lzwrite(lz, &lz->hist[HistSlop], nunc - nslop);	}else if(ndyn < nfix && ndyn < nhuff){		lzput(lz, DeflateDyn, 2);		wrdyncode(lz, &dyncode);		wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, dlitlentab, dofftab);		lzput(lz, dlitlentab[DeflateEob].encode, dlitlentab[DeflateEob].bits);	}else if(nhuff < nfix){		lzput(lz, DeflateDyn, 2);		wrdyncode(lz, &hdyncode);		m = 0;		for(i = nunc; i > MaxLitRun; i -= MaxLitRun)			lzb->parse[m++] = MaxLitRun;		lzb->parse[m++] = i;		wrblock(lz, slop - lz->hist, lzb->parse, lzb->parse + m, hlitlentab, hofftab);		lzput(lz, hlitlentab[DeflateEob].encode, hlitlentab[DeflateEob].bits);	}else{		lzput(lz, DeflateFix, 2);		wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, litlentab, offtab);		lzput(lz, litlentab[DeflateEob].encode, litlentab[DeflateEob].bits);	}	if(lz->eof && !lz->avail){		lzflushbits(lz);		lzflush(lz);	}	return FlateOk;}static voidlzwrite(LZstate *lz, void *buf, int n){	int nw;	if(n && lz->w){		nw = (*lz->w)(lz->wr, buf, n);		if(nw != n){			lz->w = nil;			lz->wbad = 1;		}else			lz->totw += n;	}}static voidlzflush(LZstate *lz){	lzwrite(lz, lz->obuf, lz->out - lz->obuf);	lz->out = lz->obuf;}static voidlzput(LZstate *lz, ulong bits, int nbits){	bits = (bits << lz->nbits) | lz->bits;	for(nbits += lz->nbits; nbits >= 8; nbits -= 8){		*lz->out++ = bits;		if(lz->out == lz->eout)			lzflush(lz);		bits >>= 8;	}	lz->bits = bits;	lz->nbits = nbits;}static voidlzflushbits(LZstate *lz){	if(lz->nbits)		lzput(lz, 0, 8 - (lz->nbits & 7));}/* * write out a block of n samples, * given lz encoding and counts for huffman tables */static voidwrblock(LZstate *out, int litoff, ushort *soff, ushort *eoff, Huff *litlentab, Huff *offtab){	ushort *off;	int i, run, offset, lit, len, c;	if(out->debug > 2){		for(off = soff; off < eoff; ){			offset = *off++;			run = offset & MaxLitRun;			if(run){				for(i = 0; i < run; i++){					lit = out->hist[litoff & (HistBlock - 1)];					litoff++;					fprint(2, "\tlit %.2ux %c\n", lit, lit);				}				if(!(offset & LenFlag))					continue;				len = offset >> LenShift;				offset = *off++;			}else if(offset & LenFlag){				len = offset >> LenShift;				offset = *off++;			}else{				len = 0;				offset >>= LenShift;			}			litoff += len + MinMatch;			fprint(2, "\t<%d, %d>\n", offset + 1, len + MinMatch);		}	}	for(off = soff; off < eoff; ){		offset = *off++;		run = offset & MaxLitRun;		if(run){			for(i = 0; i < run; i++){				lit = out->hist[litoff & (HistBlock - 1)];				litoff++;				lzput(out, litlentab[lit].encode, litlentab[lit].bits);			}			if(!(offset & LenFlag))				continue;			len = offset >> LenShift;			offset = *off++;		}else if(offset & LenFlag){			len = offset >> LenShift;			offset = *off++;		}else{			len = 0;			offset >>= LenShift;		}		litoff += len + MinMatch;		c = lencode[len];		lzput(out, litlentab[c].encode, litlentab[c].bits);		c -= LenStart;		if(litlenextra[c])			lzput(out, len - litlenbase[c], litlenextra[c]);		if(offset < MaxOffCode)			c = offcode[offset];		else			c = bigoffcode[offset >> 7];		lzput(out, offtab[c].encode, offtab[c].bits);		if(offextra[c])			lzput(out, offset - offbase[c], offextra[c]);	}}/* * look for the longest, closest string which matches * the next prefix.  the clever part here is looking for * a string 1 longer than the previous best match. * * follows the recommendation of limiting number of chains * which are checked.  this appears to be the best heuristic. */static intlzmatch(int now, int then, uchar *p, uchar *es, ushort *nexts, uchar *hist, int runlen, int check, int *m){	uchar *s, *t;	int ml, off, last;	ml = check;	if(runlen >= 8)		check >>= 2;	*m = 0;	if(p + runlen >= es)		return runlen;	last = 0;	for(; check-- > 0; then = nexts[then & (MaxOff-1)]){		off = (ushort)(now - then);		if(off <= last || off > MaxOff)			break;		s = p + runlen;		t = hist + (((p - hist) - off) & (HistBlock-1));		t += runlen;		for(; s >= p; s--){			if(*s != *t)				goto matchloop;			t--;		}		/*
deflate.c - 源码说明

本页面展示了「这是一个同样来自贝尔实验室的和UNIX有着渊源的操作系统, 其简洁的设计和实现易于我们学习和理解」中的 deflate.c 源码文件，采用 C语言编程语言编写，共 1,360 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与UNIX相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?