📄 arclzw.c

📁 汇编源代码大全
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* * $Header: arclzw.c,v 1.4 88/06/01 16:27:59 hyc Locked $ *//* * ARC - Archive utility - ARCLZW *  * Version 2.03, created on 10/24/86 at 11:46:22 *  * (C) COPYRIGHT 1985,86 by System Enhancement Associates; ALL RIGHTS RESERVED *  * By:  Thom Henderson *  * Description: This file contains the routines used to implement Lempel-Zev * data compression, which calls for building a coding table on the fly. * This form of compression is especially good for encoding files which * contain repeated strings, and can often give dramatic improvements over * traditional Huffman SQueezing. *  * Language: Computer Innovations Optimizing C86 *  * Programming notes: In this section I am drawing heavily on the COMPRESS * program from UNIX.  The basic method is taken from "A Technique for High * Performance Data Compression", Terry A. Welch, IEEE Computer Vol 17, No 6 * (June 1984), pp 8-19.  Also see "Knuth's Fundamental Algorithms", Donald * Knuth, Vol 3, Section 6.4. *  * As best as I can tell, this method works by tracing down a hash table of code * strings where each entry has the property: *  * if <string> <char> is in the table then <string> is in the table. */#include <stdio.h>#include "arc.h"void	putc_pak(), abort(), putc_ncr();int	getc_unp();#if	MSDOSchar	*setmem();#elsechar	*memset();#endifstatic void	putcode();/* definitions for older style crunching */#define FALSE    0#define TRUE     !FALSE#define TABSIZE  4096#define NO_PRED  0xFFFF#define EMPTY    0xFFFF#define NOT_FND  0xFFFFstatic unsigned short inbuf;	/* partial input code storage */static int      sp;		/* current stack pointer */struct entry {		/* string table entry format */	char            used;	/* true when this entry is in use */	unsigned char   follower;	/* char following string */	unsigned short  next;	/* ptr to next in collision list */	unsigned short  predecessor;	/* code for preceeding string */};            /* string_tab[TABSIZE];	/* the code string table *//* definitions for the new dynamic Lempel-Zev crunching */#define BITS   12		/* maximum bits per code */#define HSIZE  5003		/* 80% occupancy */#define INIT_BITS 9		/* initial number of bits/code */static int      n_bits;		/* number of bits/code */static int      maxcode;	/* maximum code, given n_bits */#define MAXCODE(n)      ((1<<(n)) - 1)	/* maximum code calculation */static int      maxcodemax = 1 << BITS;	/* largest possible code (+1) */static char     buf[BITS];	/* input/output buffer */static unsigned char lmask[9] =	/* left side masks */{ 0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};static unsigned char rmask[9] =	/* right side masks */{ 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};static int      offset;		/* byte offset for code output */static long     in_count;	/* length of input */static long     bytes_out;	/* length of compressed output */static long     bytes_ref;	/* output quality reference */static long     bytes_last;	/* output size at last checkpoint */static unsigned short ent;/* * To save much memory (which we badly need at this point), we overlay the * table used by the previous version of Lempel-Zev with those used by the * new version.  Since no two of these routines will be used together, we can * safely do this. */extern long     htab[HSIZE];		/* hash code table   (crunch) */extern unsigned short codetab[HSIZE];	/* string code table (crunch) */static struct	entry *string_tab=(struct entry *)htab;	/* old crunch string table */static unsigned short *prefix=codetab;	/* prefix code table (uncrunch) */static unsigned char *suffix=(unsigned char *)htab;	/* suffix table (uncrunch) */static int      free_ent;	/* first unused entry */static int      firstcmp;	/* true at start of compression */extern unsigned char stack[HSIZE];	/* local push/pop stack *//* * block compression parameters -- after all codes are used up, and * compression rate changes, start over. */static int      clear_flg;#define CHECK_GAP 2048		/* ratio check interval */static long     checkpoint;void            upd_tab();/* * the next two codes should not be changed lightly, as they must not lie * within the contiguous general code space. */#define FIRST   257		/* first free entry */#define CLEAR   256		/* table clear output code *//* * The cl_block() routine is called at each checkpoint to determine if * compression would likely improve by resetting the code table.  The method * chosen to determine this is based on empirical observation that, in * general, every 2k of input data should compress at least as well as the * first 2k of input. */static          voidcl_block(t)			/* table clear for block compress */	FILE           *t;	/* our output file */{	checkpoint = in_count + CHECK_GAP;	if (bytes_ref) {		if (bytes_out - bytes_last > bytes_ref) {			setmem(htab, HSIZE * sizeof(long), 0xff);			free_ent = FIRST;			clear_flg = 1;			putcode(CLEAR, t);			bytes_ref = 0;		}	} else		bytes_ref = bytes_out - bytes_last;	bytes_last = bytes_out;	/* remember where we were */}/***************************************************************** * * Output a given code. * Inputs: *      code:   A n_bits-bit integer.  If == -1, then EOF.  This assumes *              that n_bits =< (LONG)wordsize - 1. * Outputs: *      Outputs code to the file. * Assumptions: *      Chars are 8 bits long. * Algorithm: *      Maintain a BITS character long buffer (so that 8 codes will * fit in it exactly).  When the buffer fills up empty it and start over. */static          voidputcode(code, t)		/* output a code */	int             code;	/* code to output */	FILE           *t;	/* where to put it */{	int             r_off = offset;	/* right offset */	int             bits = n_bits;	/* bits to go */	char           *bp = buf;	/* buffer pointer */	int             n;	/* index */	register int	ztmp;	if (code >= 0) {	/* if a real code *//* Get to the first byte. */		bp += (r_off >> 3);		r_off &= 7;		/*		 * Since code is always >= 8 bits, only need to mask the		 * first hunk on the left. 		 */		ztmp = (code << r_off) & lmask[r_off];		*bp = (*bp & rmask[r_off]) | ztmp;		bp++;		bits -= (8 - r_off);		code >>= (8 - r_off);		/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */		if (bits >= 8) {			*bp++ = code;			code >>= 8;			bits -= 8;		}		/* Last bits. */		if (bits)			*bp = code;		offset += n_bits;		if (offset == (n_bits << 3)) {			bp = buf;			bits = n_bits;			bytes_out += bits;			do				putc_pak(*bp++, t);			while (--bits);			offset = 0;		}		/*		 * If the next entry is going to be too big for the code		 * size, then increase it, if possible. 		 */		if (free_ent > maxcode || clear_flg > 0) {			/*			 * Write the whole buffer, because the input side			 * won't discover the size increase until after			 * it has read it. 			 */			if (offset > 0) {				bp = buf;	/* reset pointer for writing */				bytes_out += n = n_bits;				while (n--)					putc_pak(*bp++, t);			}			offset = 0;			if (clear_flg) {	/* reset if clearing */				maxcode = MAXCODE(n_bits = INIT_BITS);				clear_flg = 0;			} else {/* else use more bits */				n_bits++;				if (n_bits == BITS)					maxcode = maxcodemax;				else					maxcode = MAXCODE(n_bits);			}		}	} else {		/* dump the buffer on EOF */		bytes_out += n = (offset + 7) / 8;		if (offset > 0)			while (n--)				putc_pak(*bp++, t);		offset = 0;	}}/***************************************************************** * * Read one code from the standard input.  If EOF, return -1. * Inputs: *      cmpin * Outputs: *      code or -1 is returned. */static          intgetcode(f)			/* get a code */	FILE           *f;	/* file to get from */{	int             code;	static int      offset = 0, size = 0;	int             r_off, bits;	unsigned char  *bp = (unsigned char *) buf;	if (clear_flg > 0 || offset >= size || free_ent > maxcode) {		/*		 * If the next entry will be too big for the current code		 * size, then we must increase the size. This implies		 * reading a new buffer full, too. 		 */		if (free_ent > maxcode) {			n_bits++;			if (n_bits == BITS)				maxcode = maxcodemax;	/* won't get any bigger							 * now */			else				maxcode = MAXCODE(n_bits);		}		if (clear_flg > 0) {			maxcode = MAXCODE(n_bits = INIT_BITS);			clear_flg = 0;		}		for (size = 0; size < n_bits; size++) {			if ((code = getc_unp(f)) == EOF)				break;			else				buf[size] = (char) code;		}		if (size <= 0)			return -1;	/* end of file */		offset = 0;		/* Round size down to integral number of codes */		size = (size << 3) - (n_bits - 1);	}	r_off = offset;	bits = n_bits;	/*	 * Get to the first byte. 	 */	bp += (r_off >> 3);	r_off &= 7;	/* Get first part (low order bits) */	code = (*bp++ >> r_off);	bits -= 8 - r_off;	r_off = 8 - r_off;	/* now, offset into code word */	/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */	if (bits >= 8) {		code |= *bp++ << r_off;		r_off += 8;		bits -= 8;	}	/* high order bits. */	code |= (*bp & rmask[bits]) << r_off;	offset += n_bits;	return code & MAXCODE(BITS);}/* * compress a file *  * Algorithm:  use open addressing double hashing (no chaining) on the prefix * code / next character combination.  We do a variant of Knuth's algorithm D * (vol. 3, sec. 6.4) along with G. Knott's relatively-prime secondary probe. * Here, the modular division first probe is gives way to a faster * exclusive-or manipulation.  Also do block compression with an adaptive * reset, where the code table is cleared when the compression ratio * decreases, but after the table fills.  The variable-length output codes * are re-sized at this point, and a special CLEAR code is generated for the * decompressor. */voidinit_cm(t)			/* initialize for compression */	FILE           *t;	/* where compressed file goes */{	offset = 0;	bytes_out = bytes_last = 1;	bytes_ref = 0;	clear_flg = 0;	in_count = 1;	checkpoint = CHECK_GAP;	maxcode = MAXCODE(n_bits = INIT_BITS);	free_ent = FIRST;	setmem(htab, HSIZE * sizeof(long), 0xff);	n_bits = INIT_BITS;	/* set starting code size */	putc_pak(BITS, t);	/* note our max code length */	firstcmp = 1;		/* next byte will be first */}voidputc_cm(c, t)			/* compress a character */	unsigned char   c;	/* character to compress */	FILE           *t;	/* where to put it */{	static long     fcode;	static int      hshift;	int             i;	int             disp;	if (firstcmp) {		/* special case for first byte */		ent = c;	/* remember first byte */		hshift = 0;		for (fcode = (long) HSIZE; fcode < 65536L; fcode *= 2L)			hshift++;		hshift = 8 - hshift;	/* set hash code range bound */		firstcmp = 0;	/* no longer first */		return;	}	in_count++;	fcode = (long) (((long) c << BITS) + ent);	i = (c << hshift) ^ ent;/* xor hashing */	if (htab[i] == fcode) {		ent = codetab[i];		return;	} else if (htab[i] < 0)	/* empty slot */		goto nomatch;	disp = HSIZE - i;	/* secondary hash (after G.Knott) */	if (i == 0)		disp = 1;probe:
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -