compress.c

来自「这是一个同样来自贝尔实验室的和UNIX有着渊源的操作系统, 其简洁的设计和实现易」· C语言代码 · 共 1,243 行 · 第 1/3 页
1,243 行
		if ((getchar()!=(magic_header[0] & 0xFF))		 || (getchar()!=(magic_header[1] & 0xFF))) {			fprintf(stderr, "stdin: not in compressed format\n");			exit(1);		}		maxbits = getchar();	/* set -b from file */		block_compress = maxbits & BLOCK_MASK;		maxbits &= BIT_MASK;		maxmaxcode = 1 << maxbits;		fsize = 100000;		/* assume stdin large for USERMEM */		if(maxbits > BITS) {			fprintf(stderr,			"stdin: compressed with %d bits, can only handle %d bits\n",			maxbits, BITS);			exit(1);		}		}#ifndef DEBUG		decompress();#else		if (debug == 0)			decompress();		else			printcodes();		if (verbose)			dump_tab();#endif						/* DEBUG */	}	}	exit(exit_stat);}static int offset;long in_count = 1;		/* length of input */long bytes_out;			/* length of compressed output */long out_count = 0;		/* # of codes output (for debugging) *//* * compress stdin to stdout * * Algorithm:  use open addressing double hashing (no chaining) on the * prefix code / next character combination.  We do a variant of Knuth's * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime * secondary probe.  Here, the modular division first probe is gives way * to a faster exclusive-or manipulation.  Also do block compression with * an adaptive reset, whereby the code table is cleared when the compression * ratio decreases, but after the table fills.  The variable-length output * codes are re-sized at this point, and a special CLEAR code is generated * for the decompressor.  Late addition:  construct the table according to * file size for noticeable speed improvement on small files.  Please direct * questions about this implementation to ames!jaw. */compress(){	code_int ent, hsize_reg;	code_int i = 0;	int c, disp, hshift;	long fcode;	if (nomagic == 0) {		putchar(magic_header[0]);		putchar(magic_header[1]);		putchar((char)(maxbits | block_compress));		if(ferror(stdout))			writeerr();	}	offset = 0;	bytes_out = 3;		/* includes 3-byte header mojo */	out_count = 0;	clear_flg = 0;	ratio = 0;	in_count = 1;	checkpoint = CHECK_GAP;	maxcode = MAXCODE(n_bits = INIT_BITS);	free_ent = (block_compress? FIRST: 256);	ent = getchar ();	hshift = 0;	for (fcode = (long)hsize;  fcode < 65536L; fcode *= 2)		hshift++;	hshift = 8 - hshift;		/* set hash code range bound */	hsize_reg = hsize;	cl_hash( (count_int) hsize_reg);		/* clear hash table */	while ((c = getchar()) != EOF) {		in_count++;		fcode = (long) (((long) c << maxbits) + ent);	 	i = ((c << hshift) ^ ent);	/* xor hashing */		if (htabof (i) == fcode) {			ent = codetabof(i);			continue;		} else if ((long)htabof(i) < 0 )	/* empty slot */			goto nomatch;	 	disp = hsize_reg - i;	/* secondary hash (after G. Knott) */		if (i == 0)			disp = 1;probe:		if ((i -= disp) < 0)			i += hsize_reg;		if (htabof (i) == fcode) {			ent = codetabof(i);			continue;		}		if ((long)htabof(i) > 0)			goto probe;nomatch:		output((code_int)ent);		out_count++;	 	ent = c;		if (free_ent < maxmaxcode) {	 		codetabof(i) = free_ent++;	/* code -> hashtable */			htabof(i) = fcode;		} else if ((count_int)in_count >= checkpoint && block_compress)			cl_block ();	}	/*	* Put out the final code.	*/	output( (code_int)ent );	out_count++;	output( (code_int)-1 );	/*	* Print out stats on stderr	*/	if(zcat_flg == 0 && !quiet) {#ifdef DEBUG	fprintf( stderr,		"%ld chars in, %ld codes (%ld bytes) out, compression factor: ",		in_count, out_count, bytes_out );	prratio( stderr, in_count, bytes_out );	fprintf( stderr, "\n");	fprintf( stderr, "\tCompression as in compact: " );	prratio( stderr, in_count-bytes_out, in_count );	fprintf( stderr, "\n");	fprintf( stderr, "\tLargest code (of last block) was %d (%d bits)\n",		free_ent - 1, n_bits );#else /* !DEBUG */	fprintf( stderr, "Compression: " );	prratio( stderr, in_count-bytes_out, in_count );#endif /* DEBUG */	}	if(bytes_out > in_count)	/* exit(2) if no savings */		exit_stat = 2;}/* * TAG( output ) * * Output the given code. * Inputs: * 	code:	A n_bits-bit integer.  If == -1, then EOF.  This assumes *		that n_bits =< (long)wordsize - 1. * Outputs: * 	Outputs code to the file. * Assumptions: *	Chars are 8 bits long. * Algorithm: * 	Maintain a BITS character long buffer (so that 8 codes will * fit in it exactly).  When the buffer fills up empty it and start over. */static char buf[BITS];uchar lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};uchar rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};output( code )code_int  code;{#ifdef DEBUG	static int col = 0;#endif	int r_off = offset, bits= n_bits;	char *bp = buf;#ifdef DEBUG	if (verbose)		fprintf(stderr, "%5d%c", code,			(col+=6) >= 74? (col = 0, '\n'): ' ');#endif	if (code >= 0) {		/*		 * byte/bit numbering on the VAX is simulated by the		 * following code		 */		/*		 * Get to the first byte.		 */		bp += (r_off >> 3);		r_off &= 7;		/*		 * Since code is always >= 8 bits, only need to mask the first		 * hunk on the left.		 */		*bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off];		bp++;		bits -=  8 - r_off;		code >>= 8 - r_off;		/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */		if ( bits >= 8 ) {			*bp++ = code;			code >>= 8;			bits -= 8;		}		/* Last bits. */		if(bits)			*bp = code;		offset += n_bits;		if ( offset == (n_bits << 3) ) {			bp = buf;			bits = n_bits;			bytes_out += bits;			do {				putchar(*bp++);			} while(--bits);			offset = 0;		}		/*		 * If the next entry is going to be too big for the code size,		 * then increase it, if possible.		 */		if ( free_ent > maxcode || (clear_flg > 0)) {			/*			* Write the whole buffer, because the input side won't			* discover the size increase until after it has read it.			*/			if ( offset > 0 ) {				if( fwrite( buf, 1, n_bits, stdout ) != n_bits)					writeerr();				bytes_out += n_bits;			}			offset = 0;			if ( clear_flg ) {				maxcode = MAXCODE (n_bits = INIT_BITS);				clear_flg = 0;			} else {				n_bits++;				if ( n_bits == maxbits )					maxcode = maxmaxcode;				else					maxcode = MAXCODE(n_bits);			}#ifdef DEBUG			if ( debug ) {				fprintf(stderr,					"\nChange to %d bits\n", n_bits);				col = 0;			}#endif		}	} else {		/*		 * At EOF, write the rest of the buffer.		 */		if ( offset > 0 )			fwrite( buf, 1, (offset + 7) / 8, stdout );		bytes_out += (offset + 7) / 8;		offset = 0;		fflush( stdout );#ifdef DEBUG		if ( verbose )			fprintf( stderr, "\n" );#endif		if( ferror( stdout ) )			writeerr();	}}/* * Decompress stdin to stdout.  This routine adapts to the codes in the * file building the "string" table on-the-fly; requiring no table to * be stored in the compressed file.  The tables used herein are shared * with those of the compress() routine.  See the definitions above. */decompress(){	int finchar;	code_int code, oldcode, incode;	uchar *stackp;	/*	* As above, initialize the first 256 entries in the table.	*/	maxcode = MAXCODE(n_bits = INIT_BITS);	for (code = 255; code >= 0; code--) {		tab_prefixof(code) = 0;		tab_suffixof(code) = (uchar)code;	}	free_ent = (block_compress? FIRST: 256);	finchar = oldcode = getcode();	if(oldcode == -1)		/* EOF already? */		return;			/* Get out of here */	putchar((char)finchar);		/* first code must be 8 bits = char */	if(ferror(stdout))		/* Crash if can't write */		writeerr();	stackp = de_stack;	while ((code = getcode()) > -1) {		if ((code == CLEAR) && block_compress) {			for (code = 255; code >= 0; code--)				tab_prefixof(code) = 0;			clear_flg = 1;			free_ent = FIRST - 1;			if ((code = getcode()) == -1)	/* O, untimely death! */				break;		}		incode = code;		/*		 * Special case for KwKwK string.		 */		if (code >= free_ent) {			*stackp++ = finchar;			code = oldcode;		}		/*		 * Generate output characters in reverse order		 */		while (code >= 256) {			*stackp++ = tab_suffixof(code);			code = tab_prefixof(code);		}		*stackp++ = finchar = tab_suffixof(code);		/*		 * And put them out in forward order		 */		do {			putchar(*--stackp);		} while (stackp > de_stack);		/*		 * Generate the new entry.		 */		if ( (code=free_ent) < maxmaxcode ) {			tab_prefixof(code) = (ushort)oldcode;			tab_suffixof(code) = finchar;			free_ent = code+1;		}		/*		 * Remember previous code.		 */		oldcode = incode;	}	fflush(stdout);	if(ferror(stdout))		writeerr();}/* * TAG( getcode ) * * Read one code from the standard input.  If EOF, return -1. * Inputs: * 	stdin * Outputs: * 	code or -1 is returned. */code_intgetcode(){	int r_off, bits;	code_int code;	static int offset = 0, size = 0;	static uchar buf[BITS];	uchar *bp = buf;	if ( clear_flg > 0 || offset >= size || free_ent > maxcode ) {		/*		 * If the next entry will be too big for the current code		 * size, then we must increase the size.  This implies reading		 * a new buffer full, too.		 */		if ( free_ent > maxcode ) {			n_bits++;			if ( n_bits == maxbits )				maxcode = maxmaxcode; /* won't get any bigger now */			else				maxcode = MAXCODE(n_bits);		}		if ( clear_flg > 0) {			maxcode = MAXCODE(n_bits = INIT_BITS);			clear_flg = 0;		}		size = fread(buf, 1, n_bits, stdin);		if (size <= 0)			return -1;			/* end of file */		offset = 0;		/* Round size down to integral number of codes */		size = (size << 3) - (n_bits - 1);	}	r_off = offset;	bits = n_bits;	/*	 * Get to the first byte.	 */	bp += (r_off >> 3);	r_off &= 7;	/* Get first part (low order bits) */	code = (*bp++ >> r_off);	bits -= (8 - r_off);	r_off = 8 - r_off;		/* now, offset into code word */	/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */	if (bits >= 8) {		code |= *bp++ << r_off;		r_off += 8;
compress.c - 源码说明

本页面展示了「这是一个同样来自贝尔实验室的和UNIX有着渊源的操作系统, 其简洁的设计和实现易于我们学习和理解」中的 compress.c 源码文件，采用 C语言编程语言编写，共 1,243 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与UNIX相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?