lzw.cpp

来自「该软件用lzw算法完成对文件的压缩和解压缩」· C++ 代码 · 共 378 行
CPP
378 行
// LZW.cpp: 实现CLZW类.
//
#include "LZW.h"

//---------------------------------------------------------------------------
CLZW::CLZW()
{
	hash_code   = 0;
	hash_prefix = 0;
	hash_suffix = 0;
	symbol_head = 0;
	symbol_tail = 0;
	symbol_stack= 0;
}

//---------------------------------------------------------------------------
CLZW::~CLZW()
{
	if(hash_code)  	 free(hash_code);     
	if(hash_prefix)  free(hash_prefix);  
	if(hash_suffix)  free(hash_suffix);  
	if(symbol_head)  free(symbol_head);  
	if(symbol_tail)  free(symbol_tail);  
	if(symbol_stack) free(symbol_stack); 
}

//---------------------------------------------------------------------------
// 一个零长度的块标志数据块序列的结束.
int CLZW::GetDataBlock (char *buf)
{
	int count;
	if ((count = getc(infile)) != EOF)
	{
		if (count > 0) {
			if (fread(buf, 1, count,infile)!=(size_t)count) 
				return -1;
		}
	}
	
	return count;
}

//---------------------------------------------------------------------------
// 跳过一序列数据块直到找到块结尾. 
void  CLZW::SkipDataBlocks ()
{
	char buf[256];
	while (GetDataBlock(buf) > 0);
}

//---------------------------------------------------------------------------
// （重新）初始化LZW 状态. 
void CLZW::ReInitLZW()
{
	n_bits = init_bits;
	maxcode = ClearCode << 1;	    // 2^code_size. 
	code_counter = ClearCode + 2; 	// 第一个未用的代码值. 
	sp = symbol_stack;		        // 初始化堆栈使其为空.
}

//---------------------------------------------------------------------------
// 初始化,以便 LZWReadByte和GetCode的调用. 
void CLZW::InitLZWCode (FILE* file, int ibits)
{
	printf("decompress size  %d\n",ibits);
	// GetCode 初始化. 
	symbol_head  = (code_int *)malloc(LZW_TABLE_SIZE * sizeof(code_int));
	symbol_tail  = (UINT8 *) malloc(LZW_TABLE_SIZE * sizeof(UINT8));
	symbol_stack = (UINT8 *) malloc(LZW_TABLE_SIZE * sizeof(UINT8));
	infile=file;
	
	init_bits=ibits;
	
	last_byte = 2;	// 保证重新拷贝最后两个字节.
	last_bit = 0;	// 缓冲区为空. 
	cur_bits = 0;	// 缓冲区的第一个字符.
	
	out_of_blocks = false;
	
	// LZWReadByte 初始化. 
	ClearCode = ((code_int) 1 << (init_bits-1));
	EOFCode = ClearCode + 1;	
	first_byte = true;
	ReInitLZW();
}

//---------------------------------------------------------------------------
// 从压缩数据中提取以后的 code_size 个比特,假定code_size 小于16. 
int CLZW::GetCode ()
{
	int offs, ret, count;
	if ( (cur_bits+n_bits) > last_bit) { // 重新装载缓冲区.
		if (out_of_blocks) {
			return EOFCode;		
		}
		// 保持最后两个字节- 假定 code_size <= 16. 
		code_buf[0] = code_buf[last_byte-2];
		code_buf[1] = code_buf[last_byte-1];
		
		// 装载更多的字节; 如果到达终止，设置标识.
		if ((count = GetDataBlock(&code_buf[2])) == 0) {
			out_of_blocks = true;
			return EOFCode;		
		}
		
		// 重置计数器. 
		cur_bits = (cur_bits - last_bit) + 16;
		last_byte = 2 + count;
		last_bit = last_byte * 8;
	}
	
	// 形成积累下一个24 bits的字符. 
	offs = cur_bits >> 3;	// 字节包含 cur_bit. 
	cur_accum = code_buf[offs+2] & 0xFF;
	cur_accum <<= 8;
	cur_accum |= code_buf[offs+1] & 0xFF;
	cur_accum <<= 8;
	cur_accum |= code_buf[offs] & 0xFF;
	
	// 向右积累排列cur_bit, 然后通过掩码得到需要的bits数.
	cur_accum >>= (cur_bits & 7);
	ret = ((int) cur_accum) & ((1 << n_bits) - 1);
	cur_bits += n_bits;
	
	return ret;
}

//---------------------------------------------------------------------------
// 读取一个 LZW 压缩的字节.
int CLZW::LZWReadByte () 
{
	static int oldcode;	   // 前一个 LZW 符. 
	static int firstcode;  // 原码扩展后的第一个字节.
	register int code;	   // 当前工作代码.
	int incode;			   // 保存实际的输入代码.
	
	if (first_byte) {
		first_byte = false;
		code = ClearCode;	
	} else {
		
		// 如果堆栈有以前读过的符号，返回它.
		if (sp > symbol_stack)
			return (int) *(--sp);
		
		// 读入新的符号. 
		code = GetCode();
	}
	
	if (code == ClearCode) {
		ReInitLZW();
		do {
			code = GetCode();
		} while (code == ClearCode);
		if (code > ClearCode) {	    // 保证它是一个未压缩的字节.
			printf("Corrupt data in LZWReadByte check 1.\n");
			code = 0;			
		}
		firstcode = oldcode = code;	// 使 firstcode, oldcode 有效. 
		return code;
	}
	
	if (code == EOFCode) {
		if (! out_of_blocks) {
			SkipDataBlocks();
			out_of_blocks = true;
		}
		
		// 没有足够的数据.
		printf("Premature end of file!\n");
		return 257;
	}
	
	// 得到正常未压缩的字节或LZW 符号. 
	incode = code;		
	if (code >= code_counter) {	
		if (code > code_counter) {
			printf("Corrupt data in LZWReadByte check 2.\n");
			incode = 0;		// 以免在符号表中产生循环.
		}
		*sp++ = (UINT8) firstcode;	
		code = oldcode;
	}
	
	// 如果是一个符号, 把它扩展后放入堆栈.
	while (code >= ClearCode) {
		// 符号尾: 一个简单的字节值. 
		*sp++ = symbol_tail[code];	
		// 符号头: 另一个LZW 符号. 
		code = symbol_head[code];	
	}
	
	// 表示的是一个未被压缩的字节. 
	firstcode = code;		
	
	// 判别表中是否有空间.
	if ((code = code_counter) < LZW_TABLE_SIZE) {
		// 定义一个新符号 = 原来的符号 + 这个符号扩展后的头字节. 
		symbol_head[code] = oldcode;
		symbol_tail[code] = (UINT8) firstcode;
		code_counter++;
		
		// 增加 n_bits. 
		if ((code_counter >= maxcode) && (n_bits < MAX_LZW_BITS)) {
			n_bits++;
			// 保持等于 2^code_size. 
			maxcode <<= 1;	
		}
	}
	
	oldcode = incode;	// 保存最后的输入符号，以便以后使用.
	return firstcode;	// 返回符号扩展后的第一个字节.
}

//---------------------------------------------------------------------------
// flush any accumulated data. 
void CLZW::flush_packet ()
{
	if (bytesinpkt > 0) {		// never write zero-length packet. 
		packetbuf[0] = (char) bytesinpkt++;
		if (fwrite(packetbuf, 1, bytesinpkt,outfile)
			!= (size_t) bytesinpkt)
			printf("Output file write error.\n");
		bytesinpkt = 0;
	}
}

//---------------------------------------------------------------------------
// 在现有的缓冲区增加一个字节，同时在有必要时向磁盘写保存数据.
void CLZW::CHAR_OUT (int c)
{
	packetbuf[++bytesinpkt] = (char) (c);  
    if (bytesinpkt >= 255)
		flush_packet(); 
}

//---------------------------------------------------------------------------
// 发送一个n_bits比特的代码, 
// 用 cur_accum 和 cur_bits 重新组成新的 8-bit 字节. 
void CLZW::output (code_int code)
{
	cur_accum |= ((INT32) code) << cur_bits;
	cur_bits += n_bits;
	
	while (cur_bits >= 8) {
		CHAR_OUT(cur_accum & 0xFF);
		cur_accum >>= 8;
		cur_bits -= 8;
	}
	
	//如果下一个输入比最大的代码大，则增加它,
	// 这样做是为了保证与解压时同步.
	
	if (free_code > maxcode) {
		n_bits++;
		if (n_bits == MAX_LZW_BITS)
			maxcode = LZW_TABLE_SIZE;	
		else
			maxcode = MAXCODE(n_bits);
	}
}

//---------------------------------------------------------------------------
//  清空hash表. 
void CLZW::clear_hash ()
{
	memset((void *) hash_code,0, HSIZE * sizeof(code_int));
}

//---------------------------------------------------------------------------
// 重置压缩并发送一个清除码.
void CLZW::clear_block ()
{
	clear_hash();			        // 删除所有标识符. 
	free_code = ClearCode + 2;
	output(ClearCode);		        // 压缩. 
	n_bits = init_bits;		        // 重置代码长度. 
	maxcode = MAXCODE(n_bits);
}

//---------------------------------------------------------------------------
// 初始化LZW 压缩.
void CLZW::compress_init (FILE* file,int ibits)
{
    printf("compress size  %d\n",ibits);
	// 初始化所有的静态变量,给hash表分配内存. 
    hash_code = (code_int *) malloc(HSIZE * sizeof(code_int));
    hash_prefix = (code_int *) malloc(HSIZE * sizeof(code_int));
    hash_suffix = (UINT8 *) malloc(HSIZE * sizeof(UINT8));
	
    outfile=file; 
    n_bits = init_bits = ibits;
	
	maxcode = MAXCODE(n_bits);
	ClearCode = ((code_int) 1 << (init_bits - 1));
    EOFCode = ClearCode + 1;
    free_code = ClearCode + 2;
    first_byte = true;		
	
	// 初始化输出缓冲区变量.
	bytesinpkt = 0;
    cur_accum = 0;
    cur_bits = 0;
	// 清理hash表. 
    clear_hash();
	// 给定一个初始清理字符.
    output(ClearCode);
}

//---------------------------------------------------------------------------
// 得到和压缩一个 8-bit 字节. 
void CLZW::compress_byte (int c)
{
	register hash_int i;
	register hash_int disp;
	
	if (first_byte) {		// 需要初始化一个等待码. 
		waiting_code = c;
		first_byte = false;
		return;
	}
	
	i = ((hash_int) c << (MAX_LZW_BITS-8)) + waiting_code;
	// i 小于两倍的 2**MAX_LZW_BITS, 因此 小于两倍的HSIZE. 
	if (i >= HSIZE)
		i -= HSIZE;
	
	if (hash_code[i] != 0) {	
		if (hash_prefix[i] == waiting_code && hash_suffix[i] == (UINT8) c) {
			waiting_code = hash_code[i];
			return;
		}
		
		if (i == 0)	// 第二个hash码 (根据 G. Knott).
			disp = 1;
		else
			disp = HSIZE - i;
		
		while (1) {
			i -= disp;
			if (i < 0) i += HSIZE;
			if (hash_code[i] == 0)  break;			
			if (hash_prefix[i] == waiting_code && hash_suffix[i] == (UINT8) c) {
				waiting_code = hash_code[i];
				return;
			}
		}
	}
	
	// 如果 hashtable[i] 是一个空的, 期望的符号不在表中.
	output(waiting_code);
	if (free_code < LZW_TABLE_SIZE) {
		// 在hash表里增加一个符号.
		hash_code[i] = free_code++;	
		hash_prefix[i] = waiting_code;
		hash_suffix[i] = (UINT8) c;
	} else
		clear_block();
	waiting_code = c;
}

//---------------------------------------------------------------------------
// 结尾保存.
void CLZW::compress_term ()  
{
	// 保存缓冲区里的代码. 
	if (! first_byte)
		output(waiting_code);
	// 发送一个结束代码.
	output(EOFCode);
	// 保存 bit-packing 缓冲区数据. 
	if (cur_bits > 0) {
		CHAR_OUT(cur_accum & 0xFF);
	}
	// 保存缓冲区里的代码. 
	flush_packet();
}
lzw.cpp - 源码说明

本页面展示了「该软件用lzw算法完成对文件的压缩和解压缩」中的 lzw.cpp 源码文件，采用 C++ 编程语言编写，共 378 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与lzw相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?