⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lzw.cpp

📁 lzw的压缩程序
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include <stdio.h>
#include <assert.h>
#include <windows.h>

void dbg_print(const char *format, ...){}

#define CODE_LENGTH		(11)
#define MAX_CB_ONECE	(1<<CODE_LENGTH)	/*  The maxinum bytes to compress at one time */
#define MAX_ST_ENTRIES	MAX_CB_ONECE
#define MAXIMUM_CODE	(MAX_CB_ONECE-1)
#define MAX_STR_NUM		(0x10000)

struct	string_t
{
	unsigned short str_len;
	unsigned short start_pos;
};

struct st_entry_t
{
	string_t the_string;
	struct st_entry_t *next;
};

struct	tt_entry_t
{
	string_t the_string;
};

struct	translated_string_t 
{
	string_t the_string;
	long character;
};

struct	lzw_info_t
{
	unsigned long code_num : 31;
	unsigned long data_compressed : 1;
};

#pragma pack()

enum
{
	RCOK_DATA_COM = 1, // Data are compressed
	RCOK_DATA_UNC = 2,	 /*++ 
		Data are not compressed, 
		just copied  directly from source buffer to destination buffer.
						++*/
	RCFAILED = -1,	// Data compression fails due to some reasons.
};

/*+++
// Data area
==*/
static union
{
	st_entry_t lzw_string_table_entries[MAX_ST_ENTRIES];
	tt_entry_t lzw_translation_table_entries[MAX_ST_ENTRIES];
};

static union
{
	st_entry_t *lzw_string_table[MAX_STR_NUM];
	tt_entry_t *lzw_translation_table[MAX_STR_NUM];
};

void set_bit(unsigned char *bit_stream, const unsigned long offset_in_bits)
{
	unsigned long offset_in_bytes, offset_within_byte;

	offset_in_bytes = offset_in_bits >> 3;
	offset_within_byte = offset_in_bits & 7;

	*(bit_stream+offset_in_bytes) |= (1<<offset_within_byte);
}

void clear_bit(unsigned char *bit_stream,
	const unsigned long	offset_in_bits)
{
	unsigned long offset_in_bytes, offset_within_byte;

	offset_in_bytes = offset_in_bits >> 3 ;
	offset_within_byte = offset_in_bits & 7;

	*(bit_stream+offset_in_bytes) &= (~(1<<offset_within_byte));
}

int read_bit(unsigned char *bit_stream, const unsigned long offset_in_bits)
{
	unsigned long offset_in_bytes;
	unsigned long offset_within_byte;

	offset_in_bytes = offset_in_bits >> 3 ;
	offset_within_byte = offset_in_bits & 7;

	return ( ( *(unsigned long *) (bit_stream+offset_in_bytes) )
		>> offset_within_byte ) & 1;
}

void write_data_to_bs(unsigned long *data, int bits_of_data,
	unsigned char *bit_stream, unsigned long &bit_offset)
{
	int i;
	int n;

	unsigned long _bit_offset = bit_offset, _bits_of_data = bits_of_data;

	while( bits_of_data > 0 )
	{
		n = bits_of_data > 32 ? 32 : bits_of_data;

		for(i=0; i<n; i++)
		{
			if( ( ( *data ) >> i) & 1 )
			{
				set_bit(bit_stream, bit_offset);
			}
			else
			{
				clear_bit(bit_stream, bit_offset);
			}

			bit_offset++;
		}

		data ++;
		bits_of_data -= n;

		if( bits_of_data )
		{
			assert(0);
		}
	}

	assert( bit_offset - _bit_offset == _bits_of_data);
}

void write_char_to_bs(const unsigned char data, unsigned char *bit_stream,
	unsigned long &bit_offset)
{
	unsigned long data_buff = data;

	write_data_to_bs(&data_buff, 8, bit_stream, bit_offset);
}

void write_short_to_bs(const unsigned short data, unsigned char *bit_stream,
	unsigned long &bit_offset)
{
	unsigned long data_buff = data;

	write_data_to_bs(&data_buff, 16, bit_stream, bit_offset);
}


void read_data_from_bs(void *data, int bits_of_data,
	 unsigned char *bit_stream, unsigned long &bit_offset)
{
	int i;
	int n;

	while( bits_of_data > 0 )
	{
		n = bits_of_data > 8 ? 8 : bits_of_data;
		*(unsigned char *)data = 0;

		for(i=0; i<n; i++)
		{
			if( read_bit(bit_stream, bit_offset) )
			{
				( *(unsigned char *)data ) |= (1<<i);
			}

			bit_offset++;
		}

		data = ( (unsigned char *)data ) + 1;
		bits_of_data -= n;
	}
}

unsigned char read_char_from_bs(unsigned char *bit_stream, unsigned long &bit_offset)
{
	unsigned char data;
	read_data_from_bs(&data, 8, bit_stream, bit_offset);

	return data;
}

unsigned short read_short_from_bs(unsigned char *bit_stream, unsigned long &bit_offset)
{
	unsigned short data;
	read_data_from_bs( (unsigned char *) &data, 16, bit_stream, bit_offset);

	return data;
}


int search_string_table(unsigned char *data_buff, int string_num,
	st_entry_t **string_table, string_t *cur_str, st_entry_t **matched_entry=NULL)
{

	int length = cur_str->str_len + 1;

	unsigned short index = *(unsigned short *) (data_buff + cur_str->start_pos);
	st_entry_t *next_entry = string_table[index];

	*matched_entry = NULL;

	while( next_entry )
	{
		if( length == next_entry->the_string.str_len  &&
			memcmp(data_buff + next_entry->the_string.start_pos,
				data_buff + cur_str->start_pos, length) == 0 )
		{
			if( matched_entry )
			{
				*matched_entry = next_entry;
			}
			return (int)index;
		}

		next_entry = next_entry->next;
	}

	return -1;
}

void addto_string_table(unsigned char *data_buff,
	int &string_num, st_entry_t **string_table, st_entry_t *st_entries,
	string_t *cur_str)
{
	unsigned short index = *(unsigned short *) (data_buff + cur_str->start_pos);

	st_entries[string_num].the_string = (*cur_str);
	st_entries[string_num].next = NULL;
	st_entry_t *pre_entry = string_table[index];

	if( pre_entry )
	{
		while( pre_entry->next )
		{
			pre_entry = pre_entry->next;
		}

		pre_entry->next = &st_entries[string_num];
	}
	else
	{
		string_table[index] = &st_entries[string_num];
	}

	string_num++;

}

void output_code(unsigned short code, unsigned char *out_data_buffer,
	 unsigned long &bit_offset)
{
	write_data_to_bs(
		(unsigned long *)&code, CODE_LENGTH, out_data_buffer,  bit_offset);
}


int lzw_compress(st_entry_t **string_table, st_entry_t *st_entries,
	unsigned char *in_data_buffer, int in_data_len,
	unsigned char *out_data_buffer, unsigned long *out_data_len)
{
	int ret_val = RCFAILED;

	int string_num;
	
	string_t cur_string;	// string table entry
	st_entry_t *matched_entry;

	int cur_pos;
	unsigned long bit_offset;

	st_entry_t **tmp_ptr1=NULL, *tmp_ptr2=NULL;

	if( in_data_len > MAX_CB_ONECE )
	{
		goto err_out;
	}

	if( !string_table )
	{	
		tmp_ptr1 = new st_entry_t *[MAX_ST_ENTRIES];
		if( !tmp_ptr1 )
		{
			goto err_out;
		}
		string_table = tmp_ptr1;
	}

	if( !st_entries )
	{
		tmp_ptr2 = new st_entry_t[MAX_ST_ENTRIES];
		if( !tmp_ptr2 )
		{
			goto err_out;
		}
		st_entries = tmp_ptr2;
	}

	memset(string_table, 0, sizeof(st_entry_t *) * MAX_STR_NUM);
	memset(st_entries, 0, sizeof(st_entry_t) * MAX_ST_ENTRIES);

	int cnt1, cnt2;
	unsigned long total_string_length;


	string_num = 0;
	bit_offset = 0;
	cur_string.start_pos = 0;
	cur_string.str_len = 1;
	cur_pos = 1;
	matched_entry = NULL;

	cnt1=0; cnt2=0; total_string_length=0;
	while( 1/*cur_pos < in_data_len*/ )
	{
		st_entry_t *tmp_matched_entry;

		if( string_num + 256 >= MAXIMUM_CODE || 
			bit_offset >= (unsigned long)in_data_len * CODE_LENGTH )
		{
			memcpy(out_data_buffer, in_data_buffer, in_data_len);
			*out_data_len = in_data_len * 8;
			ret_val = RCOK_DATA_UNC;
			goto cprs_fail;
		}

		if( cur_pos < in_data_len && 
			search_string_table(in_data_buffer, string_num,
				string_table, &cur_string, &tmp_matched_entry) >= 0 )
		{
			cur_string.str_len ++ ;

			matched_entry = tmp_matched_entry;
		}
		else
		{
			unsigned short code;
			
			if( matched_entry )
			{
				code = 256 + (unsigned short) (matched_entry - st_entries);
				
				//matched_entry = NULL;

				cnt1++;
				total_string_length+=cur_string.str_len;

				matched_entry = NULL;
			}
			else
			{
				code = (unsigned short) in_data_buffer[cur_string.start_pos];
				cnt2++;
			}

			dbg_print("byte offset=%d, ", cur_string.start_pos);
			dbg_print("code=%d, string length=%d\n", code, cur_string.str_len);
						
			output_code(code, out_data_buffer, bit_offset);

			cur_string.str_len++;
			addto_string_table(in_data_buffer, string_num, string_table,
				st_entries, &cur_string);

			cur_string.start_pos = cur_pos;
			cur_string.str_len = 1;

			if( cur_pos >= in_data_len )
			{
				break;
			}
		}

		assert(bit_offset == (unsigned long)(cnt1+cnt2)*CODE_LENGTH);
		cur_pos++;
	}

	*out_data_len = bit_offset;
	ret_val = RCOK_DATA_COM;

err_out:
cprs_fail:
	
	if( tmp_ptr1 )
	{
		delete []tmp_ptr1;
	}

	if( tmp_ptr2 )
	{
		delete []tmp_ptr2;
	}

	return ret_val;
}

int search_translation_table(tt_entry_t **translation_table, unsigned short code)
{
	if( code<256 || translation_table[code-256] )
	{
		return 1;
	}
	else
	{
		return -1;
	}
}

void translate_code(tt_entry_t **translation_table, unsigned short code,
	translated_string_t &cur_str)
{
	if( code<256 )
	{
		cur_str.the_string.start_pos = 0;
		cur_str.the_string.str_len = 0;
		cur_str.character = (int)code;
	}
	else
	{
		cur_str.the_string = translation_table[code-256]->the_string;
		cur_str.character = -1;
	}
}

void addto_translation_table(tt_entry_t **translation_table, tt_entry_t *tt_entries,
	const tt_entry_t &new_string, unsigned long &string_num)
{
	translation_table[string_num] = &tt_entries[string_num];
	tt_entries[string_num] = new_string;

	string_num ++;
}

int lzw_decompress(tt_entry_t **translation_table, tt_entry_t *tt_entries,
	unsigned char *in_data_buffer, unsigned long code_num,
	unsigned char *out_data_buffer, unsigned long *out_data_len)
{
	enum
	{
		STA_STRING = 8,
		STA_CHAR = 9,
	};

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -