📄 lzw.cpp
字号:
#include <stdio.h>
#include <assert.h>
#include <windows.h>
void dbg_print(const char *format, ...){}
#define CODE_LENGTH (11)
#define MAX_CB_ONECE (1<<CODE_LENGTH) /* The maxinum bytes to compress at one time */
#define MAX_ST_ENTRIES MAX_CB_ONECE
#define MAXIMUM_CODE (MAX_CB_ONECE-1)
#define MAX_STR_NUM (0x10000)
struct string_t
{
unsigned short str_len;
unsigned short start_pos;
};
struct st_entry_t
{
string_t the_string;
struct st_entry_t *next;
};
struct tt_entry_t
{
string_t the_string;
};
struct translated_string_t
{
string_t the_string;
long character;
};
struct lzw_info_t
{
unsigned long code_num : 31;
unsigned long data_compressed : 1;
};
#pragma pack()
enum
{
RCOK_DATA_COM = 1, // Data are compressed
RCOK_DATA_UNC = 2, /*++
Data are not compressed,
just copied directly from source buffer to destination buffer.
++*/
RCFAILED = -1, // Data compression fails due to some reasons.
};
/*+++
// Data area
==*/
static union
{
st_entry_t lzw_string_table_entries[MAX_ST_ENTRIES];
tt_entry_t lzw_translation_table_entries[MAX_ST_ENTRIES];
};
static union
{
st_entry_t *lzw_string_table[MAX_STR_NUM];
tt_entry_t *lzw_translation_table[MAX_STR_NUM];
};
void set_bit(unsigned char *bit_stream, const unsigned long offset_in_bits)
{
unsigned long offset_in_bytes, offset_within_byte;
offset_in_bytes = offset_in_bits >> 3;
offset_within_byte = offset_in_bits & 7;
*(bit_stream+offset_in_bytes) |= (1<<offset_within_byte);
}
void clear_bit(unsigned char *bit_stream,
const unsigned long offset_in_bits)
{
unsigned long offset_in_bytes, offset_within_byte;
offset_in_bytes = offset_in_bits >> 3 ;
offset_within_byte = offset_in_bits & 7;
*(bit_stream+offset_in_bytes) &= (~(1<<offset_within_byte));
}
int read_bit(unsigned char *bit_stream, const unsigned long offset_in_bits)
{
unsigned long offset_in_bytes;
unsigned long offset_within_byte;
offset_in_bytes = offset_in_bits >> 3 ;
offset_within_byte = offset_in_bits & 7;
return ( ( *(unsigned long *) (bit_stream+offset_in_bytes) )
>> offset_within_byte ) & 1;
}
void write_data_to_bs(unsigned long *data, int bits_of_data,
unsigned char *bit_stream, unsigned long &bit_offset)
{
int i;
int n;
unsigned long _bit_offset = bit_offset, _bits_of_data = bits_of_data;
while( bits_of_data > 0 )
{
n = bits_of_data > 32 ? 32 : bits_of_data;
for(i=0; i<n; i++)
{
if( ( ( *data ) >> i) & 1 )
{
set_bit(bit_stream, bit_offset);
}
else
{
clear_bit(bit_stream, bit_offset);
}
bit_offset++;
}
data ++;
bits_of_data -= n;
if( bits_of_data )
{
assert(0);
}
}
assert( bit_offset - _bit_offset == _bits_of_data);
}
void write_char_to_bs(const unsigned char data, unsigned char *bit_stream,
unsigned long &bit_offset)
{
unsigned long data_buff = data;
write_data_to_bs(&data_buff, 8, bit_stream, bit_offset);
}
void write_short_to_bs(const unsigned short data, unsigned char *bit_stream,
unsigned long &bit_offset)
{
unsigned long data_buff = data;
write_data_to_bs(&data_buff, 16, bit_stream, bit_offset);
}
void read_data_from_bs(void *data, int bits_of_data,
unsigned char *bit_stream, unsigned long &bit_offset)
{
int i;
int n;
while( bits_of_data > 0 )
{
n = bits_of_data > 8 ? 8 : bits_of_data;
*(unsigned char *)data = 0;
for(i=0; i<n; i++)
{
if( read_bit(bit_stream, bit_offset) )
{
( *(unsigned char *)data ) |= (1<<i);
}
bit_offset++;
}
data = ( (unsigned char *)data ) + 1;
bits_of_data -= n;
}
}
unsigned char read_char_from_bs(unsigned char *bit_stream, unsigned long &bit_offset)
{
unsigned char data;
read_data_from_bs(&data, 8, bit_stream, bit_offset);
return data;
}
unsigned short read_short_from_bs(unsigned char *bit_stream, unsigned long &bit_offset)
{
unsigned short data;
read_data_from_bs( (unsigned char *) &data, 16, bit_stream, bit_offset);
return data;
}
int search_string_table(unsigned char *data_buff, int string_num,
st_entry_t **string_table, string_t *cur_str, st_entry_t **matched_entry=NULL)
{
int length = cur_str->str_len + 1;
unsigned short index = *(unsigned short *) (data_buff + cur_str->start_pos);
st_entry_t *next_entry = string_table[index];
*matched_entry = NULL;
while( next_entry )
{
if( length == next_entry->the_string.str_len &&
memcmp(data_buff + next_entry->the_string.start_pos,
data_buff + cur_str->start_pos, length) == 0 )
{
if( matched_entry )
{
*matched_entry = next_entry;
}
return (int)index;
}
next_entry = next_entry->next;
}
return -1;
}
void addto_string_table(unsigned char *data_buff,
int &string_num, st_entry_t **string_table, st_entry_t *st_entries,
string_t *cur_str)
{
unsigned short index = *(unsigned short *) (data_buff + cur_str->start_pos);
st_entries[string_num].the_string = (*cur_str);
st_entries[string_num].next = NULL;
st_entry_t *pre_entry = string_table[index];
if( pre_entry )
{
while( pre_entry->next )
{
pre_entry = pre_entry->next;
}
pre_entry->next = &st_entries[string_num];
}
else
{
string_table[index] = &st_entries[string_num];
}
string_num++;
}
void output_code(unsigned short code, unsigned char *out_data_buffer,
unsigned long &bit_offset)
{
write_data_to_bs(
(unsigned long *)&code, CODE_LENGTH, out_data_buffer, bit_offset);
}
int lzw_compress(st_entry_t **string_table, st_entry_t *st_entries,
unsigned char *in_data_buffer, int in_data_len,
unsigned char *out_data_buffer, unsigned long *out_data_len)
{
int ret_val = RCFAILED;
int string_num;
string_t cur_string; // string table entry
st_entry_t *matched_entry;
int cur_pos;
unsigned long bit_offset;
st_entry_t **tmp_ptr1=NULL, *tmp_ptr2=NULL;
if( in_data_len > MAX_CB_ONECE )
{
goto err_out;
}
if( !string_table )
{
tmp_ptr1 = new st_entry_t *[MAX_ST_ENTRIES];
if( !tmp_ptr1 )
{
goto err_out;
}
string_table = tmp_ptr1;
}
if( !st_entries )
{
tmp_ptr2 = new st_entry_t[MAX_ST_ENTRIES];
if( !tmp_ptr2 )
{
goto err_out;
}
st_entries = tmp_ptr2;
}
memset(string_table, 0, sizeof(st_entry_t *) * MAX_STR_NUM);
memset(st_entries, 0, sizeof(st_entry_t) * MAX_ST_ENTRIES);
int cnt1, cnt2;
unsigned long total_string_length;
string_num = 0;
bit_offset = 0;
cur_string.start_pos = 0;
cur_string.str_len = 1;
cur_pos = 1;
matched_entry = NULL;
cnt1=0; cnt2=0; total_string_length=0;
while( 1/*cur_pos < in_data_len*/ )
{
st_entry_t *tmp_matched_entry;
if( string_num + 256 >= MAXIMUM_CODE ||
bit_offset >= (unsigned long)in_data_len * CODE_LENGTH )
{
memcpy(out_data_buffer, in_data_buffer, in_data_len);
*out_data_len = in_data_len * 8;
ret_val = RCOK_DATA_UNC;
goto cprs_fail;
}
if( cur_pos < in_data_len &&
search_string_table(in_data_buffer, string_num,
string_table, &cur_string, &tmp_matched_entry) >= 0 )
{
cur_string.str_len ++ ;
matched_entry = tmp_matched_entry;
}
else
{
unsigned short code;
if( matched_entry )
{
code = 256 + (unsigned short) (matched_entry - st_entries);
//matched_entry = NULL;
cnt1++;
total_string_length+=cur_string.str_len;
matched_entry = NULL;
}
else
{
code = (unsigned short) in_data_buffer[cur_string.start_pos];
cnt2++;
}
dbg_print("byte offset=%d, ", cur_string.start_pos);
dbg_print("code=%d, string length=%d\n", code, cur_string.str_len);
output_code(code, out_data_buffer, bit_offset);
cur_string.str_len++;
addto_string_table(in_data_buffer, string_num, string_table,
st_entries, &cur_string);
cur_string.start_pos = cur_pos;
cur_string.str_len = 1;
if( cur_pos >= in_data_len )
{
break;
}
}
assert(bit_offset == (unsigned long)(cnt1+cnt2)*CODE_LENGTH);
cur_pos++;
}
*out_data_len = bit_offset;
ret_val = RCOK_DATA_COM;
err_out:
cprs_fail:
if( tmp_ptr1 )
{
delete []tmp_ptr1;
}
if( tmp_ptr2 )
{
delete []tmp_ptr2;
}
return ret_val;
}
int search_translation_table(tt_entry_t **translation_table, unsigned short code)
{
if( code<256 || translation_table[code-256] )
{
return 1;
}
else
{
return -1;
}
}
void translate_code(tt_entry_t **translation_table, unsigned short code,
translated_string_t &cur_str)
{
if( code<256 )
{
cur_str.the_string.start_pos = 0;
cur_str.the_string.str_len = 0;
cur_str.character = (int)code;
}
else
{
cur_str.the_string = translation_table[code-256]->the_string;
cur_str.character = -1;
}
}
void addto_translation_table(tt_entry_t **translation_table, tt_entry_t *tt_entries,
const tt_entry_t &new_string, unsigned long &string_num)
{
translation_table[string_num] = &tt_entries[string_num];
tt_entries[string_num] = new_string;
string_num ++;
}
int lzw_decompress(tt_entry_t **translation_table, tt_entry_t *tt_entries,
unsigned char *in_data_buffer, unsigned long code_num,
unsigned char *out_data_buffer, unsigned long *out_data_len)
{
enum
{
STA_STRING = 8,
STA_CHAR = 9,
};
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -