⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lzw算法源码c语言.c

📁 LZW压缩算法简介 作者:宋成 描述:一篇关于LZW压缩算法简介的文章
💻 C
📖 第 1 页 / 共 2 页
字号:
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <conio.h>
#include <windows.h>

//#define _DISPLAY_DBGINFO_
#define INLINE __inline

#ifdef _DISPLAY_DBGINFO_
#define DBG_PRINT(ARGS) printf##ARGS
#else
#define DBG_PRINT(ARGS)
#endif

typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned long u32;

extern "C" void WINAPI write_code(void);
extern "C" u32 WINAPI read_code(void);


//#define _USE_ASM_VER__WRITE_DATA_TO_BS
//#define _USE_ASM_VER__MEMCMP

//#define _USE_ASM_VER__LZW_COMPRESS
//#define _USE_ASM_VER__LZW_DECOMPRESS


#define CODE_LENGTH  (11)
#define MAX_CB_ONECE (1<<CODE_LENGTH) /*  The maxinum bytes to compress at one time */
#define MAX_ST_ENTRIES MAX_CB_ONECE
#define MAXIMUM_CODE (MAX_CB_ONECE-1)
#define MAX_STR_NUM  (0x10000)

#pragma pack(1)

typedef struct
{
 u16 start_pos;
 u16 str_len;
} string_t;

typedef struct _st_entry_t
{
 string_t the_string;
 struct _st_entry_t *next;
} st_entry_t;

typedef struct
{
 string_t the_string;
} tt_entry_t;

typedef struct
{
 string_t the_string;
 long character;
} translated_string_t;

typedef struct
{
 u32 code_num : 31;
 u32 data_compressed : 1;
} lzw_info_t;

#pragma pack()

enum
{
 RCOK_DATA_COM = 1, // Data are compressed
 RCOK_DATA_UNC = 2,  /*++
  Data are not compressed,
  just copied  directly from source buffer to destination buffer.
      ++*/
 RCFAILED = -1, // Data compression fails due to some reasons.
};

/*+++
// Data area
==*/
static union
{
 st_entry_t lzw_string_table_entries[MAX_ST_ENTRIES];
 tt_entry_t lzw_translation_table_entries[MAX_ST_ENTRIES];
};

static union
{
 st_entry_t *lzw_string_table[MAX_STR_NUM];
};

#ifdef _USE_ASM_VER__MEMCMP
extern "C" int WINAPI CompareMemory(void *s, void *d, int size);
#else
#define CompareMemory memcmp
#endif

void set_bit(u8 *bit_stream, const u32 offset_in_bits)
{
 u32 offset_in_bytes, offset_within_byte;

 offset_in_bytes = offset_in_bits >> 3;
 offset_within_byte = offset_in_bits & 7;

 *(bit_stream+offset_in_bytes) |= (1<<offset_within_byte);
}

void clear_bit(u8 *bit_stream,
 const u32 offset_in_bits)
{
 u32 offset_in_bytes, offset_within_byte;

 offset_in_bytes = offset_in_bits >> 3 ;
 offset_within_byte = offset_in_bits & 7;

 *(bit_stream+offset_in_bytes) &= (~(1<<offset_within_byte));
}

int read_bit(u8 *bit_stream, const u32 offset_in_bits)
{
 u32 offset_in_bytes;
 u32 offset_within_byte;

 offset_in_bytes = offset_in_bits >> 3 ;
 offset_within_byte = offset_in_bits & 7;

 return ( ( *(u32 *) (bit_stream+offset_in_bytes) )
  >> offset_within_byte ) & 1;
}


void write_data_to_bs(u32 *data, int bits_of_data,
 u8 *bit_stream, u32 &bit_offset)
{

#ifdef _USE_ASM_VER__WRITE_DATA_TO_BS

 __asm
 {
  push esi
   push ebx

   mov esi, data
   mov eax, [esi]
   mov esi, bit_stream
   mov ebx, bit_offset
   mov ebx, [ebx]

   call write_code

   mov esi, bit_offset
   add dword ptr [esi], CODE_LENGTH

   pop ebx
   pop esi
 }
#else

 int i;
 int n;

 u32 _bit_offset = bit_offset, _bits_of_data = bits_of_data;

 while( bits_of_data > 0 )
 {
  n = bits_of_data > 32 ? 32 : bits_of_data;

  for(i=0; i<n; i++)
  {
   if( ( ( *data ) >> i) & 1 )
   {
    set_bit(bit_stream, bit_offset);
   }
   else
   {
    clear_bit(bit_stream, bit_offset);
   }

   bit_offset++;
  }

  data ++;
  bits_of_data -= n;

  if( bits_of_data )
  {
   assert(0);
  }
 }

 assert( bit_offset - _bit_offset == _bits_of_data);

#endif

}

void write_char_to_bs(const u8 data, u8 *bit_stream,
 u32 &bit_offset)
{
 u32 data_buff = data;

 write_data_to_bs(&data_buff, 8, bit_stream, bit_offset);
}

void write_short_to_bs(const u16 data, u8 *bit_stream,
 u32 &bit_offset)
{
 u32 data_buff = data;

 write_data_to_bs(&data_buff, 16, bit_stream, bit_offset);
}


void read_data_from_bs(void *data, int bits_of_data,
  u8 *bit_stream, u32 &bit_offset)
{

#ifdef _USE_ASM_VER__WRITE_DATA_TO_BS

 __asm
 {
  push esi
   push ebx

   mov esi, bit_stream
   mov ebx, bit_offset
   mov ebx, [ebx]

   call read_code

   mov esi, data
   mov [esi], eax

   mov esi, [bit_offset]
   add dword ptr [esi], CODE_LENGTH

   pop ebx
   pop esi
 }

#else

 int i;
 int n;

 while( bits_of_data > 0 )
 {
  n = bits_of_data > 8 ? 8 : bits_of_data;
  *(u8 *)data = 0;

  for(i=0; i<n; i++)
  {
   if( read_bit(bit_stream, bit_offset) )
   {
    ( *(u8 *)data ) |= (1<<i);
   }

   bit_offset++;
  }

  data = ( (u8 *)data ) + 1;
  bits_of_data -= n;
 }

#endif

}

u8 read_char_from_bs(u8 *bit_stream, u32 &bit_offset)
{
 u8 data;
 read_data_from_bs(&data, 8, bit_stream, bit_offset);

 return data;
}

u16 read_short_from_bs(u8 *bit_stream, u32 &bit_offset)
{
 u16 data;
 read_data_from_bs( (u8 *) &data, 16, bit_stream, bit_offset);

 return data;
}


int search_string_table(u8 *data_buff, int string_num,
 st_entry_t **string_table, string_t *cur_str, st_entry_t **matched_entry=NULL)
{

 int length = cur_str->str_len + 1;

 u16 index = *(u16 *) (data_buff + cur_str->start_pos);
 st_entry_t *next_entry = string_table[index];

 *matched_entry = NULL;

 while( next_entry )
 {
  if( length == next_entry->the_string.str_len  &&
   CompareMemory(data_buff + next_entry->the_string.start_pos,
    data_buff + cur_str->start_pos, length) == 0 )
  {
   if( matched_entry )
   {
    *matched_entry = next_entry;
   }
   return (int)index;
  }

  next_entry = next_entry->next;
 }

 return -1;
}

void addto_string_table(u8 *data_buff,
 int &string_num, st_entry_t **string_table, st_entry_t *st_entries,
 string_t *cur_str)
{
 u16 index = *(u16 *) (data_buff + cur_str->start_pos);

 st_entries[string_num].the_string = (*cur_str);
 st_entries[string_num].next = NULL;
 st_entry_t *pre_entry = string_table[index];

 if( pre_entry )
 {
  while( pre_entry->next )
  {
   pre_entry = pre_entry->next;
  }

  pre_entry->next = &st_entries[string_num];
 }
 else
 {
  string_table[index] = &st_entries[string_num];
 }

 string_num++;

}

void output_code(u16 code, u8 *out_data_buffer,
  u32 &bit_offset)
{
 write_data_to_bs(
  (u32 *)&code, CODE_LENGTH, out_data_buffer,  bit_offset);
}

#ifdef _USE_ASM_VER__LZW_COMPRESS

extern "C" int WINAPI lzw_compress(st_entry_t **string_table, st_entry_t *st_entries,
 u8 *in_data_buffer, int in_data_len,
 u8 *out_data_buffer, u32 *out_data_len);

#else

int lzw_compress(st_entry_t **string_table, st_entry_t *st_entries,
 u8 *in_data_buffer, int in_data_len,
 u8 *out_data_buffer, u32 *out_data_len)
{
 int ret_val = RCFAILED;

 int string_num;

 string_t cur_string; // string table entry
 st_entry_t *matched_entry;

 int cur_pos;
 u32 bit_offset;

 st_entry_t **tmp_ptr1=NULL, *tmp_ptr2=NULL;

 if( in_data_len > MAX_CB_ONECE )
 {
  goto err_out;
 }

 if( !string_table )
 {
  tmp_ptr1 = new st_entry_t *[MAX_ST_ENTRIES];
  if( !tmp_ptr1 )
  {
   goto err_out;
  }
  string_table = tmp_ptr1;
 }

 if( !st_entries )
 {
  tmp_ptr2 = new st_entry_t[MAX_ST_ENTRIES];
  if( !tmp_ptr2 )
  {
   goto err_out;
  }
  st_entries = tmp_ptr2;
 }

 memset(string_table, 0, sizeof(st_entry_t *) * MAX_STR_NUM);
 memset(st_entries, 0, sizeof(st_entry_t) * MAX_ST_ENTRIES);

 int cnt1, cnt2;
 u32 total_string_length;


 string_num = 0;
 bit_offset = 0;
 cur_string.start_pos = 0;
 cur_string.str_len = 1;
 cur_pos = 1;
 matched_entry = NULL;

 cnt1=0; cnt2=0; total_string_length=0;
 while( 1/*cur_pos < in_data_len*/ )
 {
  st_entry_t *tmp_matched_entry;

  if( cur_pos==2047 )
  {
   cur_pos = cur_pos;
  }

  if( string_num + 256 >= MAXIMUM_CODE ||
   bit_offset >= (u32)in_data_len * CODE_LENGTH )
  {
   memcpy(out_data_buffer, in_data_buffer, in_data_len);
   *out_data_len = in_data_len;
   ret_val = RCOK_DATA_UNC;
   goto cprs_fail;
  }

  if( cur_pos < in_data_len &&
   search_string_table(in_data_buffer, string_num,
    string_table, &cur_string, &tmp_matched_entry) >= 0 )
  {
   cur_string.str_len ++ ;

   matched_entry = tmp_matched_entry;
  }
  else
  {
   u16 code;

   if( matched_entry )
   {
    code = 256 + (u16) (matched_entry - st_entries);

    //matched_entry = NULL;

    cnt1++;
    total_string_length+=cur_string.str_len;

    matched_entry = NULL;
   }
   else
   {
    code = (u16) in_data_buffer[cur_string.start_pos];
    cnt2++;
   }

   DBG_PRINT(("byte offset=%d, ", cur_string.start_pos));
   DBG_PRINT(("code=%d, string length=%d\n", code, cur_string.str_len));

   output_code(code, out_data_buffer, bit_offset);

   cur_string.str_len++;
   addto_string_table(in_data_buffer, string_num, string_table,
    st_entries, &cur_string);

   cur_string.start_pos = cur_pos;
   cur_string.str_len = 1;

   if( cur_pos >= in_data_len )
   {
    break;
   }
  }

  assert(bit_offset == (u32)(cnt1+cnt2)*CODE_LENGTH);
  cur_pos++;
 }

 *out_data_len = bit_offset / CODE_LENGTH;
 ret_val = RCOK_DATA_COM;

err_out:
cprs_fail:

 if( tmp_ptr1 )
 {
  delete []tmp_ptr1;
 }

 if( tmp_ptr2 )
 {
  delete []tmp_ptr2;
 }

 return ret_val;
}

#endif

int search_translation_table(tt_entry_t *tt_entries, u16 code)
{
 if( code<256 || tt_entries[code-256].the_string.str_len>0 )
 {
  return 1;
 }
 else
 {
  return -1;
 }
}

void translate_code(tt_entry_t *tt_entries, u16 code,
 translated_string_t &cur_str)
{
 if( code<256 )
 {
  cur_str.the_string.start_pos = 0;
  cur_str.the_string.str_len = 0;
  cur_str.character = (int)code;
 }
 else
 {
  cur_str.the_string = tt_entries[code-256].the_string;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -