⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hf.c

📁 huffuman编码程序。
💻 C
字号:
/***********************************************************************
* 文件名称:  %M%
* 文件描述:  huffuman编码解码程序
* 文件版本:  %I%  
* 时    间:  %D%
* 函数列表:

* 作    者:  伍一达  
* 审    核:  伍一达  
* 修订历史:
    07/10/26 9:54 by wuyida
    07/11/02 3:40 by wuyida
***********************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
1,仅对a~z,A~Z,0~9,和常见符号'\n','\r',' ','"',''',',','.',':','?'出现的次数进行统计和编码。
26个大写,26个小写,10个数字,9个符号,其他。

2,各字符的十进制数字表示
'\n'  10 LF
'\r'  13 CR 
' '  32
'"'  34
'''  39
','  44
'.'  46
':'  58
'?'  63

3,\n含义是换行,将当前位置移到下一行的开头 '\n'的ASCII码是10 
\r含义是回车,将当前位置移动到本行的开头 '\r'的ASCII码是13 
*/

#define LOWCASENUM 26
#define UPCASENUM 26
#define NUMBERNUM 10
#define PUNCNUM 9
#define ALLCHARNUM ((LOWCASENUM + UPCASENUM + NUMBERNUM + PUNCNUM) + 1)  //操他妈的死bug,没加括号

#define BUFFSIZE 255

#define STR (16+1)  //sizeof(unsigned short)+1

/*字母在文章中出现的次数的对应的编码结构,主结构*/
typedef struct{
char charactor;
int cdlength;
unsigned short code;
int freq;
}FqCd_a;

/*字母在文章中出现的次数的对应的编码结构,主结构*/
typedef struct{
char charactor;
int cdlength;
char *code;
int freq;
}FqCd;

typedef struct 
{
  int weight;
  int parent, lchild, rchild ;
}HTNode, *HuffmanTree;

typedef char **HuffmanCode;

/*动态分配主结构的存储空间。成功返回0,失败返回-1*/
int allocFC(FqCd **ppFC)
{
  *ppFC = (FqCd *)malloc(ALLCHARNUM * sizeof(FqCd)); 
  if(*ppFC == NULL){
    printf("%s, %d: %s", __FILE__, __LINE__, "alloc failed!");
    exit(-1);
  }
  
  return 0;
}

/*释放分配的存储空间。成功返回0,失败返回-1*/
int freemem(FqCd *pFC)
{
  free(pFC);

  return 0;
}

/*初始化主结构,成功返回0,失败返回-1*/
int initFC(FqCd *pFC)
{
  int i, j;

  /*将符号存入主结构中*/
  pFC[0].charactor = '\n';
  pFC[1].charactor = '\r';
  pFC[2].charactor = ' ';
  pFC[3].charactor = '"';
  pFC[4].charactor = '\'';
  pFC[5].charactor = '*';
  pFC[6].charactor = ',';
  pFC[7].charactor = '.';

  /*将数字存入主结构中*/
  for(i=8, j=0; j<NUMBERNUM; i++, j++)
  {
    pFC[i].charactor = '0' + j;
  }
  
  /*将符号存入主结构中*/
  pFC[i++].charactor = ':';
  pFC[i++].charactor = '?';
  
  /*将大写字母存入主结构中*/
  for(j=0; j<UPCASENUM; i++, j++)
  {
    pFC[i].charactor = 'A' + j;
  }
    
  /*将小写字母存入主结构中*/
  for(j=0; j<LOWCASENUM; i++, j++)
  {
    pFC[i].charactor = 'a' + j;
  }

  /*对主结构中的成员freq进行初始化*/
  for(i=0; i<ALLCHARNUM; i++){
    pFC[i].freq = 0;
    pFC[i].cdlength = 0;
    pFC[i].code = '\0';
  }
  return 0;
}

/*折半查找法,结果放入loc中。成功返回0,失败返回-1。*/
int SearchBin(int *loc, char key, FqCd *pFC, int low, int high)
{
  int mid;

  if(pFC == NULL){
    printf("%s, %d: %s", __FILE__, __LINE__, "para error!");
    exit(-1);
  }
  
  if(loc == NULL){
    printf("%s, %d: %s", __FILE__, __LINE__, "para error!");
    exit(-1);
  }
  
  while(low <= high){
    mid = (low + high)/2;

    if(key == pFC[mid].charactor){
      *loc = mid;
      return 0;
    }
    else if(key > pFC[mid].charactor)
      low = mid + 1;
    else
      high = mid - 1;
  }
  
  return -1;
}

/*对相应字符的频率增一,成功返回0,失败返回-1。*/
int addfreq(char ch, FqCd *pFC, int low, int high)
{
  int loc;

  if(SearchBin(&loc, ch, pFC, low, high) != 0){
    printf("%s, %d: %s", __FILE__, __LINE__, "searching failed!");
    exit(-1);
  }
  else{
    pFC[loc].freq += 1;
  }
  
  return 0;
}

/*统计文件中各字母出现的频率,成功返回0,失败返回-1。*/
int frequency(FqCd *pFC)
{
  int i;
  FILE *stream;
  char buf[BUFFSIZE];
  
  stream = fopen("in0.txt", "r");
  if(stream == NULL){
    //fprintf(stderr, "%s, %d: %s", __FILE__, __LINE__, "open file failed!");
    printf("%s, %d: %s", __FILE__, __LINE__, "open file failed!");
    exit(-1);
  }
/*
  for(i=0; i<ALLCHARNUM; i++)
    printf("pFC[%d].charactor = %c, pFC[%d].freq = %d, pFC[%d].code = %d\n", i, pFC[i].charactor, i, pFC[i].freq, i, pFC[i].code);
*/    
  /*memset(buf, '\0', BUFFSIZE);*/
  while(fgets(buf, BUFFSIZE, stream)){
    for(i=0; ((buf[i] != '\0') && (i<BUFFSIZE)); i++){
      if((buf[i]>='a')&&(buf[i]<='z')){
        addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
      }
      else if((buf[i] >= 'A')&&(buf[i] <= 'Z')){
        addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
      }
      else if((buf[i] >= '0')&&(buf[i] <= '9')){
        addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
      }
      else if(buf[i] == '\n'){
        addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
      }
      else if(buf[i] == '\r'){
        addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
      }
      else if(buf[i] == ' '){
        addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
      }
      else if(buf[i] == '"'){
        addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
      }
      else if(buf[i] == '\''){
        addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
      }
      else if(buf[i] == ','){
        addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
      }
      else if(buf[i] == '.'){
        addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
      }
      else if(buf[i] == ':'){
        addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
      }
      else if(buf[i] == '?'){
        addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
      }
      else
        addfreq('*', pFC, 0, ALLCHARNUM-1);
    }
  }
  
  fclose(stream);
  
  return 0;
}

/*在0-n范围内选择出两个parent值为零的最小的数,将它们对应的index值赋给s1,s2。成功返回0,失败返回-1。*/
int select(HuffmanTree ht, int n, int *s1, int *s2)
{
  int i;
  int minindex, minvalue;
  int flag;
  
  //选择节点parent值为零的第一个最小节点的序号
  flag = 0;
  for(i=0; i<=n; i++){
    if(ht[i].parent == 0){
      if(flag == 0){
        minindex = i;
        minvalue = ht[i].weight;
        flag = 1;
        continue;
      }
      if(ht[i].weight < minvalue)
      {
        minindex = i;
        minvalue = ht[i].weight;
      }
    }
  }//for
  *s1 = minindex;
  
  //选择节点parent值为零的第二个最小节点的序号
  flag = 0;
  for(i=0; i<=n; i++){
    if((i!=*s1) && (ht[i].parent==0)){
      if(flag == 0){
        minindex = i;
        minvalue = ht[i].weight;
        flag = 1;
        continue;
      }
      if((i!=*s1) && (ht[i].weight<minvalue))
      {
        minindex = i;
        minvalue = ht[i].weight;
      }
    }
  }//for
  *s2 = minindex;
  
  return 0;
}

/*给huffman树分配动态空间。成功返回0,失败返回-1。*/
int allocHT(HuffmanTree *pht, int n)
{
  *pht = (HuffmanTree)malloc((2*n - 1) * sizeof(HTNode));
  if(*pht == NULL){
    printf("%s, %d: %s", __FILE__, __LINE__, "alloc failed!");
    exit(-1);
  }
  
  return 0;
}

/*初始化huffman树。成功返回0,失败返回-1。*/
int initHT(HuffmanTree ht, FqCd *pFC, int n)
{
  int i;
  
  for(i=0; i<n; i++) 
  {
    ht[i].weight = pFC[i].freq;
    ht[i].parent = 0;
    ht[i].lchild = 0;
    ht[i].rchild = 0;
  }
 
  for(; i<(2*n - 1); i++)
  {
    ht[i].weight = 0;
    ht[i].parent = 0;
    ht[i].lchild = 0;
    ht[i].rchild = 0;
  }
  
  return 0;
}

/*
将字符串转化为位串。
成功返回0,失败返回-1。
*/
int strtobit(unsigned short *transf, char *str, int n)
{
  int i;
  unsigned short tmp = 1, rst = 0;
  
  for(i=(n-1); i>=0; i--){
    if(str[i] == '1'){
      rst += tmp;
    }
    tmp *= 2;
  }
  
  *transf = rst;
  
  return 0;
}

/*
将transf的位串(16个)转换为字符串(16个+1个结尾符)。
位串中的高位放在字符串的低位,位串中的低位放在字符串的高位。
成功返回0,失败返回-1。
*/
int bittostr(char *str, int n, unsigned short transf)
{
  int i;
  unsigned short tmp;
  
  tmp = transf;
  for(i=0; ((i<n-1) && (tmp!=0)); i++)
  {
    if(tmp%2 == 0)
      str[i] = '0';
    else
      str[i] = '1';
    
    tmp = tmp/2;
  }
  
  if(i != (n-1)){
    for(; i<(n-1); i++)
      str[i] = '0';
  }
  
  str[n-1] = '\0';
  
  return 0;
}

/*得到n个带权值的字符的huffman编码表
int HuffmanCodeT_a(HuffmanTree ht, int n, FqCd *pFC)
{
  int i;
  int s1, s2;
  int f;
  char *cd;
  HuffmanCode hc;
  int c, start;

  //建huffman树
  for(i=n; i<(2*n - 1); i++)
  {
    select(ht, i-1, &s1, &s2);
    ht[s1].parent = i;
    ht[s2].parent = i;
    ht[i].lchild = s1;
    ht[i].rchild = s2;
    ht[i].weight = ht[s1].weight + ht[s2].weight;
  }
  
  hc = (HuffmanCode)malloc(n * sizeof(char *));
  cd = (char *)malloc(n * sizeof(char));
  cd[n-1] = '\0';
  for(i=0; i<n; i++)
  {
    start = n-1;
    for(c=i,f=ht[i].parent; f!=0; c=f,f=ht[f].parent)
      if( ht[f].lchild == c) 
        cd[--start]='0';  //左分支标0
      else 
        cd[--start]='1';  //右分支标1
      
      hc[i]=(char *)malloc((n-start)*sizeof(char));
      strcpy(hc[i], &cd[start]);
      
      pFC[i].cdlength = (n-start) - 1;                 //重要:计算出编码的长度
      strtobit(&pFC[i].code, hc[i], pFC[i].cdlength);   //重要:将字符串表示的编码转化为位串形式
  }
  
  free(cd);
/*
  for(i=0; i<n; i++)
    printf("%d: %s\n", i, hc[i]);

}
*/
/*得到n个带权值的字符的huffman编码表*/
int HuffmanCodeT_b(HuffmanTree ht, int n, FqCd *pFC)
{
  int i;
  int s1, s2;
  int f;
  char *cd;
  HuffmanCode hc;
  int c, start;

  //建huffman树
  for(i=n; i<(2*n - 1); i++)
  {
    select(ht, i-1, &s1, &s2);
    ht[s1].parent = i;
    ht[s2].parent = i;
    ht[i].lchild = s1;
    ht[i].rchild = s2;
    ht[i].weight = ht[s1].weight + ht[s2].weight;
  }
  
  //hc = (HuffmanCode)malloc(n * sizeof(char *));
  cd = (char *)malloc(n * sizeof(char));
  cd[n-1] = '\0';
  for(i=0; i<n; i++)
  {
    start = n-1;
    for(c=i,f=ht[i].parent; f!=0; c=f,f=ht[f].parent)
      if( ht[f].lchild == c) 
        cd[--start]='0';  //左分支标0
      else 
        cd[--start]='1';  //右分支标1
      
      pFC[i].code=(char *)malloc((n-start)*sizeof(char));
      strcpy(pFC[i].code, &cd[start]);
      
      pFC[i].cdlength = (n-start) - 1;                 //重要:计算出编码的长度
  }
  
  free(cd);
/*
  for(i=0; i<n; i++)
    printf("%d: %s\n", i, hc[i]);
*/
}

/*
根据ch的值,进行移位操作。
成功返回0,失败返回-1。
*/
int shiftbit(char *outbuf, char ch)
{
  if(ch == '0'){
    *outbuf = *outbuf << 1;
  }
  else{
    *outbuf = *outbuf << 1;
    *outbuf = *outbuf | 0x01;
  }
  
  return 0;
}

/*对文件进行编码,并返回最后一个字节的实际有效bit数*/
int flencode(FqCd *pFC, int *n)
{
  int i, j, outbufIndex=0, bitPtr=0;
  int loc;
  FILE *instrm, *outstrm;
  char inbuf[BUFFSIZE], outbuf[BUFFSIZE];
  
  instrm = fopen("in.txt", "r");
  if(instrm == NULL){
    //fprintf(stderr, "%s, %d: %s", __FILE__, __LINE__, "open file failed!");
    printf("%s, %d: %s", __FILE__, __LINE__, "open file failed!");
    exit(-1);
  }
  
  outstrm = fopen("out.txt", "a+");
  if(outstrm == NULL){
    //fprintf(stderr, "%s, %d: %s", __FILE__, __LINE__, "open file failed!");
    printf("%s, %d: %s", __FILE__, __LINE__, "open file failed!");
    exit(-1);
  }
  
  memset(outbuf, '\0', BUFFSIZE);
  while(fgets(inbuf, BUFFSIZE, instrm)){
    for(i=0; ((inbuf[i] != '\0') && (i<BUFFSIZE)); i++){
      if(SearchBin(&loc, inbuf[i], pFC, 0, ALLCHARNUM-1) != 0)
        SearchBin(&loc, '*', pFC, 0, ALLCHARNUM-1);
      
      for(j=0; (j<pFC[loc].cdlength) && (outbufIndex<BUFFSIZE); j++){
        if(bitPtr != 8){
          shiftbit(&outbuf[outbufIndex], pFC[loc].code[j]);
          bitPtr++;
        }
        else{
          outbufIndex++;
          bitPtr = 0;
          shiftbit(&outbuf[outbufIndex], pFC[loc].code[j]);
          bitPtr++;
        }
      }//for
      
      if(outbufIndex == BUFFSIZE){
        fwrite(outbuf, outbufIndex, sizeof(char), outstrm);
        outbufIndex = 0;
        memset(outbuf, '\0', BUFFSIZE);
      }
    }//for
  }//while
  
  //对最后一个字节进行处理
  if(bitPtr != 8){
    *n = bitPtr;
    outbuf[outbufIndex] = outbuf[outbufIndex] << (8-bitPtr);
  }
  //输出outbuf中的剩余的编码到文件中
  fwrite(outbuf, (outbufIndex+1), sizeof(char), outstrm);
  
  fclose(instrm);
  fclose(outstrm);
}

main()
{
  int i;
  FqCd *pFC;
  HuffmanTree ht;
  int restBit;
  
  allocFC(&pFC);
  initFC(pFC);

   
  for(i=0; i<ALLCHARNUM; i++)
    printf("pFC[%d].charactor = %c, pFC[%d].freq = %d, pFC[%d].code = %d\n", i, pFC[i].charactor, i, pFC[i].freq, i, pFC[i].code);

  frequency(pFC);
/*  
  for(i=0; i<ALLCHARNUM; i++)
    printf("pFC[%d].charactor = %c, pFC[%d].freq = %d, pFC[%d].code = %d\n", i, pFC[i].charactor, i, pFC[i].freq, i, pFC[i].code);
*/
  allocHT(&ht, ALLCHARNUM);
  initHT(ht, pFC, ALLCHARNUM);
  
/*  for(i=0; i<(2*ALLCHARNUM - 1); i++)
    printf("ht[%d].weight = %d, parent = %d, lchild = %d, rchild = %d\n", i, ht[i].weight, ht[i].parent, ht[i].lchild, ht[i].rchild);
*/
  HuffmanCodeT_b(ht, ALLCHARNUM, pFC);
 
  for(i=0; i<ALLCHARNUM; i++)
    printf("i=%d:pFC[i].charactor = %c, freq = %d, cdlength = %d, code = %s\n", i, pFC[i].charactor, pFC[i].freq, pFC[i].cdlength, pFC[i].code);
  
  flencode(pFC, &restBit);
  
  printf("restBit = %d\n", restBit);
  
  freemem(pFC);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -