📄 huffman_code.cpp

📁 huffman编码. 把一个英文字母,空格,句号.一共有28个 character. 先求每个字符出现的频率. 然后用频率对这个文件进行哈夫曼编码. 然后再进行解码. 运行的时候需要在V
💻 CPP
字号:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define   N   28

typedef struct { //array of the character frequency
 char  data;
 int  count;
 float  frequent;
} character ;

typedef struct { //array of huffman tree  
 char data;
 float weight;
 int  lchild, rchild, parent;
} hufmtree;


typedef char **hufmcode;  //list of huffman encoding  

hufmtree *Huffman_tree(int n, character *ch);
hufmcode Huffman_Coding(hufmtree *ht, int n);
character *Frequent_of_character(FILE *fp);
int prn_char(character *ch);
void prn_tree(hufmtree *ht);
FILE *file_open(const char *dir);
void Coding_file(hufmcode hc, const char *dir, const char *dir_code);
void Uncoding_file(hufmtree *ht, const char *dir_code, const char *dir_re, int m);


int main(int argc, char *argv[])
{
 FILE *fp;
 character *ch;
 hufmtree *ht;
 hufmcode hc;
 int m, n;
 
 const char *dir = argv[1];   
 const char *dir_code = argv[2];  
 const char *dir_re = argv[3];  
 
 fp = file_open(dir); 
 ch = Frequent_of_character(fp);
 n = prn_char(ch);
 m = 2 * n - 1;
 
 ht = Huffman_tree(n, ch); 
 hc = Huffman_Coding(ht, n);

 Coding_file( hc, dir, dir_code);
 Uncoding_file(ht, dir_code, dir_re, m); 
 fclose(fp);

 return 0;
}


hufmtree *Huffman_tree(int n, character *ch)
{//according to the frequency ,build huffman tree.
 hufmtree *ht;
 int   i, j, p1, p2, m;
 float   small1, small2; 
 
 m = 2 * n - 1;  //number of all node=number of leaf node * 2 - 1
 ht = (hufmtree *)malloc((m ) * sizeof(hufmtree)); //huffman tree ,(m+1)means begin with 1
 memset(ht, '\0', m*sizeof(hufmtree));
 if(ht == NULL) printf("Memory allocate error.\n");
 for(i = 1; i <= m ; i++) {     //initialize all of the huffman tree crunode 
  ht[i].data = ch[i].data;    //对应字符赋值
  ht[i].weight = 0.0;     //
  ht[i].lchild = ht[i].rchild = ht[i].parent = 0;
 }
 putchar('\n');
 for(i = 1; i <= n; i++){    //叶子结点权值初始化
  ht[i].weight = ch[i].frequent;
 }
 printf("Build huffman tree:\n");
 printf("The number of leaf nodes n = %d, The number of all nodes m = %d\n", n, m);
 for(i = n + 1; i <= m; i++){  //build huffman tree
  p1 = 0;
  p2 = 0;
  small1 = small2 = 1;
  for(j = 1; j <= i - 1; j++){   //find out the most small one,and the hypo- small one 
   if(ht[j].parent == 0){
    if(ht[j].weight < small1){
     small2 = small1;
     small1 = ht[j].weight;
     p2 = p1;
     p1 = j;
    }
    else if(ht[j].weight < small2){
     small2 = ht[j].weight;
     p2 = j;
    }
   }
  }
  ht[p1].parent = i ;  
  ht[p2].parent = i ;  
  ht[i].lchild = p1 ;  //left child is the most small one 
  ht[i].rchild = p2 ;  //right child is the hypo- small one
  ht[i].weight = ht[p1].weight + ht[p2].weight; 
 }
 return ht;
}

hufmcode Huffman_Coding(hufmtree *ht, int n)
{//huffman encoding
 int   i, c, start, f;
 char ** hc;
 char *cd;
   //getchar();  
// hc = (hufmcode)malloc((n + 1) * sizeof(char *)); //initialization of the encoding list 
 hc = (char **)malloc((n + 1) * sizeof(char *)); //initialization of the encoding list 
 cd = (char *)malloc(n * sizeof(char));   //temporary encoding list 
 
 cd[n - 1] = '\0';
 printf("\nMaking huffman encoding:\n");
 for(i = 1; i <= n; i++){    //Make huffman encoding
  start = n - 1;
  for(c = i, f = ht[i].parent; f != 0; c = f, f = ht[f ].parent){
   if(ht[f].lchild == c)  { cd[--start] = '0'; }
   else { cd[--start] = '1'; }

  }   
  hc[i] = (char *)malloc((n - start) * sizeof(char));  
  strcpy( &hc[i][1], &cd[start]);  
  hc[i][0] = ht[i].data;    
 } 
 putchar('\n');
 for(i = 1; i <= n; i++){
  printf("hc[%d]:  %c    %s\n", i, hc[i][0], &hc[i][1]);
 }
 return hc;
}

character *Frequent_of_character(FILE *fp)
{//caculate frequency 
 int i, last, length = 0;
 character *ch;
 char c;
 
 ch = (character *)malloc( N * sizeof(character));
 memset(ch, '\0', N * sizeof(character));
 printf("\n\nPrint out original text file:\n");
 while( (c = fgetc(fp)) != EOF) {
	 if(c!='\n') {
		printf("%c", c);
		length++;
	 }
 }

 printf("\n\nLength of the original text file:\n");
 printf("\nlength = %d\n\n", length);
 
 fseek(fp, 0, SEEK_SET);
 last = 0;    //当前字符种类数
 while((c = fgetc(fp)) != EOF) {
  i = 0;  
  while( c != ch[i].data && i <= last && last <= N) { //verdict whether there will be occur new character or overflow breed of character
   i++;
  }
  if(i > last){  //occur new character
   last = i;
   ch[last].data = c;
   ch[last].count++;
  }
  else if( c == ch[i].data){ //already appeared character
   ch[i].count++;
  }
  else {   //bread of character > N 
   printf("字符种类大于N.\n");
  }
 }
 for(i = 1; ch[i].count != 0; i++){  //caculate frequency of each characters,by way of weight
  ch[i].frequent = (float)((float)ch[i].count / (float)length);
 }
 return ch;
}

int prn_char(character *ch)
{//Print out occurrence of characters
 int i, count;
 
 printf("The number of characters and frequency:\n");
 printf("sequence  character count  frequency\n");
 for(i = 1; ch[i].count != 0; i++){
  printf("ch[%d] :   %c    %d    %f \n", i, ch[i].data, ch[i].count, ch[i].frequent);
 }
 count = i - 1;
 return count;
}
 
FILE *file_open(const char *dir)
{//open file 
 FILE *fp;
 
 fp = fopen( dir, "r" );
 if(fp == NULL) printf("File %s open Error\n", dir);
 return fp;
}

void Coding_file(hufmcode hc, const char *dir, const char *dir_code)
{
 int i;
 FILE *fp1, *fp2;
 char c;
 
 fp1 = file_open( dir);
 fp2 = fopen( dir_code, "w" );
 if(fp2 == NULL) printf("File %s open ERROR.\n", dir_code);
 
 printf("Encoding text file:\n");
 while((c = fgetc(fp1)) != EOF) { 
  for( i = 1; hc[i][0] != c; i++) ;  //Find out characters from encoding list
  fputs( &hc[i][1], fp2);    //将字符对应的编码写入fp2指向的文件
  printf("%s", &hc[i][1]);
 }
 fclose(fp1);
 fclose(fp2);
}

void Uncoding_file(hufmtree *ht, const char *dir_code, const char *dir_re, int m)
{//decoding the encoded file 
 int i;
 char c;
 FILE *fp1, *fp2;
 
 i = m;   //from the root of the tree
 fp1 = file_open( dir_code);  //encoded file
 fp2 = fopen( dir_re , "w"); //decoding file
 if(fp2 == NULL) printf("File %s open error.\n", dir_re);
 
 c = fgetc(fp1);   //read one encoding character
 printf("\n将编码文件翻译成明文:\n");
 while(c != EOF) {  //将编码文件翻译成明文
  if(c == '0') {
   i = ht[i].lchild ;
  }
  else if(c == '1') {
   i = ht[i].rchild ;
  }
  else printf("Uncoding ERROR.\n");
  if(ht[i].lchild == 0) {  //走到叶子结点,将其对应字符写入fp2指向的文件
   putchar(ht[i].data);
   fputc( ht[i].data, fp2);
   i = m;   //restart from root
  }
  c = fgetc(fp1);
 }
 fclose(fp1);
 fclose(fp2);
}
💿 文件大小 3 K
👤 上传用户 horse2000
📂 所属分类压缩解压
🏷️ 相关标签

#character #huffman #编码 #频率
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -