📄 huffman_code.cpp
字号:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 28
typedef struct { //array of the character frequency
char data;
int count;
float frequent;
} character ;
typedef struct { //array of huffman tree
char data;
float weight;
int lchild, rchild, parent;
} hufmtree;
typedef char **hufmcode; //list of huffman encoding
hufmtree *Huffman_tree(int n, character *ch);
hufmcode Huffman_Coding(hufmtree *ht, int n);
character *Frequent_of_character(FILE *fp);
int prn_char(character *ch);
void prn_tree(hufmtree *ht);
FILE *file_open(const char *dir);
void Coding_file(hufmcode hc, const char *dir, const char *dir_code);
void Uncoding_file(hufmtree *ht, const char *dir_code, const char *dir_re, int m);
int main(int argc, char *argv[])
{
FILE *fp;
character *ch;
hufmtree *ht;
hufmcode hc;
int m, n;
const char *dir = argv[1];
const char *dir_code = argv[2];
const char *dir_re = argv[3];
fp = file_open(dir);
ch = Frequent_of_character(fp);
n = prn_char(ch);
m = 2 * n - 1;
ht = Huffman_tree(n, ch);
hc = Huffman_Coding(ht, n);
Coding_file( hc, dir, dir_code);
Uncoding_file(ht, dir_code, dir_re, m);
fclose(fp);
return 0;
}
hufmtree *Huffman_tree(int n, character *ch)
{//according to the frequency ,build huffman tree.
hufmtree *ht;
int i, j, p1, p2, m;
float small1, small2;
m = 2 * n - 1; //number of all node=number of leaf node * 2 - 1
ht = (hufmtree *)malloc((m ) * sizeof(hufmtree)); //huffman tree ,(m+1)means begin with 1
memset(ht, '\0', m*sizeof(hufmtree));
if(ht == NULL) printf("Memory allocate error.\n");
for(i = 1; i <= m ; i++) { //initialize all of the huffman tree crunode
ht[i].data = ch[i].data; //对应字符赋值
ht[i].weight = 0.0; //
ht[i].lchild = ht[i].rchild = ht[i].parent = 0;
}
putchar('\n');
for(i = 1; i <= n; i++){ //叶子结点权值初始化
ht[i].weight = ch[i].frequent;
}
printf("Build huffman tree:\n");
printf("The number of leaf nodes n = %d, The number of all nodes m = %d\n", n, m);
for(i = n + 1; i <= m; i++){ //build huffman tree
p1 = 0;
p2 = 0;
small1 = small2 = 1;
for(j = 1; j <= i - 1; j++){ //find out the most small one,and the hypo- small one
if(ht[j].parent == 0){
if(ht[j].weight < small1){
small2 = small1;
small1 = ht[j].weight;
p2 = p1;
p1 = j;
}
else if(ht[j].weight < small2){
small2 = ht[j].weight;
p2 = j;
}
}
}
ht[p1].parent = i ;
ht[p2].parent = i ;
ht[i].lchild = p1 ; //left child is the most small one
ht[i].rchild = p2 ; //right child is the hypo- small one
ht[i].weight = ht[p1].weight + ht[p2].weight;
}
return ht;
}
hufmcode Huffman_Coding(hufmtree *ht, int n)
{//huffman encoding
int i, c, start, f;
char ** hc;
char *cd;
//getchar();
// hc = (hufmcode)malloc((n + 1) * sizeof(char *)); //initialization of the encoding list
hc = (char **)malloc((n + 1) * sizeof(char *)); //initialization of the encoding list
cd = (char *)malloc(n * sizeof(char)); //temporary encoding list
cd[n - 1] = '\0';
printf("\nMaking huffman encoding:\n");
for(i = 1; i <= n; i++){ //Make huffman encoding
start = n - 1;
for(c = i, f = ht[i].parent; f != 0; c = f, f = ht[f ].parent){
if(ht[f].lchild == c) { cd[--start] = '0'; }
else { cd[--start] = '1'; }
}
hc[i] = (char *)malloc((n - start) * sizeof(char));
strcpy( &hc[i][1], &cd[start]);
hc[i][0] = ht[i].data;
}
putchar('\n');
for(i = 1; i <= n; i++){
printf("hc[%d]: %c %s\n", i, hc[i][0], &hc[i][1]);
}
return hc;
}
character *Frequent_of_character(FILE *fp)
{//caculate frequency
int i, last, length = 0;
character *ch;
char c;
ch = (character *)malloc( N * sizeof(character));
memset(ch, '\0', N * sizeof(character));
printf("\n\nPrint out original text file:\n");
while( (c = fgetc(fp)) != EOF) {
if(c!='\n') {
printf("%c", c);
length++;
}
}
printf("\n\nLength of the original text file:\n");
printf("\nlength = %d\n\n", length);
fseek(fp, 0, SEEK_SET);
last = 0; //当前字符种类数
while((c = fgetc(fp)) != EOF) {
i = 0;
while( c != ch[i].data && i <= last && last <= N) { //verdict whether there will be occur new character or overflow breed of character
i++;
}
if(i > last){ //occur new character
last = i;
ch[last].data = c;
ch[last].count++;
}
else if( c == ch[i].data){ //already appeared character
ch[i].count++;
}
else { //bread of character > N
printf("字符种类大于N.\n");
}
}
for(i = 1; ch[i].count != 0; i++){ //caculate frequency of each characters,by way of weight
ch[i].frequent = (float)((float)ch[i].count / (float)length);
}
return ch;
}
int prn_char(character *ch)
{//Print out occurrence of characters
int i, count;
printf("The number of characters and frequency:\n");
printf("sequence character count frequency\n");
for(i = 1; ch[i].count != 0; i++){
printf("ch[%d] : %c %d %f \n", i, ch[i].data, ch[i].count, ch[i].frequent);
}
count = i - 1;
return count;
}
FILE *file_open(const char *dir)
{//open file
FILE *fp;
fp = fopen( dir, "r" );
if(fp == NULL) printf("File %s open Error\n", dir);
return fp;
}
void Coding_file(hufmcode hc, const char *dir, const char *dir_code)
{
int i;
FILE *fp1, *fp2;
char c;
fp1 = file_open( dir);
fp2 = fopen( dir_code, "w" );
if(fp2 == NULL) printf("File %s open ERROR.\n", dir_code);
printf("Encoding text file:\n");
while((c = fgetc(fp1)) != EOF) {
for( i = 1; hc[i][0] != c; i++) ; //Find out characters from encoding list
fputs( &hc[i][1], fp2); //将字符对应的编码写入fp2指向的文件
printf("%s", &hc[i][1]);
}
fclose(fp1);
fclose(fp2);
}
void Uncoding_file(hufmtree *ht, const char *dir_code, const char *dir_re, int m)
{//decoding the encoded file
int i;
char c;
FILE *fp1, *fp2;
i = m; //from the root of the tree
fp1 = file_open( dir_code); //encoded file
fp2 = fopen( dir_re , "w"); //decoding file
if(fp2 == NULL) printf("File %s open error.\n", dir_re);
c = fgetc(fp1); //read one encoding character
printf("\n将编码文件翻译成明文:\n");
while(c != EOF) { //将编码文件翻译成明文
if(c == '0') {
i = ht[i].lchild ;
}
else if(c == '1') {
i = ht[i].rchild ;
}
else printf("Uncoding ERROR.\n");
if(ht[i].lchild == 0) { //走到叶子结点,将其对应字符写入fp2指向的文件
putchar(ht[i].data);
fputc( ht[i].data, fp2);
i = m; //restart from root
}
c = fgetc(fp1);
}
fclose(fp1);
fclose(fp2);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -