📄 hf.c
字号:
/***********************************************************************
* 文件名称: %M%
* 文件描述: huffuman编码解码程序
* 文件版本: %I%
* 时 间: %D%
* 函数列表:
* 作 者: 伍一达
* 审 核: 伍一达
* 修订历史:
07/10/26 9:54 by wuyida
07/11/02 3:40 by wuyida
***********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
1,仅对a~z,A~Z,0~9,和常见符号'\n','\r',' ','"',''',',','.',':','?'出现的次数进行统计和编码。
26个大写,26个小写,10个数字,9个符号,其他。
2,各字符的十进制数字表示
'\n' 10 LF
'\r' 13 CR
' ' 32
'"' 34
''' 39
',' 44
'.' 46
':' 58
'?' 63
3,\n含义是换行,将当前位置移到下一行的开头 '\n'的ASCII码是10
\r含义是回车,将当前位置移动到本行的开头 '\r'的ASCII码是13
*/
#define LOWCASENUM 26
#define UPCASENUM 26
#define NUMBERNUM 10
#define PUNCNUM 9
#define ALLCHARNUM ((LOWCASENUM + UPCASENUM + NUMBERNUM + PUNCNUM) + 1) //操他妈的死bug,没加括号
#define BUFFSIZE 255
#define STR (16+1) //sizeof(unsigned short)+1
/*字母在文章中出现的次数的对应的编码结构,主结构*/
typedef struct{
char charactor;
int cdlength;
unsigned short code;
int freq;
}FqCd_a;
/*字母在文章中出现的次数的对应的编码结构,主结构*/
typedef struct{
char charactor;
int cdlength;
char *code;
int freq;
}FqCd;
typedef struct
{
int weight;
int parent, lchild, rchild ;
}HTNode, *HuffmanTree;
typedef char **HuffmanCode;
/*动态分配主结构的存储空间。成功返回0,失败返回-1*/
int allocFC(FqCd **ppFC)
{
*ppFC = (FqCd *)malloc(ALLCHARNUM * sizeof(FqCd));
if(*ppFC == NULL){
printf("%s, %d: %s", __FILE__, __LINE__, "alloc failed!");
exit(-1);
}
return 0;
}
/*释放分配的存储空间。成功返回0,失败返回-1*/
int freemem(FqCd *pFC)
{
free(pFC);
return 0;
}
/*初始化主结构,成功返回0,失败返回-1*/
int initFC(FqCd *pFC)
{
int i, j;
/*将符号存入主结构中*/
pFC[0].charactor = '\n';
pFC[1].charactor = '\r';
pFC[2].charactor = ' ';
pFC[3].charactor = '"';
pFC[4].charactor = '\'';
pFC[5].charactor = '*';
pFC[6].charactor = ',';
pFC[7].charactor = '.';
/*将数字存入主结构中*/
for(i=8, j=0; j<NUMBERNUM; i++, j++)
{
pFC[i].charactor = '0' + j;
}
/*将符号存入主结构中*/
pFC[i++].charactor = ':';
pFC[i++].charactor = '?';
/*将大写字母存入主结构中*/
for(j=0; j<UPCASENUM; i++, j++)
{
pFC[i].charactor = 'A' + j;
}
/*将小写字母存入主结构中*/
for(j=0; j<LOWCASENUM; i++, j++)
{
pFC[i].charactor = 'a' + j;
}
/*对主结构中的成员freq进行初始化*/
for(i=0; i<ALLCHARNUM; i++){
pFC[i].freq = 0;
pFC[i].cdlength = 0;
pFC[i].code = '\0';
}
return 0;
}
/*折半查找法,结果放入loc中。成功返回0,失败返回-1。*/
int SearchBin(int *loc, char key, FqCd *pFC, int low, int high)
{
int mid;
if(pFC == NULL){
printf("%s, %d: %s", __FILE__, __LINE__, "para error!");
exit(-1);
}
if(loc == NULL){
printf("%s, %d: %s", __FILE__, __LINE__, "para error!");
exit(-1);
}
while(low <= high){
mid = (low + high)/2;
if(key == pFC[mid].charactor){
*loc = mid;
return 0;
}
else if(key > pFC[mid].charactor)
low = mid + 1;
else
high = mid - 1;
}
return -1;
}
/*对相应字符的频率增一,成功返回0,失败返回-1。*/
int addfreq(char ch, FqCd *pFC, int low, int high)
{
int loc;
if(SearchBin(&loc, ch, pFC, low, high) != 0){
printf("%s, %d: %s", __FILE__, __LINE__, "searching failed!");
exit(-1);
}
else{
pFC[loc].freq += 1;
}
return 0;
}
/*统计文件中各字母出现的频率,成功返回0,失败返回-1。*/
int frequency(FqCd *pFC)
{
int i;
FILE *stream;
char buf[BUFFSIZE];
stream = fopen("in0.txt", "r");
if(stream == NULL){
//fprintf(stderr, "%s, %d: %s", __FILE__, __LINE__, "open file failed!");
printf("%s, %d: %s", __FILE__, __LINE__, "open file failed!");
exit(-1);
}
/*
for(i=0; i<ALLCHARNUM; i++)
printf("pFC[%d].charactor = %c, pFC[%d].freq = %d, pFC[%d].code = %d\n", i, pFC[i].charactor, i, pFC[i].freq, i, pFC[i].code);
*/
/*memset(buf, '\0', BUFFSIZE);*/
while(fgets(buf, BUFFSIZE, stream)){
for(i=0; ((buf[i] != '\0') && (i<BUFFSIZE)); i++){
if((buf[i]>='a')&&(buf[i]<='z')){
addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
}
else if((buf[i] >= 'A')&&(buf[i] <= 'Z')){
addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
}
else if((buf[i] >= '0')&&(buf[i] <= '9')){
addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
}
else if(buf[i] == '\n'){
addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
}
else if(buf[i] == '\r'){
addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
}
else if(buf[i] == ' '){
addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
}
else if(buf[i] == '"'){
addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
}
else if(buf[i] == '\''){
addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
}
else if(buf[i] == ','){
addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
}
else if(buf[i] == '.'){
addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
}
else if(buf[i] == ':'){
addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
}
else if(buf[i] == '?'){
addfreq(buf[i], pFC, 0, ALLCHARNUM-1);
}
else
addfreq('*', pFC, 0, ALLCHARNUM-1);
}
}
fclose(stream);
return 0;
}
/*在0-n范围内选择出两个parent值为零的最小的数,将它们对应的index值赋给s1,s2。成功返回0,失败返回-1。*/
int select(HuffmanTree ht, int n, int *s1, int *s2)
{
int i;
int minindex, minvalue;
int flag;
//选择节点parent值为零的第一个最小节点的序号
flag = 0;
for(i=0; i<=n; i++){
if(ht[i].parent == 0){
if(flag == 0){
minindex = i;
minvalue = ht[i].weight;
flag = 1;
continue;
}
if(ht[i].weight < minvalue)
{
minindex = i;
minvalue = ht[i].weight;
}
}
}//for
*s1 = minindex;
//选择节点parent值为零的第二个最小节点的序号
flag = 0;
for(i=0; i<=n; i++){
if((i!=*s1) && (ht[i].parent==0)){
if(flag == 0){
minindex = i;
minvalue = ht[i].weight;
flag = 1;
continue;
}
if((i!=*s1) && (ht[i].weight<minvalue))
{
minindex = i;
minvalue = ht[i].weight;
}
}
}//for
*s2 = minindex;
return 0;
}
/*给huffman树分配动态空间。成功返回0,失败返回-1。*/
int allocHT(HuffmanTree *pht, int n)
{
*pht = (HuffmanTree)malloc((2*n - 1) * sizeof(HTNode));
if(*pht == NULL){
printf("%s, %d: %s", __FILE__, __LINE__, "alloc failed!");
exit(-1);
}
return 0;
}
/*初始化huffman树。成功返回0,失败返回-1。*/
int initHT(HuffmanTree ht, FqCd *pFC, int n)
{
int i;
for(i=0; i<n; i++)
{
ht[i].weight = pFC[i].freq;
ht[i].parent = 0;
ht[i].lchild = 0;
ht[i].rchild = 0;
}
for(; i<(2*n - 1); i++)
{
ht[i].weight = 0;
ht[i].parent = 0;
ht[i].lchild = 0;
ht[i].rchild = 0;
}
return 0;
}
/*
将字符串转化为位串。
成功返回0,失败返回-1。
*/
int strtobit(unsigned short *transf, char *str, int n)
{
int i;
unsigned short tmp = 1, rst = 0;
for(i=(n-1); i>=0; i--){
if(str[i] == '1'){
rst += tmp;
}
tmp *= 2;
}
*transf = rst;
return 0;
}
/*
将transf的位串(16个)转换为字符串(16个+1个结尾符)。
位串中的高位放在字符串的低位,位串中的低位放在字符串的高位。
成功返回0,失败返回-1。
*/
int bittostr(char *str, int n, unsigned short transf)
{
int i;
unsigned short tmp;
tmp = transf;
for(i=0; ((i<n-1) && (tmp!=0)); i++)
{
if(tmp%2 == 0)
str[i] = '0';
else
str[i] = '1';
tmp = tmp/2;
}
if(i != (n-1)){
for(; i<(n-1); i++)
str[i] = '0';
}
str[n-1] = '\0';
return 0;
}
/*得到n个带权值的字符的huffman编码表
int HuffmanCodeT_a(HuffmanTree ht, int n, FqCd *pFC)
{
int i;
int s1, s2;
int f;
char *cd;
HuffmanCode hc;
int c, start;
//建huffman树
for(i=n; i<(2*n - 1); i++)
{
select(ht, i-1, &s1, &s2);
ht[s1].parent = i;
ht[s2].parent = i;
ht[i].lchild = s1;
ht[i].rchild = s2;
ht[i].weight = ht[s1].weight + ht[s2].weight;
}
hc = (HuffmanCode)malloc(n * sizeof(char *));
cd = (char *)malloc(n * sizeof(char));
cd[n-1] = '\0';
for(i=0; i<n; i++)
{
start = n-1;
for(c=i,f=ht[i].parent; f!=0; c=f,f=ht[f].parent)
if( ht[f].lchild == c)
cd[--start]='0'; //左分支标0
else
cd[--start]='1'; //右分支标1
hc[i]=(char *)malloc((n-start)*sizeof(char));
strcpy(hc[i], &cd[start]);
pFC[i].cdlength = (n-start) - 1; //重要:计算出编码的长度
strtobit(&pFC[i].code, hc[i], pFC[i].cdlength); //重要:将字符串表示的编码转化为位串形式
}
free(cd);
/*
for(i=0; i<n; i++)
printf("%d: %s\n", i, hc[i]);
}
*/
/*得到n个带权值的字符的huffman编码表*/
int HuffmanCodeT_b(HuffmanTree ht, int n, FqCd *pFC)
{
int i;
int s1, s2;
int f;
char *cd;
HuffmanCode hc;
int c, start;
//建huffman树
for(i=n; i<(2*n - 1); i++)
{
select(ht, i-1, &s1, &s2);
ht[s1].parent = i;
ht[s2].parent = i;
ht[i].lchild = s1;
ht[i].rchild = s2;
ht[i].weight = ht[s1].weight + ht[s2].weight;
}
//hc = (HuffmanCode)malloc(n * sizeof(char *));
cd = (char *)malloc(n * sizeof(char));
cd[n-1] = '\0';
for(i=0; i<n; i++)
{
start = n-1;
for(c=i,f=ht[i].parent; f!=0; c=f,f=ht[f].parent)
if( ht[f].lchild == c)
cd[--start]='0'; //左分支标0
else
cd[--start]='1'; //右分支标1
pFC[i].code=(char *)malloc((n-start)*sizeof(char));
strcpy(pFC[i].code, &cd[start]);
pFC[i].cdlength = (n-start) - 1; //重要:计算出编码的长度
}
free(cd);
/*
for(i=0; i<n; i++)
printf("%d: %s\n", i, hc[i]);
*/
}
/*
根据ch的值,进行移位操作。
成功返回0,失败返回-1。
*/
int shiftbit(char *outbuf, char ch)
{
if(ch == '0'){
*outbuf = *outbuf << 1;
}
else{
*outbuf = *outbuf << 1;
*outbuf = *outbuf | 0x01;
}
return 0;
}
/*对文件进行编码,并返回最后一个字节的实际有效bit数*/
int flencode(FqCd *pFC, int *n)
{
int i, j, outbufIndex=0, bitPtr=0;
int loc;
FILE *instrm, *outstrm;
char inbuf[BUFFSIZE], outbuf[BUFFSIZE];
instrm = fopen("in.txt", "r");
if(instrm == NULL){
//fprintf(stderr, "%s, %d: %s", __FILE__, __LINE__, "open file failed!");
printf("%s, %d: %s", __FILE__, __LINE__, "open file failed!");
exit(-1);
}
outstrm = fopen("out.txt", "a+");
if(outstrm == NULL){
//fprintf(stderr, "%s, %d: %s", __FILE__, __LINE__, "open file failed!");
printf("%s, %d: %s", __FILE__, __LINE__, "open file failed!");
exit(-1);
}
memset(outbuf, '\0', BUFFSIZE);
while(fgets(inbuf, BUFFSIZE, instrm)){
for(i=0; ((inbuf[i] != '\0') && (i<BUFFSIZE)); i++){
if(SearchBin(&loc, inbuf[i], pFC, 0, ALLCHARNUM-1) != 0)
SearchBin(&loc, '*', pFC, 0, ALLCHARNUM-1);
for(j=0; (j<pFC[loc].cdlength) && (outbufIndex<BUFFSIZE); j++){
if(bitPtr != 8){
shiftbit(&outbuf[outbufIndex], pFC[loc].code[j]);
bitPtr++;
}
else{
outbufIndex++;
bitPtr = 0;
shiftbit(&outbuf[outbufIndex], pFC[loc].code[j]);
bitPtr++;
}
}//for
if(outbufIndex == BUFFSIZE){
fwrite(outbuf, outbufIndex, sizeof(char), outstrm);
outbufIndex = 0;
memset(outbuf, '\0', BUFFSIZE);
}
}//for
}//while
//对最后一个字节进行处理
if(bitPtr != 8){
*n = bitPtr;
outbuf[outbufIndex] = outbuf[outbufIndex] << (8-bitPtr);
}
//输出outbuf中的剩余的编码到文件中
fwrite(outbuf, (outbufIndex+1), sizeof(char), outstrm);
fclose(instrm);
fclose(outstrm);
}
main()
{
int i;
FqCd *pFC;
HuffmanTree ht;
int restBit;
allocFC(&pFC);
initFC(pFC);
for(i=0; i<ALLCHARNUM; i++)
printf("pFC[%d].charactor = %c, pFC[%d].freq = %d, pFC[%d].code = %d\n", i, pFC[i].charactor, i, pFC[i].freq, i, pFC[i].code);
frequency(pFC);
/*
for(i=0; i<ALLCHARNUM; i++)
printf("pFC[%d].charactor = %c, pFC[%d].freq = %d, pFC[%d].code = %d\n", i, pFC[i].charactor, i, pFC[i].freq, i, pFC[i].code);
*/
allocHT(&ht, ALLCHARNUM);
initHT(ht, pFC, ALLCHARNUM);
/* for(i=0; i<(2*ALLCHARNUM - 1); i++)
printf("ht[%d].weight = %d, parent = %d, lchild = %d, rchild = %d\n", i, ht[i].weight, ht[i].parent, ht[i].lchild, ht[i].rchild);
*/
HuffmanCodeT_b(ht, ALLCHARNUM, pFC);
for(i=0; i<ALLCHARNUM; i++)
printf("i=%d:pFC[i].charactor = %c, freq = %d, cdlength = %d, code = %s\n", i, pFC[i].charactor, pFC[i].freq, pFC[i].cdlength, pFC[i].code);
flencode(pFC, &restBit);
printf("restBit = %d\n", restBit);
freemem(pFC);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -