📄 encode.cpp
字号:
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>
#define maxsize 256
#define maxweight 2.0
typedef struct element{
float weight;
float EffCodeLenght;
unsigned char ch;
unsigned char* code;
int codelen;
}Element;
typedef Element aType[maxsize];
typedef struct node *Position;
typedef struct node{
Element el;
char pst;
int rwo, line, property;//property标记节点性质: 0代表初始节点,1代表叶结点,2代表枝节点,3代表前缀节点,4代表后缀节点
int depth;//用于记录节点深度
Position left,right,father,link;
Position leaflink,leaffather;
}Node;
//统计文本
void Text(int &n, aType &A, FILE *f_source, FILE *f_code)
{
unsigned char c = 0;
bool newchar = 1; //判断当前字符在被读入前是否已被基本字符表收录
int i;
unsigned long TextLength = 0;
n = 0; //存放基本字符表长度
A[n].ch = getc(f_source); //采集第一个字符
TextLength++;
A[n].weight = 1;
n++;
while(!feof(f_source))
{
c = getc(f_source);
for(i = 0; i<n; i++) //遍历已存入的基本字符表
if(A[i].ch == c) //当前字符已被收入基本字符表
{
A[i].weight += 1; //权数加一
newchar=0; //非新字符
break; //跳出循环
}
if(newchar) //当前字符不在基本字符表中
{
A[i].ch = c; //字符加入基本字符表
n++;
A[i].weight = 1;
}
newchar = 1;
TextLength++; //除去文本结束符
} //形成了基本字符列表,文本长度,基本字符权数
fwrite(&TextLength, 4, 1, f_code);//在头上写入文件长度
fwrite(&n, 4, 1, f_code);//在头上写字符个数
for(i = 0; i < n; i++){ //计算权重,同时输出基本字符列表及权重
A[i].weight /= (float)TextLength;
if(A[i].weight < 0.00003)
A[i].weight = (float)0.00003;
fwrite(&A[i], sizeof(struct element), 1, f_code);
}
rewind(f_source); //文件指针复位
}
//////////////////////////////////////////////////////////////////////*/
//概率相等,按深度排序
void InsertByDepth(Position &q1,Position &q2,Position &p, bool &flag, bool Order)
{
bool flag2 = 0;
while(!flag2){
if(p->depth < q1->depth){
p->link = q1;
q1 = p;
flag2 = 1;
if(Order)
p->leaflink = p->link;
}
else if((p->depth >= q1->depth && p->depth < q2->depth) || p->el.weight < q2->el.weight)
{
p->link = q2;
q1->link =p;
flag2 = 1;
if(Order){
q1->leaflink = q1->link;
p->leaflink = p->link;
}
}
else
{
q1 = q2;
q2 = q1->link;
}
}
flag = 1;
}
////////////////////////////////////////////////////////////////////////////////////////////*/
//节点有序插入函数
void Insert(bool Order, Position &p, Position &head)
{
Position q1, q2;
bool flag = 0;
q1 = head;
q2 = q1->link;
while(!flag){
if(p->el.weight < head->el.weight){//概率最小
p->link = q1;
head = p;
flag = 1;
}
else if(q2 == NULL){//概率最大
q1->link = p;
p->link = NULL;
if(Order)
q1->leaflink = q1->link; //仅排序时用
flag = 1;
}
else if(p->el.weight > q1->el.weight && p->el.weight < q2->el.weight){
q1->link = p;
p->link = q2;
if(Order){
q1->leaflink = q1->link; //仅排序时用
p->leaflink = p->link;
}
flag = 1;
}
else if(p->el.weight == q1->el.weight)//概率相同按深度排序
InsertByDepth(q1,q2,p,flag,Order);
else{//没有找到合适的位置,下移一位
q1 = q2;
q2 = q1->link;
}
if(Order)
p->leaflink = p->link; //使叶节点链表指针有效
}
}
////////////////////////////////////////////////////////////////////////////
//生成huffman树
void Huffman(aType &A, int n, Position &head, Position &codelist)
{
int i;
Position p;
bool Order=1;
Position head2=new(Node);
head = new(Node);
codelist = new(Node);
head ->el.weight = maxweight;
head ->link = head->leaflink = head->leaffather = NULL;
for(i=0;i<n;i++){
p = new(Node);
p ->el = A[i];
p->depth = 1;//初始深度为1
p ->left = p ->right = NULL;
Insert(Order,p,head);
}
codelist = head;//根据概率大小排序以后的一维链表头,只是每一个叶结点的链接
Order = 0;
for(i = 1; i < n; i++){
head2 = head ->link;
p = new(Node);
p->el.weight = head->el.weight + head2->el.weight;
p->left = head; p->right = head2;
p->depth = ( head->depth >= head2->depth ) ? ( head->depth*2+head2->depth) : ( head2->depth*2+head->depth );
head->father = head2->father = p;
head->pst = '0'; head2->pst = '1';//左树枝标0,右树枝标1
head = head2->link;
Insert(Order,p,head);
}
head ->father = NULL;
}
/////////////////////////////////////////////////////////////////////////////
//为每个字符编码/////////////////////////////////////////////////////////////
void ProduceCodeList(int n, Position codelist, Position head)
{
Position pleaf = new(Node), pjunk = new(Node);
unsigned char *InverseCode = new unsigned char[n+1];
int CodeLength = 0;
int count_1_length = 0;//计连续1的长度
int max_1_length = 0;//记录连续1的个数
int value_1_count = 0;//记录除全1码以外的最长的连续1的个数
int value_1_code = 0;//判断全1码
int i = 0;
InverseCode[0] = '\0';
pleaf = codelist;
while(pleaf->leaflink){
int all_1_code = 1;
pjunk = pleaf;
while(pjunk->father){
CodeLength++;
InverseCode[CodeLength] = pjunk->pst;
if(pjunk->pst == '1')
{
count_1_length++;
if(count_1_length > max_1_length)
max_1_length = count_1_length;
}
else
{
count_1_length = 0;
all_1_code = 0;
}
pjunk = pjunk->father;
}
if(all_1_code == 0){
if(max_1_length > value_1_count)
value_1_count = max_1_length;
}
else max_1_length = 0;
count_1_length = 0;
if(all_1_code == 1)//全1码长度,而且只有一个全1码,所以不必初始化
value_1_code = CodeLength;
pleaf->el.code = new unsigned char[CodeLength+1];
pleaf->el.codelen = CodeLength;//统计每一个编码的长度
pleaf->el.EffCodeLenght = pleaf->el.weight * pleaf->el.codelen;//相对有效码长
for( i = 0; i <= CodeLength; i++ )
pleaf->el.code[i] = InverseCode[CodeLength-i];
pleaf = pleaf->leaflink;
CodeLength = 0;
}
}
////////////////////////////////////////////////////////////////////////////*/
//输出编码表到标准输出设备(屏幕)
void PrintCL(const int n, const Position CodeList) //输出编码表到标准输出设备(屏幕)
{
Position p;
p = CodeList;
float AvgCodeLength = 0;
printf("Character\tFrequency\t\tHuffman Code\t\tCodeLength\n");
for(int i = 0; i < n; i++)
{
if(p->el.ch!=7){
printf("%c\t\t%-5.6f%c\t\t\t%s\t\t%d\n",p->el.ch, p->el.weight*100, '%', p->el.code,p->el.codelen);
AvgCodeLength += p->el.EffCodeLenght;
}
p = p->leaflink;
}
printf("\n***** 一共出现%d种字符 ***** \n",n);
printf("\n****** 平均码长: %.6f ****** \n",AvgCodeLength);
}
///////////////////////////////////////////////////////////////////////
//编码函数
void EnCode0(FILE *fpsrc, FILE *fpcode, Position codelist)
{
Position p;
p = codelist;
int i = 0;
unsigned char c;
unsigned char buffer = 0x00;
c = getc(fpsrc);
while(!(feof(fpsrc))){
if(p->el.ch == c){
for(int j=0;j<p->el.codelen;j++){
buffer += (1&p->el.code[j]);
i+=1;
if(i==8){
putc(buffer,fpcode);
i=0;
buffer = 0x00;
}
buffer<<=1;
}
p = codelist;
c = getc(fpsrc);
}
else p=p->leaflink;
}
//文件末尾问题
while(p->el.ch != c)
p=p->leaflink;
for(int j=0;j<p->el.codelen;j++){
buffer += (1&p->el.code[j]);
i+=1;
if(i==8){
putc(buffer,fpcode);
i=0;
buffer = 0x00;
}
buffer<<=1;
}
if(i!=0){
buffer<<=(7-i);
putc(buffer,fpcode);
}
//最后一个buffer可能没有写满,利用i值补'0'//*/
rewind(fpcode);
}
/////////////////////////////////////////////////////////////////////////////
/*/解码函数
void DeCode0(FILE *fpcode, FILE *ffppcode, Position head)
{
Position p;
p = head;
int i = 0;
int j = 0;
unsigned long GetTextLength = 0;
unsigned long TextLengthCount = 0;
unsigned char buffer[4096] = {0};
unsigned int bit = 0x80;
fread(&GetTextLength,4,1,fpcode);//首先读取文件长度
fread(&buffer[0],1,4096,fpcode);//读取第一段数据
while(TextLengthCount<GetTextLength-1){
p = head;
while(p->left||p->right){
if(!(buffer[j]&bit))
p = p->left;
else
p = p->right;
i+=1;
if(i == 8){
i = 0;
bit = 0x100;
j += 1;
}
if(j == 4096){//每次译码完成4096个字节后,读入新的数据
fread(&buffer[0],1,4096,fpcode);
j = 0;
}
bit >>= 1;
}
fprintf(ffppcode, "%c", p->el.ch);
TextLengthCount++;
}
}
////////////////////////////////////////////////////////////////////////////*/
//释放内存
void FreeTree(Position tree)
{
if (tree == NULL) return;
FreeTree(tree->left);
FreeTree(tree->right);
delete tree;
}
/////////////////////////////////////////////////////////////////////////////
////////////////////////
/// 建立RVLC树 ///
////////////////////////
/*/得到字符的双向指针链表
void CountLeafNumberOfEachLevle(const Position codelist, Position &codelistend, int leaf_number[], int &L)
{
Position f_1, f_2;
f_1 = codelist;
f_1->leaffather = NULL;
f_2 = f_1->leaflink;
f_2->leaffather = f_1;
codelistend = f_2;
while(f_2->leaflink->leaflink){
f_1 = f_2;
f_2 = f_1->leaflink;
f_2->leaffather = f_1;
codelistend = f_2;
}
L = codelistend->el.codelen;
while(codelistend){
leaf_number[codelistend->el.codelen]++;
if(L < codelistend->el.codelen)
L = codelistend->el.codelen;
codelistend = codelistend->leaffather;
}
}
////////////////////////////////////////////////////////////////////////////
//构建RVLC树
void RVLC(Position RVLC_head, Position codelistend, int L, int leaf_number[], Position LevelHead[])
{
int LL = L ;//比原始huffman树多一层,以防结点不够
int count = 0;
Position p, q, temp;
while(LL>0){
count = (int)pow(2,LL);
temp = NULL;
p = new(Node);
p->property = 0;
p->rwo = LL;
p->line = 0;
LevelHead[LL] = p;
q = new(Node);
q->property = 0;
q->rwo = LL;
q->line = 1;
p->link = q;
q->link = NULL;
temp = q;
for(int i = 2; i < count;){
p = new(Node);
p->property = 0;
p->rwo = NULL;
p->line = i;
i++;
q = new(Node);
q->property = 0;
q->rwo = LL;
q->line = i;
i++;
temp->link = p;
p->link = q;
q->link = NULL;
temp = q;
}
LL--;
}
}
////////////////////////////////////////////////////////////////////////////*/
//主函数
int main()
{
int n = 26;//字符个数
int L = 0;//字符编码最长长度
aType B;
int leaf_on_level[30] = {0};
Position levelhead[30];
clock_t start,finish, clock();
double duration;
Position head, RVLC_head;
Position codelist, codelistend;
FILE *f_source, *f_code;
if(!(f_source = fopen("1.jpg","rb"))){/////"rb",只读形式打开一个操作对象
printf("1.can not open the file correctly!\n");
return 0;
}
if(!(f_code = fopen("huf_code.wangchao","wb+"))){/////"wb+"---存放文本统计信息以及编码
printf("2.can not open the file correctly!\n");
return 0;
}
////调用统计文本函数
start = clock();
Text(n, B, f_source, f_code);
finish = clock();
duration = (double)(finish - start);
printf("统计用时:%.2fms\n\n",duration);
//根据统计结果建立huffman树
Huffman(B, n, head, codelist);
///step2:为每个出现的字符制定编码
ProduceCodeList(n, codelist, head);
///step3:打印每个字符的出现概率、编码
printf("\nstep3: print code\n");
PrintCL(n, codelist);
//读取原始huffman树的各层信息
//CountLeafNumberOfEachLevle(codelist, codelistend, leaf_on_level, L);
for(int j = 1;j <= L; j++)
printf("%d ",leaf_on_level[j]);
RVLC_head = NULL;
//建立RVLC树
//RVLC(RVLC_head, codelistend, L, leaf_on_level, levelhead);
////step4:读文件、编码,并且把所得编码输出到huf_code.wangchao文件
printf("\n编码中......\n");
start = clock();
EnCode0(f_source, f_code, codelist);
finish = clock();
duration = (double)(finish - start);
printf("\n编码用时:%.2fms\n",duration);
fclose(f_source);
fclose(f_code);
////释放内存
FreeTree(head);
return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -