📄 redfun.cpp
字号:
#include "redfun.h"
#include <fstream.h>
#include <string.h>
#include <math.h>
const int MAX_B_KIND = 256;
/*-------------------------------------------------------
目的:用文件初始化对象
输入:char *fName 待压缩文件名
输出:无
--------------------------------------------------------*/
FReduce::FReduce(char *fName)
{
strcpy(m_fileName,fName);
m_kind = 0;
m_num = 0;
m_pCRule = NULL;
m_pFBFre = NULL;
}
/*-------------------------------------------------------
目的:析构函数
输入:无
输出:无
--------------------------------------------------------*/
FReduce::~FReduce(void)
{
if(m_pFBFre!=NULL)
{
delete []m_pFBFre;
m_pFBFre = NULL;
}
if(m_pCRule!=NULL)
{
delete []m_pCRule;
m_pCRule = NULL;
}
}
/*-------------------------------------------------------
目的:对将要压缩的文件按字节进行香农编码
输入:无
输出:若编码成功则返回true,否则返回false
--------------------------------------------------------*/
bool FReduce::FCoding()
{
if(!FStating())
{
return false;
}
else
{
if(m_pFBFre == NULL)
{
return false;
}
else
{
int i = 0, j = 0;
// 求累积概率
float *cumP = new float[m_kind];
cumP[0] = 0; // 累积概率
for(i=1; i<m_kind; i++)
{
cumP[i] = m_pFBFre[i-1].freqency + cumP[i-1];
}
// 求码长
int *codLen = new int[m_kind];
for(i=0; i<m_kind; i++)
{
codLen[i] = (int)ceil(-log(m_pFBFre[i].freqency)/log(2));
}
// 求码字
m_pCRule = new CRule[m_kind];
float tempProb = 0.0; // 临时累积概率
for(i=0; i<m_kind; i++)
{
m_pCRule[i].fByte = m_pFBFre[i].fByte;
tempProb = cumP[i];
int temp = 0;
m_pCRule[i].code = new char[codLen[i]];
for(j=0; j<codLen[i]; j++)
{
temp = ((int)(tempProb*2))%2;
if(temp == 1)
{
m_pCRule[i].code[j] = '1';
}
else
{
if(temp == 0)
{
m_pCRule[i].code[j] = '0';
}
else
{
return false;
}
}
tempProb = tempProb*2 - (float)temp;
}
m_pCRule[i].code[j] = '\0';
}
delete []cumP;
delete []codLen;
return true;
}
}
}
/*-------------------------------------------------------
目的:统计文件各类字节出现的频率、类数、个数,
并将各类字节依概率降序排列
输入:无
输出:统计成功返回true,否则返回false
--------------------------------------------------------*/
bool FReduce::FStating(void)
{
if(m_fileName == NULL)
{
return false;
}
else
{
int i = 0, j = 0;
ifstream in(m_fileName, ios::binary | ios::nocreate); // 打开文件
// 统计文件字节概率、类数、个数
unsigned int statresult[MAX_B_KIND] = {0};
unsigned char ch = 0x00;
in.read(&ch,1);
while(!in.eof())
{
i++;
statresult[ch] += 1;
in.read(&ch,1);
}
for(i=0; i<MAX_B_KIND; i++)
{
if(statresult[i]!=0)
{
m_kind++; // 统计字节类数
m_num += statresult[i]; // 统计字节个数
}
}
m_pFBFre = new FBFre[m_kind];
for(i=0,j=0; i<MAX_B_KIND; i++)
{
if(statresult[i]!=0)
{
m_pFBFre[j].freqency = (float)statresult[i]/(float)m_num; // 字节概率
m_pFBFre[j].fByte = (char)i;
j++;
}
}
if(!ProbSort()) // 对各字节依概率降序排列
{
return false;
}
return true;
}
}
/*-------------------------------------------------------
目的:将源文件按香农编码规则压缩存储
输入:char *destFile 压缩文件存储路径
输出:存储成功返回ture,否则返回false
--------------------------------------------------------*/
bool FReduce::FReduceSaving(char *destFile)
{
if(m_pCRule==NULL || destFile==NULL)
{
return false;
}
else
{
ofstream out(destFile, ios::binary);
// 向目标文件写入编码规则
int i = 0, j = 0;
char chBlock[8]; // 临时存放8个码元符号
unsigned char bitBuffer = NULL; // 缓存,用来向文件中写入一个字节的数据
unsigned char length; // 码字长
bool flag = true; // 编码写入开关
out<<(unsigned char)(m_kind-1);
for(i=0; i<m_kind; i++)
{
out<<m_pCRule[i].fByte;
length = strlen((char*)m_pCRule[i].code);
out<<length;
flag = true; // 打开编码写入开关
int time = 0;
while(flag)
{
if(length<=8)
{
bitBuffer = Trans(&m_pCRule[i].code[time*8], length);
bitBuffer <<= 8-length; // 将有效码字移致最高位
out<<bitBuffer;
flag = false; // 关闭第i个字节的编码写入开关
}
else
{
bitBuffer = Trans(m_pCRule[i].code);
out<<bitBuffer;
length -= 8;
}
time++;
}
}
// 将源文件依编码规则压缩存贮
ifstream in(m_fileName, ios::binary | ios::nocreate);
if(!in.is_open())
{
return false;
}
unsigned char ch;
int subscrip; // ch 相应下标
j = 0; // 标识8个长度的数据块
in.read(&ch, 1);
while(!in.eof())
{
i = 0; // 标识码字的长度
subscrip = Search(ch); // 查找ch相应的下标
while(m_pCRule[subscrip].code[i] =='0' || m_pCRule[subscrip].code[i] =='1')
{
if(j<8)
{
chBlock[j] = m_pCRule[subscrip].code[i];// 用八个码元凑够一个字节的数据块
i++;
j++;
}
else
{
bitBuffer = Trans(chBlock);
out<<bitBuffer; // 将编码后的数据写入文件
j = 0;
chBlock[0] = '\0';
}
}
in.read(&ch, 1);
}
if(chBlock[0]!='\0') // 不够一个数据块时
{
unsigned char tempBuffer = NULL; // 临时缓存
bitBuffer = Trans(chBlock,j);
tempBuffer = Trans(m_pCRule[m_kind-1].code, 8-j); // 用最长的码字用来填充未满的数据块
bitBuffer <<=(8-j);
unsigned char temp = bitBuffer|tempBuffer;
out<<temp; // 向文件写入数据
}
return true;
}
}
/*-------------------------------------------------------
目的:将各字节依概率降序排列
输入:无
输出:bool 排序成功返回true,否则返回false
--------------------------------------------------------*/
bool FReduce::ProbSort()
{
if(m_kind == 0)
{
return false;
}
else
{
int i = 0, j = 0;
float temp;
unsigned char tchar;
for(i=0; i<m_kind; i++)
{
for(j=0; j<m_kind-i+1; j++)
{
if(m_pFBFre[j].freqency<m_pFBFre[j+1].freqency)
{
temp = m_pFBFre[j].freqency;
tchar = m_pFBFre[j].fByte;
m_pFBFre[j].freqency = m_pFBFre[j+1].freqency;
m_pFBFre[j].fByte = m_pFBFre[j+1].fByte;
m_pFBFre[j+1].freqency = temp;
m_pFBFre[j+1].fByte = tchar;
}
}
}
return true;
}
}
/*-------------------------------------------------------
目的:把'0''1'字符数组转化成二进制数据块
输入:char* ch 一个指字符数组头指针
int length 字符数组的长度,默认值为8(一个字节长)
输出:返回一个字节长度的数据块
--------------------------------------------------------*/
unsigned char FReduce::Trans(char *ch, int length)
{
unsigned char dateblock = 0x00; // 存放8位二进制数据
char chbuffer = 0x01; // 临时存放字符
int i = 0;
for(i = 0; i<length; i++)
{
if(ch[i] == '1')
{
chbuffer = 1;
chbuffer <<= (length-1-i);
dateblock += chbuffer;
}
}
return dateblock;
}
/*-------------------------------------------------------
目的:从编码规则中选择出ch的码子,并返回其在树组中下标
输入:unsigned char ch 输入要查询字节
输出:int 返回ch在编码规则数组中对应的下标
--------------------------------------------------------*/
int FReduce::Search(unsigned char ch)
{
int i = 0;
for(i=0; i<m_kind; i++)
{
if(ch == m_pCRule[i].fByte)
{
return i;
}
}
return -1;
}
/*------------------------------------------------------
目的:打印测试结果
输入:char* oriFile 原始文件
char* dstFile 目标文件
输出:无
------------------------------------------------------*/
void FReduce::TestResult(char* oriFile, char* dstFile)
{
int i = 0;
cout<<"信源符号的概率分布:"<<endl;
cout<<"xi"<<'\t'<<"value"<<'\t'<<"freq"<<endl;
cout<<"-----------------------------------"<<endl;
for(i=0; i<m_kind; i++)
{
cout<<'x'<<i+1<<'='<<'\t'<< hex <<(int)m_pFBFre[i].fByte<<'\t'<< dec <<m_pFBFre[i].freqency<<endl;
}
cout<<"-----------------------------------"<<endl;
float entropy = 0.0; // 熵
float residual = 0.0; // 剩余度
for(i=0; i<m_kind; i++)
{
entropy += -(m_pFBFre[i].freqency * (log(m_pFBFre[i].freqency)/log(2)));
}
residual = entropy/(log(m_kind)/log(2));
cout<<"熵:"<<'\t'<<'\t'<<entropy<<" bit"<<endl;
cout<<"剩余度:"<<'\t'<<residual<<endl;
int* codeLen = new int[m_kind];
int sumCodeL = 0;
float aveCodeL = 0.0; // 平均码长
float codeEfficiency = 0.0; // 编码效率
for(i=0; i<m_kind; i++)
{
codeLen[i] = strlen(m_pCRule[i].code);
sumCodeL += codeLen[i];
aveCodeL += m_pFBFre[i].freqency*codeLen[i];
}
codeEfficiency = entropy / aveCodeL;
cout<<endl;
cout<<"码字:"<<endl;
cout<<"xi"<<'\t'<<"value"<<'\t'<<"len"<<'\t'<<"code"<<endl;
cout<<"-----------------------------------"<<endl;
for(i=0; i<m_kind; i++)
{
cout<<'x'<<i+1<<'\t'<< hex <<(int)m_pCRule[i].fByte<<'\t'<< dec <<codeLen[i]<<'\t'<<m_pCRule[i].code<<endl;
}
cout<<"-----------------------------------"<<endl;
cout<<"平均码长:"<<'\t'<<'\t'<<aveCodeL<<endl;
cout<<"编码效率:"<<'\t'<<'\t'<<codeEfficiency<<" %"<<endl;
cout<<endl;
cout<<"压缩结果:"<<endl;
cout<<"-----------------------------------"<<endl;
cout<<"原始文件:"<<oriFile<<'\t'<<m_num<<"字节"<<endl;
cout<<"目标文件:"<<dstFile<<"暂无"<<"字节"<<endl;
cout<<"-----------------------------------"<<endl;
cout<<endl;
cout<<" -- REPORT -- "<<endl;
cout<<"信源:"<<endl;
cout<<"-----------------------------------"<<endl;
cout<<"符号个数:"<<'\t'<<m_kind<<endl;
cout<<"熵:"<<'\t'<<'\t'<<entropy<<endl;
cout<<"剩余度:"<<'\t'<<residual<<endl;
cout<<endl;
cout<<"编码:"<<endl;
cout<<"-----------------------------------"<<endl;
cout<<"平均码长:"<<'\t'<<aveCodeL<<endl;
cout<<"编码效率:"<<'\t'<<codeEfficiency<<endl;
cout<<"理论压缩率:"<<'\t'<<"暂无"<<endl;
cout<<endl;
cout<<"压缩:"<<endl;
cout<<"-----------------------------------"<<endl;
cout<<"原始文件:"<<'\t'<<oriFile<<'\t'<<m_num<<"字节"<<endl;
cout<<"目标文件:"<<'\t'<<dstFile<<'\t'<<"暂无"<<"字节"<<endl;
cout<<"节省空间:"<<'\t'<<"暂无"<<"字节"<<endl;
cout<<"压缩率:"<<'\t'<<"暂无%"<<endl;
delete codeLen;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -