📄 001.cpp

📁 赫夫曼树和赫夫曼编码的存储表示
💻 CPP
字号:
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//
///////////////////////////////////////////////////////////////////////////////////////////////////

#include <iostream>
#include <fstream>
#include <iomanip>
#include <cstring>

using namespace std;

///////////////////////////////////////////////////////////
// 赫夫曼树和赫夫曼编码的存储表示
typedef struct {
unsigned int weight;
unsigned int parent , lChild , rChild;
} HTNode , *HuffmanTree; // 动态分配数组存储赫夫曼树

typedef struct {
char ch;
char* hufCh;
} HuffmanCode;           // 动态分配数组存储赫夫曼编码表

///////////////////////////////////////////////////////////
// 权值字符结点类型
typedef struct {
char ch;
int wt;
} wElem;                 // 动态分配数组存储读入字符与权值

///////////////////////////////////////////////////////////
// 赫夫曼编码函数
void HuffmanCoding( HuffmanTree & , 
        HuffmanCode * , 
     wElem *       , 
     int          );

///////////////////////////////////////////////////////////
// 对文件 tobebin.txt 中的正文进行编码
// 将结果存入 codefile.txt 
void EnCoding( HuffmanCode * , const char* );

///////////////////////////////////////////////////////////
// 对文件 codefile.txt 里的代码进行译码
// 将结果存入 textfile.txt
void DeCoding( HuffmanTree   , 
      HuffmanCode * , 
      const char*   , 
      const int    );

///////////////////////////////////////////////////////////
// 对 codefile.txt 里的代码进行编排,显示,打印
void PrintCode( const char* , const char* );

///////////////////////////////////////////////////////////
// 选择两个最小的结点
void SelectTwoNode( HuffmanTree , int , int& , int& );

int main() 
{
//
// 用文件流对象 hufInPut 打开外部文件
// 例如 humanWeight.txt 储存字符集大小 n ，以及 n 个字符和 n 个权值
char* wFileName = new char[ 128 ]; // 存放文件名的工作空间
cout << "The CHAR and WEIGHT stores in which file?" << endl
  << "Please input the file name( such as  humanWeight.txt ) : ";
cin >> wFileName;
ifstream hufInPut( wFileName , ios::in );
if( !hufInPut ) cerr << "file can not be opened" << endl;

int hufNum = 0;
hufInPut >> hufNum; // 读入字符集大小 n 到 hufNum

// 输出基本信息
cout << "total number of char: " << hufNum << endl;
cout << " char   weight" << endl;

wElem* hufW = new wElem[ hufNum ]; // 分配 n 个字符和 n 个权值的储存空间
for( int ii = 0 ; ii < hufNum ; ii++ )
{
  // 
  // 循环读入字符及其对应的权值
  hufInPut >> hufW[ ii ].ch >> hufW[ ii ].wt;
  cout << setw( 4 ) << hufW[ ii ].ch << setw( 8 ) << hufW[ ii ].wt << endl;
}
cout << endl;
hufInPut.close();    // 关闭存放字符和权值的文件
delete [] wFileName; // 释放工作空间

HuffmanTree hufT; // 空树
HuffmanCode* hufC = ( HuffmanCode* ) malloc ( ( hufNum + 1 ) * sizeof( HuffmanCode ) ); // 分配n个字符编码的头指针向量

HuffmanCoding( hufT , hufC , hufW , hufNum ); // 编码中...

//
// 用文件流对象 hufTreeOutPut 把赫夫曼树 HT , HC 输出到文件 hufTree.txt 中
ofstream hufTreeOutPut( "hufTree.txt" , ios::out );
hufTreeOutPut << "-- HT ------------------------------- " << endl << setw( 9 ) 
            << "         weight " << " parent " << " lchild " << " rchild " << endl;
for( int tOut = 1 ; tOut <= 2*hufNum-1 ; tOut++ )
{
  hufTreeOutPut << setw( 6 ) << tOut
             << setw( 8 ) << hufT[ tOut ].weight 
       << setw( 8 ) << hufT[ tOut ].parent 
       << setw( 8 ) << hufT[ tOut ].lChild 
       << setw( 8 ) << hufT[ tOut ].rChild << endl;
}
hufTreeOutPut << "-- end HT --------------------------- " << endl << endl 
            << "-- HC ------------------------------- " << endl;
for( int cOut = 1 ; cOut <= hufNum ; cOut++ )
{
  hufTreeOutPut << "   " << hufC[ cOut ].ch << " ---->> " << hufC[ cOut ].hufCh << endl;
}
hufTreeOutPut << "-- convert -- ok -------------------- " << endl;
hufTreeOutPut.close(); // 输出完毕，关闭文件

delete [] hufW; // 释放内存

// 向屏幕输出正文内容
cout << "the text which is to be EnCoding is: " << endl;
ifstream textIn( "tobebin.txt" , ios::in );
if( !textIn ) cerr << "file can not be opened" << endl;
char textInBuf = '0\';
while( textIn.get( textInBuf ) ) cout << textInBuf;
cout << endl << endl;

EnCoding( hufC , "tobebin.txt" );  // 对正文编码

// 向屏幕输出编码过的代码内容
cout << "after EnCoding , the code looks like: " << endl;
ifstream codeIn( "codefile.txt" , ios::in );
if( !codeIn ) cerr << "file can not be opened" << endl;
char codeInBuf = \'0\';
while( codeIn.get( codeInBuf ) ) cout << codeInBuf;
cout << endl << endl;

DeCoding( hufT , hufC , "codefile.txt" , hufNum ); // 译码

// 向屏幕输出译码后的内容
cout << "after DeCoding , the code looks like: " << endl;
ifstream dcodeIn( "textfile.txt" , ios::in );
if( !dcodeIn ) cerr << "file can not be opened" << endl;
char dcodeInBuf = \'0\';
while( dcodeIn.get( dcodeInBuf ) ) cout << dcodeInBuf;
cout << endl << endl;

// 每行 50 个字符
cout << "the print style is looks like: " << endl;
PrintCode( "codefile.txt" , "codeprint.txt" ); // 打印代码
cout << endl;
                                                                                                                                                                                                                                                                   cout << endl << "name: Zhu Yong" << endl << "classID:13011090" << endl << "2003 software school" << endl;
return 0;
}

//////////////////////////////////////////////////////////////////////////////////////////////////
//
// w 存放 n 个字符的权值（ 均大与0 ）
// 构造赫夫曼树 HT , 并求出 n 个字符的赫夫曼编码 HC
//
void HuffmanCoding( HuffmanTree &HT , HuffmanCode* HC , wElem* w , int n ) 
{
if( n <= 1 ) return;

int m = 2 * n - 1; // 赫夫曼树的结点数目
HT = ( HuffmanTree ) malloc ( ( m + 1 ) * sizeof( HTNode ) ); file://0 号单元未用
HuffmanTree p = HT;
p++;               // p 指向 HT 的第 1 号单元
int i=0 , ww=0;    // ww 为 wElem* w 的下标 
for( i = 1 ; i <= n ; i++ , p++ , ww++ ) 
{
  p->weight = w[ww].wt;
  p->parent = p->lChild = p->rChild = 0;      //
}                                               // 初始化
for( ; i <= m ; ++i , ++p )                     // 
{
  p->weight = p->parent = p->lChild = p->rChild = 0;
}

//
// 建赫夫曼树
// 在 HT[ 1 .. i-1 ]选择parent 为 0 且 weight 最小的两个结点，其序号分别为 s1 和 s2 
//
int s1=0 , s2=0;
for( i = n + 1 ; i <= m ; ++i )            
{
     SelectTwoNode( HT , i-1 , s1 , s2 ); 
  HT[ s1 ].parent = i;   // 标记 parent               
  HT[ s2 ].parent = i;   // 标记 parent             
  HT[ i ].lChild = s1;   // 标记左孩子
  HT[ i ].rChild = s2;   // 标记右孩子
  HT[ i ].weight = HT[ s1 ].weight + HT[ s2 ].weight; // 新结点的权值
}                                              

//
// 从叶子到根逆向求每个字符的赫夫曼编码
//
char* cd = new char[ n ];         // 分配求编码的工作空间
cd[ n - 1 ] = \'\\0\';               // 编码结束符
int c = 0;
int f = 0;
for( i = 1 ; i <= n ; ++i )       // 逐个字符求赫夫曼编码
{
  int start = n - 1;            // 编码结束符位置
  for( c = i , f = HT[ i ].parent ; f != 0 ; c = f , f = HT[ f ].parent ) // 从叶子到根逆向求编码
  {
   if( HT[ f ].lChild == c ) { cd[ --start ] = \'0\'; }
   else { cd[ --start ] = \'1\'; }
  }
  HC[ i ].ch = w[ i-1 ].ch ;    // 复制字符
  HC[ i ].hufCh = ( char* ) malloc ( ( n - start ) * sizeof( char ) ); // 为第 i 个字符编码分配空间
  strcpy( HC[ i ].hufCh , &cd[ start ] );              // 从 cd 复制编码到 HC
}
delete [] cd;  // 释放工作空间

} // end HuffmanCoding()


//////////////////////////////////////////////////////////////////////////////////////////////////
//
// 选择两个最小的结点 , 序号分别放在 s1 和 s2 中
//
void SelectTwoNode( HuffmanTree HT , int i , int& s1 , int& s2 )
{
unsigned int sm1 , sm2;   // 用于权值的比较
s1 = 1;   // 从序号为 1 的权值开始 
s2 = 1;
int m=0;  // 最小权值的序号临时变量
for( m = 1 ; m <= i ; m++ ) // 第一遍中第一个 parent 为 0 的结点
{
  if( HT[ m ].parent != 0 ) continue;
  else 
  {
   sm1 = HT[ m ].weight;
   s1=m;
   break;
  }
}
for( int j = m+1 ; j <= i ; j++ ) // 第一遍选第一个最小的
{
  if( HT[ j ].parent != 0 ) continue;
  else
  {
   if( sm1 > HT[ j ].weight ) 
   {
    sm1 = HT[ j ].weight;
    s1 = j;
   }
  }
}
for( m = 1 ; m <= i ; m++ ) // 第二遍中第一个 parent 为 0 的结点
{
  if( HT[ m ].parent != 0 ) continue;
  else 
  {
   sm2 = HT[ m ].weight;
   s2=m;
   if( s2 == s1 ) continue;
   else break;
  }
}
for( int k = m+1 ; k <= i ; k++ ) // 第二遍选第二个最小的
{
  if( HT[ k ].parent != 0 ) continue;
  else
  {
   if( (HT[ k ].weight < sm2) && ( k != s1 ) ) // 保证sm1 != sm2
   {
    sm2 = HT[ k ].weight;
    s2 = k;
   }
  }
}
} // end SelectTwoNode()

//////////////////////////////////////////////////////////////////////////////////////////////////
//
// 要编码的正文存放在 iFile 里
// 通过 fIn 的成员函数 get( inBuf ) 从 iFile 里读入每一个字符 ，其中 char inBuf 为字符缓冲区 ^_^
// 
// 编码后的结果通过 fOut( oftream 的对象 ) 输出到文件 codefile.txt
//
void EnCoding( HuffmanCode* HC , const char* iFile )
{
ifstream fIn( iFile , ios::in ); 
char inBuf = \'#\';
ofstream fOut( "codefile.txt" , ios::out );

int sub = 0; // HC 的下标
while( fIn.get( inBuf ) ) // 读入字符，直至文件结束符
{
  if( inBuf == \' \' ) 
  {
   sub = 1; // HC[ 1 ].hufCh == \' \'
   fOut << HC[ sub ].hufCh;
  }
  else if( inBuf == \'\\n\' )
  {
   continue;
  }
  else 
  {
   //
   // 下标换算
   // 因为 \'A\' 的 ASCII码为 65 ，又\'A\' 在 HC 里的下标为 2
   // 即 HC[ 2 ].hufCh == \'A\'.  以下的字符雷同
   sub = inBuf - 63;     
   fOut << HC[ sub ].hufCh;
  }   
}
                   //
fIn.close();   // 显式关闭文件
fOut.close();  //
                    
} // end EnCoding()

//////////////////////////////////////////////////////////////////////////////////////////////////
// 
// 本应该把赫夫曼编码放进 map( , ) 里的，但本程序没有这样做，显得效率有点低
// 在求子串相应的字符时，用的方法为在 HC 里进行逐字扫描比较  ^&^
// 文件流的使用同 EeCoding() 里的一样
// 参数 n 为字符个数，p = 2*n-1 就为 HT 的结点数目. HT[p]就为 HT 的根.
//
void DeCoding( HuffmanTree HT , HuffmanCode * HC , const char* iFile , const int n )
{
ifstream fIn( iFile , ios::in );
char inBuf = \'1\';
ofstream fOut( "textfile.txt" , ios::out );

char* cd = new char[ n ];  // 储存字串的工作空间
int start = 0;             // 译码开始标记
int p = 2 * n - 1;         // 根下标
int iHC = 1;               // 用于扫描 HC 的循环变量

while( fIn.get( inBuf ) )  // 循环读入 \'0\' 或 \'1\' 或 \'\\n\'
{
  if( inBuf == \'0\' )
  {
   if( HT[ p ].lChild != 0 )   // 不是叶子
   {
    cd[ start++ ] = inBuf;  // 压 inBuf 进 cd*
    p = HT[ p ].lChild;     // 进入左子树
    continue;
   }
   else // 左叶子
   {
    cd[ start ] = \'\\0\';     // 子串结束符
    for( iHC = 1 ; iHC <= n ; iHC++ ) // 寻找与子串匹配的字符
    {
     if( strcmp( HC[ iHC ].hufCh , cd ) == 0 )
     {
      fOut << HC[ iHC ].ch;
      break; // 找到，跳出 for
     }
     else continue;
    }

    start = 0;                //
    cd[ start++ ] = inBuf;    // 为读下一子串做准备
    p = HT[ 2*n - 1 ].lChild; // 注意，这时 p 已指向 root 的 left child
   }
  }
  else if( inBuf == \'1\' )
  {
   if( HT[ p ].rChild != 0 )   // 不是叶子
   {
    cd[ start++ ] = inBuf;  // 压 inBuf 进 cd*
    p = HT[ p ].rChild;     // 进入左子树
    continue;
   }
   else // 右叶子
   {
    cd[ start ] = \'\\0\';     // 子串结束符
    for( iHC = 1 ; iHC <= n ; iHC++ ) // 寻找与子串匹配的字符
    {
     if( strcmp( HC[ iHC ].hufCh , cd ) == 0 )
     {
      fOut << HC[ iHC ].ch;
      break; // 找到，跳出 for
     }
     else continue;
    }

    start = 0;                //
    cd[ start++ ] = \'1\';      // 为读下一子串做准备
    p = HT[ 2*n - 1 ].rChild; // 注意，这时 p 已指向 root 的 right child
   }
  }
  else if( inBuf == \'\\n\' ) // 有必要的话，加了个回车符，哈哈
  {
   continue;
/*   cd[ start ] = \'\\0\';         // 子串结束符
   for( iHC = 1 ; iHC <= n ; iHC++ ) // 寻找与子串匹配的字符
   {
    if( strcmp( HC[ iHC ].hufCh , cd ) == 0 )
    {
     fOut << HC[ iHC ].ch;
     break; // 找到，跳出 for
    }
    else continue;
   }

   fOut << endl;

   start = 0;            // 
   p = 2*n - 1;          // p 指向 root
*/             
  }
  else
  {
  }
}

//
// 不要忘了最后一个字符。
cd[ start ] = \'\\0\';
for( iHC = 1 ; iHC <= n ; iHC++ ) // 寻找与子串匹配的字符
{
  if( strcmp( HC[ iHC ].hufCh , cd ) == 0 )
   {
    fOut << HC[ iHC ].ch;
    break; // 找到，跳出 for
   }
  else continue;
}

delete [] cd; // 释放工作空间
fIn.close();  // 关闭文件
fOut.close(); // 关闭文件

} // end DeCoding()

///////////////////////////////////////////////////////////////////////////////////////////////////
// 
// 对 codefile.txt 里的代码进行编排
// 以每行 50 个代码的格式显示在终端上，同时存入文件 codeprint.txt 里
// 
void PrintCode( const char* iFile , const char* oFile )
{
ifstream fIn( iFile , ios::in );
char inBuf = \'#\';
ofstream fOut( oFile , ios::out );

int count = 0; // 记数, count %= 50

while( fIn.get( inBuf ) )
{
  if( count > 49 ) { cout << endl; fOut << endl; count%=50; } // 换行
  cout << inBuf;
  fOut << inBuf;  
  count++;
}

fIn.close();
fOut.close();
} // end PrintCode()

--------------------------------------------------------------------------------
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -