📄 huffmantreeandcode.cpp

📁 哈夫曼压缩的又一个例子程序
💻 CPP
字号:
////////////////////////////////////////////////////////////////////////////////////////////////////////////////zy
// 
// 赫夫曼编/译码器
// 朱勇 13011090
//
///////////////////////////////////////////////////////////////////////////////////////////////////

#include <iostream>
#include <fstream>
#include <iomanip>
#include <cstring>

using namespace std;

///////////////////////////////////////////////////////////
// 赫夫曼树和赫夫曼编码的存储表示
typedef struct {
	unsigned int weight;
	unsigned int parent , lChild , rChild;
} HTNode , *HuffmanTree; // 动态分配数组存储赫夫曼树

typedef struct {
	char ch;
	char* hufCh;
} HuffmanCode;           // 动态分配数组存储赫夫曼编码表

///////////////////////////////////////////////////////////
// 权值字符结点类型
typedef struct {
	char ch;
	int wt;
} wElem;                 // 动态分配数组存储读入字符与权值

///////////////////////////////////////////////////////////
// 赫夫曼编码函数
void HuffmanCoding( HuffmanTree & , 
				    HuffmanCode * , 
					wElem *       , 
					int          );

///////////////////////////////////////////////////////////
// 对文件 tobebin.txt 中的正文进行编码
// 将结果存入 codefile.txt 
void EnCoding( HuffmanCode * , const char* );

///////////////////////////////////////////////////////////
// 对文件 codefile.txt 里的代码进行译码
// 将结果存入 textfile.txt
void DeCoding( HuffmanTree   , 
			   HuffmanCode * , 
			   const char*   , 
			   const int    );

///////////////////////////////////////////////////////////
// 对 codefile.txt 里的代码进行编排,显示,打印
void PrintCode( const char* , const char* );

///////////////////////////////////////////////////////////
// 选择两个最小的结点
void SelectTwoNode( HuffmanTree , int , int& , int& );

int main() 
{
	//
	// 用文件流对象 hufInPut 打开外部文件
	// 例如 humanWeight.txt 储存字符集大小 n ，以及 n 个字符和 n 个权值
	char* wFileName = new char[ 128 ]; // 存放文件名的工作空间
	cout << "The CHAR and WEIGHT stores in which file?" << endl
		<< "Please input the file name( such as  humanWeight.txt ) : ";
	cin >> wFileName;
	ifstream hufInPut( wFileName , ios::in );
	if( !hufInPut ) cerr << "file can not be opened" << endl;

	int hufNum = 0;
	hufInPut >> hufNum; // 读入字符集大小 n 到 hufNum

	// 输出基本信息
	cout << "total number of char: " << hufNum << endl;
	cout << " char   weight" << endl;

	wElem* hufW = new wElem[ hufNum ]; // 分配 n 个字符和 n 个权值的储存空间
	for( int ii = 0 ; ii < hufNum ; ii++ )
	{
		// 
		// 循环读入字符及其对应的权值
		hufInPut >> hufW[ ii ].ch >> hufW[ ii ].wt;
		cout << setw( 4 ) << hufW[ ii ].ch << setw( 8 ) << hufW[ ii ].wt << endl;
	}
	cout << endl;
	hufInPut.close();    // 关闭存放字符和权值的文件
	delete [] wFileName; // 释放工作空间
	
	HuffmanTree hufT; // 空树
	HuffmanCode* hufC = ( HuffmanCode* ) malloc ( ( hufNum + 1 ) * sizeof( HuffmanCode ) ); // 分配n个字符编码的头指针向量

	HuffmanCoding( hufT , hufC , hufW , hufNum ); // 编码中...

	//
	// 用文件流对象 hufTreeOutPut 把赫夫曼树 HT , HC 输出到文件 hufTree.txt 中
	ofstream hufTreeOutPut( "hufTree.txt" , ios::out );
	hufTreeOutPut << "-- HT ------------------------------- " << endl << setw( 9 ) 
		          << "         weight " << " parent " << " lchild " << " rchild " << endl;
	for( int tOut = 1 ; tOut <= 2*hufNum-1 ; tOut++ )
	{
		hufTreeOutPut << setw( 6 ) << tOut
			          << setw( 8 ) << hufT[ tOut ].weight 
					  << setw( 8 ) << hufT[ tOut ].parent 
					  << setw( 8 ) << hufT[ tOut ].lChild 
					  << setw( 8 ) << hufT[ tOut ].rChild << endl;
	}
	hufTreeOutPut << "-- end HT --------------------------- " << endl << endl 
		          << "-- HC ------------------------------- " << endl;
	for( int cOut = 1 ; cOut <= hufNum ; cOut++ )
	{
		hufTreeOutPut << "   " << hufC[ cOut ].ch << " ---->> " << hufC[ cOut ].hufCh << endl;
	}
	hufTreeOutPut << "-- convert -- ok -------------------- " << endl;
	hufTreeOutPut.close(); // 输出完毕，关闭文件

	delete [] hufW; // 释放内存

	// 向屏幕输出正文内容
	cout << "the text which is to be EnCoding is: " << endl;
	ifstream textIn( "tobebin.txt" , ios::in );
	if( !textIn ) cerr << "file can not be opened" << endl;
	char textInBuf = '0';
	while( textIn.get( textInBuf ) ) cout << textInBuf;
	cout << endl << endl;

	EnCoding( hufC , "tobebin.txt" );  // 对正文编码

	// 向屏幕输出编码过的代码内容
	cout << "after EnCoding , the code looks like: " << endl;
	ifstream codeIn( "codefile.txt" , ios::in );
	if( !codeIn ) cerr << "file can not be opened" << endl;
	char codeInBuf = '0';
	while( codeIn.get( codeInBuf ) ) cout << codeInBuf;
	cout << endl << endl;

	DeCoding( hufT , hufC , "codefile.txt" , hufNum ); // 译码

	// 向屏幕输出译码后的内容
	cout << "after DeCoding , the code looks like: " << endl;
	ifstream dcodeIn( "textfile.txt" , ios::in );
	if( !dcodeIn ) cerr << "file can not be opened" << endl;
	char dcodeInBuf = '0';
	while( dcodeIn.get( dcodeInBuf ) ) cout << dcodeInBuf;
	cout << endl << endl;

	// 每行 50 个字符
	cout << "the print style is looks like: " << endl;
	PrintCode( "codefile.txt" , "codeprint.txt" ); // 打印代码
	cout << endl;
                                                                                                                                                                                                                                                                   cout << endl << "name: Zhu Yong" << endl << "classID:13011090" << endl << "2003 software school" << endl;
	return 0;
}

//////////////////////////////////////////////////////////////////////////////////////////////////
//
// w 存放 n 个字符的权值（ 均大与0 ）
// 构造赫夫曼树 HT , 并求出 n 个字符的赫夫曼编码 HC
//
void HuffmanCoding( HuffmanTree &HT , HuffmanCode* HC , wElem* w , int n ) 
{
	if( n <= 1 ) return;

	int m = 2 * n - 1; // 赫夫曼树的结点数目
	HT = ( HuffmanTree ) malloc ( ( m + 1 ) * sizeof( HTNode ) ); //0 号单元未用
	HuffmanTree p = HT;
	p++;               // p 指向 HT 的第 1 号单元
	int i=0 , ww=0;    // ww 为 wElem* w 的下标 
	for( i = 1 ; i <= n ; i++ , p++ , ww++ ) 
	{
		p->weight = w[ww].wt;
		p->parent = p->lChild = p->rChild = 0;      //
	}                                               // 初始化
	for( ; i <= m ; ++i , ++p )                     // 
	{
		p->weight = p->parent = p->lChild = p->rChild = 0;
	}

	//
	// 建赫夫曼树
	// 在 HT[ 1 .. i-1 ]选择parent 为 0 且 weight 最小的两个结点，其序号分别为 s1 和 s2 
	//
	int s1=0 , s2=0;
	for( i = n + 1 ; i <= m ; ++i )            
	{
	    SelectTwoNode( HT , i-1 , s1 , s2 ); 
		HT[ s1 ].parent = i;   // 标记 parent               
		HT[ s2 ].parent = i;   // 标记 parent             
		HT[ i ].lChild = s1;   // 标记左孩子
		HT[ i ].rChild = s2;   // 标记右孩子
		HT[ i ].weight = HT[ s1 ].weight + HT[ s2 ].weight; // 新结点的权值
	}                                              

	//
	// 从叶子到根逆向求每个字符的赫夫曼编码
	//
	char* cd = new char[ n ];         // 分配求编码的工作空间
	cd[ n - 1 ] = '\0';               // 编码结束符
	int c = 0;
	int f = 0;
	for( i = 1 ; i <= n ; ++i )       // 逐个字符求赫夫曼编码
	{
		int start = n - 1;            // 编码结束符位置
		for( c = i , f = HT[ i ].parent ; f != 0 ; c = f , f = HT[ f ].parent ) // 从叶子到根逆向求编码
		{
			if( HT[ f ].lChild == c ) { cd[ --start ] = '0'; }
			else { cd[ --start ] = '1'; }
		}
		HC[ i ].ch = w[ i-1 ].ch ;    // 复制字符
		HC[ i ].hufCh = ( char* ) malloc ( ( n - start ) * sizeof( char ) ); // 为第 i 个字符编码分配空间
		strcpy( HC[ i ].hufCh , &cd[ start ] );              // 从 cd 复制编码到 HC
	}
	delete [] cd;  // 释放工作空间

} // end HuffmanCoding()


//////////////////////////////////////////////////////////////////////////////////////////////////
//
// 选择两个最小的结点 , 序号分别放在 s1 和 s2 中
//
void SelectTwoNode( HuffmanTree HT , int i , int& s1 , int& s2 )
{
	unsigned int sm1 , sm2;   // 用于权值的比较
	s1 = 1;   // 从序号为 1 的权值开始 
	s2 = 1;
	int m=0;  // 最小权值的序号临时变量
	for( m = 1 ; m <= i ; m++ ) // 第一遍中第一个 parent 为 0 的结点
	{
		if( HT[ m ].parent != 0 ) continue;
		else 
		{
			sm1 = HT[ m ].weight;
			s1=m;
			break;
		}
	}
	for( int j = m+1 ; j <= i ; j++ ) // 第一遍选第一个最小的
	{
		if( HT[ j ].parent != 0 ) continue;
		else
		{
			if( sm1 > HT[ j ].weight ) 
			{
				sm1 = HT[ j ].weight;
				s1 = j;
			}
		}
	}
	for( m = 1 ; m <= i ; m++ ) // 第二遍中第一个 parent 为 0 的结点
	{
		if( HT[ m ].parent != 0 ) continue;
		else 
		{
			sm2 = HT[ m ].weight;
			s2=m;
			if( s2 == s1 ) continue;
			else break;
		}
	}
	for( int k = m+1 ; k <= i ; k++ ) // 第二遍选第二个最小的
	{
		if( HT[ k ].parent != 0 ) continue;
		else
		{
			if( (HT[ k ].weight < sm2) && ( k != s1 ) ) // 保证sm1 != sm2
			{
				sm2 = HT[ k ].weight;
				s2 = k;
			}
		}
	}
} // end SelectTwoNode()

//////////////////////////////////////////////////////////////////////////////////////////////////
//
// 要编码的正文存放在 iFile 里
// 通过 fIn 的成员函数 get( inBuf ) 从 iFile 里读入每一个字符 ，其中 char inBuf 为字符缓冲区 ^_^
// 
// 编码后的结果通过 fOut( oftream 的对象 ) 输出到文件 codefile.txt
//
void EnCoding( HuffmanCode* HC , const char* iFile )
{
	ifstream fIn( iFile , ios::in ); 
	char inBuf = '#';
	ofstream fOut( "codefile.txt" , ios::out );

	int sub = 0; // HC 的下标
	while( fIn.get( inBuf ) ) // 读入字符，直至文件结束符
	{
		if( inBuf == ' ' ) 
		{
			sub = 1; // HC[ 1 ].hufCh == ' '
			fOut << HC[ sub ].hufCh;
		}
		else if( inBuf == '\n' )
		{
			continue;
		}
		else 
		{
			//
			// 下标换算
			// 因为 'A' 的 ASCII码为 65 ，又'A' 在 HC 里的下标为 2
			// 即 HC[ 2 ].hufCh == 'A'.  以下的字符雷同
			sub = inBuf - 63;     
			fOut << HC[ sub ].hufCh;
		}			
	}
                   //
	fIn.close();   // 显式关闭文件
	fOut.close();  //
                    
} // end EnCoding()

//////////////////////////////////////////////////////////////////////////////////////////////////
// 
// 本应该把赫夫曼编码放进 map( , ) 里的，但本程序没有这样做，显得效率有点低
// 在求子串相应的字符时，用的方法为在 HC 里进行逐字扫描比较  ^&^
// 文件流的使用同 EeCoding() 里的一样
// 参数 n 为字符个数，p = 2*n-1 就为 HT 的结点数目. HT[p]就为 HT 的根.
//
void DeCoding( HuffmanTree HT , HuffmanCode * HC , const char* iFile , const int n )
{
	ifstream fIn( iFile , ios::in );
	char inBuf = '1';
	ofstream fOut( "textfile.txt" , ios::out );

	char* cd = new char[ n ];  // 储存字串的工作空间
	int start = 0;             // 译码开始标记
	int p = 2 * n - 1;         // 根下标
	int iHC = 1;               // 用于扫描 HC 的循环变量

	while( fIn.get( inBuf ) )  // 循环读入 '0' 或 '1' 或 '\n'
	{
		if( inBuf == '0' )
		{
			if( HT[ p ].lChild != 0 )   // 不是叶子
			{
				cd[ start++ ] = inBuf;  // 压 inBuf 进 cd*
				p = HT[ p ].lChild;     // 进入左子树
				continue;
			}
			else // 左叶子
			{
				cd[ start ] = '\0';     // 子串结束符
				for( iHC = 1 ; iHC <= n ; iHC++ ) // 寻找与子串匹配的字符
				{
					if( strcmp( HC[ iHC ].hufCh , cd ) == 0 )
					{
						fOut << HC[ iHC ].ch;
						break; // 找到，跳出 for
					}
					else continue;
				}

				start = 0;                //
				cd[ start++ ] = inBuf;    // 为读下一子串做准备
				p = HT[ 2*n - 1 ].lChild; // 注意，这时 p 已指向 root 的 left child
			}
		}
		else if( inBuf == '1' )
		{
			if( HT[ p ].rChild != 0 )   // 不是叶子
			{
				cd[ start++ ] = inBuf;  // 压 inBuf 进 cd*
				p = HT[ p ].rChild;     // 进入左子树
				continue;
			}
			else // 右叶子
			{
				cd[ start ] = '\0';     // 子串结束符
				for( iHC = 1 ; iHC <= n ; iHC++ ) // 寻找与子串匹配的字符
				{
					if( strcmp( HC[ iHC ].hufCh , cd ) == 0 )
					{
						fOut << HC[ iHC ].ch;
						break; // 找到，跳出 for
					}
					else continue;
				}

				start = 0;                //
				cd[ start++ ] = '1';      // 为读下一子串做准备
				p = HT[ 2*n - 1 ].rChild; // 注意，这时 p 已指向 root 的 right child
			}
		}
		else if( inBuf == '\n' ) // 有必要的话，加了个回车符，哈哈
		{
			continue;
/*			cd[ start ] = '\0';         // 子串结束符
			for( iHC = 1 ; iHC <= n ; iHC++ ) // 寻找与子串匹配的字符
			{
				if( strcmp( HC[ iHC ].hufCh , cd ) == 0 )
				{
					fOut << HC[ iHC ].ch;
					break; // 找到，跳出 for
				}
				else continue;
			}

			fOut << endl;

			start = 0;            // 
			p = 2*n - 1;          // p 指向 root
*/          			
		}
		else
		{
		}
	}
	
	//
	// 不要忘了最后一个字符。
	cd[ start ] = '\0';
	for( iHC = 1 ; iHC <= n ; iHC++ ) // 寻找与子串匹配的字符
	{
		if( strcmp( HC[ iHC ].hufCh , cd ) == 0 )
			{
				fOut << HC[ iHC ].ch;
				break; // 找到，跳出 for
			}
		else continue;
	}

	delete [] cd; // 释放工作空间
	fIn.close();  // 关闭文件
	fOut.close(); // 关闭文件

} // end DeCoding()

///////////////////////////////////////////////////////////////////////////////////////////////////
// 
// 对 codefile.txt 里的代码进行编排
// 以每行 50 个代码的格式显示在终端上，同时存入文件 codeprint.txt 里
// 
void PrintCode( const char* iFile , const char* oFile )
{
	ifstream fIn( iFile , ios::in );
	char inBuf = '#';
	ofstream fOut( oFile , ios::out );
	
	int count = 0; // 记数, count %= 50

	while( fIn.get( inBuf ) )
	{
		if( count > 49 ) { cout << endl; fOut << endl; count%=50; } // 换行
		cout << inBuf;
		fOut << inBuf;		
		count++;
	}

	fIn.close();
	fOut.close();
} // end PrintCode()
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -