⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 huffman.cpp

📁 计算机英汉机器翻译系统中的英语词性标注方法实现
💻 CPP
字号:
#include "stdafx.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "Huffman.h"

unsigned char m_cBitContainer;
int m_cBitCounter;
LPSTR m_pszTarStr;
int m_nTarSite;

CHuffman::CHuffman()
{
	m_cBitContainer = (UCHAR)0;
	m_cBitCounter = 0;

	for ( int Loop=0;Loop<NODETREESIZE;Loop++ ) {
		m_OurTree[Loop].m_lFreq = 0L;
		m_OurTree[Loop].m_nParent = -1;
		m_OurTree[Loop].m_nRight = -1;
		m_OurTree[Loop].m_nLeft = -1;
	}
}

CHuffman::~CHuffman()
{
}

void CHuffman::HuffmanTest()
{
	return;
	int nNodeCounter = 256;
	int Loop;
	for ( Loop=0;Loop<nNodeCounter;Loop++ )	{
		if ( m_OurTree[Loop].m_nRight > 400 )
			ASSERT(FALSE);

		if ( m_OurTree[Loop].m_nRight < -1 )
			ASSERT(FALSE);

		if ( m_OurTree[Loop].m_nLeft > 400 )
			ASSERT(FALSE);

		if ( m_OurTree[Loop].m_nLeft < -1 )
			ASSERT(FALSE);
	}
}

void CHuffman::BuildHufTree(void)
{
	int nNodeCounter = 256;
	int Loop;
	for ( Loop=0;Loop<nNodeCounter;Loop++ )	{
		m_OurTree[Loop].m_nParent = -1;
		m_OurTree[Loop].m_nRight = -1;
		m_OurTree[Loop].m_nLeft = -1;
	}

	while( TRUE ) {
		int nMinFreq0 = -1;
		int nMinFreq1 = -1;
		for ( Loop=0;Loop<nNodeCounter;Loop++ ) {
			if ( Loop != nMinFreq0 ) {
				if ( m_OurTree[Loop].m_lFreq > 0
				  && m_OurTree[Loop].m_nParent == -1 ) {
					if ( (nMinFreq0 == -1)
					 || (m_OurTree[Loop].m_lFreq < m_OurTree[nMinFreq0].m_lFreq) ) {
						if ( nMinFreq1==-1
						  || m_OurTree[nMinFreq0].m_lFreq < m_OurTree[nMinFreq1].m_lFreq ) 
							nMinFreq1=nMinFreq0;
						nMinFreq0 = Loop;
					} else if ( (nMinFreq1 == -1)
						|| (m_OurTree[Loop].m_lFreq < m_OurTree[nMinFreq1].m_lFreq) )
						nMinFreq1 = Loop;
				}
			}
		}
		if ( nMinFreq1 == -1 ) {
			m_nNumOfRootNode = nMinFreq0;
			break;
		}
		m_OurTree[nMinFreq0].m_nParent = nNodeCounter;
		m_OurTree[nMinFreq1].m_nParent = nNodeCounter;
		m_OurTree[nNodeCounter].m_lFreq = m_OurTree[nMinFreq0].m_lFreq
								+ m_OurTree[nMinFreq1].m_lFreq;

		m_OurTree[nNodeCounter].m_nRight = nMinFreq0;
		m_OurTree[nNodeCounter].m_nLeft = nMinFreq1;
		m_OurTree[nNodeCounter].m_nParent = -1;
		nNodeCounter++;
	}

}

void CHuffman::Output1BitToBuff(int nBit)
{
	if ( (m_cBitCounter==8) || (nBit==-1) ) {

		while ( m_cBitCounter < 8 ) {
			m_cBitContainer <<= 1;
			m_cBitCounter ++;
		}

		*(m_pszTarStr + m_nTarSite) = m_cBitContainer;
		m_nTarSite ++;
		m_cBitCounter = 0;

	}
	m_cBitContainer = (m_cBitContainer << 1) | (UCHAR)nBit;
	m_cBitCounter ++;
}

void CHuffman::Compress1ByteToBuff(int node,int child)
{

	if ( m_OurTree[node].m_nParent != -1 )
		Compress1ByteToBuff(m_OurTree[node].m_nParent,node);
	if ( child != -1 ) {
		if ( child == m_OurTree[node].m_nRight )
			Output1BitToBuff(0);
		else if ( child == m_OurTree[node].m_nLeft )
			Output1BitToBuff(1);
	}

}

BOOL CHuffman::CreateHuffmanFreqData(LPSTR pszDicName,
								LPSTR pszHuffmanFreqDataName)
// 产生用于生成HUFFMAN树的统计数据
{

	FILE *fpEngWordDic = fopen(pszDicName,"rb");
	if ( fpEngWordDic == NULL ) {
		CString strMsg;
		strMsg.Format("Cann't Open File %s!",
					  pszDicName);
		AfxMessageBox(strMsg);
		return FALSE;
	}

	FILE *fpHuffDat = fopen(pszHuffmanFreqDataName,"wb");
	if ( fpHuffDat == NULL ) {
		CString strMsg;
		strMsg.Format("Cann't Create File %s !",
					  pszHuffmanFreqDataName);
		AfxMessageBox(strMsg);
		return FALSE;
	}

	unsigned char c;
	int nIndex;
	long lOrigBytes = 0;
	long lActiveSymbs = 0;
	while ( !feof( fpEngWordDic ) ) {
		c = getc(fpEngWordDic);
		
		if ( c==0x0d || c==0x0a ) continue;
		
		if ( m_OurTree[c].m_lFreq == 0 ) {
			lActiveSymbs ++;
		}
		m_OurTree[c].m_lFreq ++;
		lOrigBytes ++;
	}        
	fwrite(&lOrigBytes,sizeof(lOrigBytes),1,fpHuffDat);
	fwrite(&lActiveSymbs,sizeof(lActiveSymbs),1,fpHuffDat);
	
	for ( nIndex=0;	nIndex<256;nIndex++ ) {
		if ( m_OurTree[nIndex].m_lFreq > 0 ) {
			fwrite(&nIndex,sizeof(char),1,fpHuffDat);
			fwrite(&m_OurTree[nIndex].m_lFreq,sizeof(long),1,fpHuffDat);
		}
	}

	fclose(fpEngWordDic);

	fclose(fpHuffDat);
	return TRUE;
}

BOOL CHuffman::CreateHuffmanFreqDataEx(LPSTR pszDicInfoName,
								LPSTR pszHuffmanFreqDataName)
// 产生用于生成HUFFMAN树的统计数据
{

	FILE *fpEngWordDic = fopen(pszDicInfoName,"rb");
	if ( fpEngWordDic == NULL ) {
		CString strMsg;
		strMsg.Format("Cann't Open File %s !",
					  pszDicInfoName);
		AfxMessageBox(strMsg);
		return FALSE;
	}

	FILE *fpHuffDat = fopen(pszHuffmanFreqDataName,"wb");
	if ( fpHuffDat == NULL ) {
		CString strMsg;
		strMsg.Format("Cann't Create File %s !",
					  pszHuffmanFreqDataName);
		AfxMessageBox(strMsg);
		return FALSE;
	}

	unsigned char c;
	int nIndex;
	long lOrigBytes = 0;
	long lActiveSymbs = 0;
	char szLine[500];
	char *pszTep;
	do {
		fgets(szLine,500,fpEngWordDic);
		if ( feof(fpEngWordDic) ) break;
		pszTep = strstr(szLine," ,");
		if ( pszTep != NULL )
			pszTep = '\0';
		pszTep = szLine;
		while ( *pszTep != '\0' ) {
			c = *pszTep;
			pszTep ++;

			if ( m_OurTree[c].m_lFreq == 0 ) {
				lActiveSymbs ++;
			}
			m_OurTree[c].m_lFreq ++;
			lOrigBytes ++;
		}
	} while ( TRUE );

	fwrite(&lOrigBytes,sizeof(lOrigBytes),1,fpHuffDat);
	fwrite(&lActiveSymbs,sizeof(lActiveSymbs),1,fpHuffDat);
	
	for ( nIndex=0;	nIndex<256;nIndex++ ) {
		if ( m_OurTree[nIndex].m_lFreq > 0 ) {
			fwrite(&nIndex,sizeof(char),1,fpHuffDat);
			fwrite(&m_OurTree[nIndex].m_lFreq,sizeof(long),1,fpHuffDat);
		}
	}

	fclose(fpEngWordDic);

	fclose(fpHuffDat);
	return TRUE;
}

BOOL CHuffman::ReadHuffmanFreqDate(LPSTR pszHuffmanFreqDataName)
{
	FILE *fpHuffDat = fopen(pszHuffmanFreqDataName,"rb");
	if ( fpHuffDat == NULL ) {
		CString strMsg;
		strMsg.Format("Cann't Create File %s!",
					  pszHuffmanFreqDataName);
		AfxMessageBox(strMsg);
		return FALSE;
	}

	long lOrigBytes = 0;
	long lActiveSyms;
	fread(&lOrigBytes,sizeof(lOrigBytes),1,fpHuffDat);
	fread(&lActiveSyms,sizeof(lActiveSyms),1,fpHuffDat);

	while ( lActiveSyms-- ) {
		unsigned char c;
		fread(&c,sizeof(char),1,fpHuffDat);
		fread(&m_OurTree[c].m_lFreq,sizeof(long),1,fpHuffDat);
	}

	fclose(fpHuffDat);
	return TRUE;
}

void CHuffman::CompressString(LPSTR pszSouStr,int nSouLen,
							  LPSTR pszTarStr,int &nTarLen)
{

	m_cBitContainer = (UCHAR)0;
	m_cBitCounter = 0;

	unsigned char c = 'A';

	m_nTarSite = 0;
	m_pszTarStr = pszTarStr;
	
 	// 在待压缩串的最大长度小于256的情况使用下面两行
	*m_pszTarStr = (unsigned char)nSouLen;
	m_pszTarStr ++;

	ASSERT(nSouLen < 256);
	// 在待压缩串的最大长度大于256的情况使用下面两行
	//*(short*)m_pszTarStr = (short)nSouLen;
	//m_pszTarStr += sizeof(short);

	while ( c != '\0' ) {
		c = *pszSouStr;
		if ( c == '\0' ) break;
		pszSouStr ++;
		Compress1ByteToBuff(c,0);
	}
	Output1BitToBuff(-1);
	
	nTarLen = m_nTarSite + 1;

	//HuffmanTest();
}	

void CHuffman::DeCompressString(LPCSTR pszSouLine,
					  LPSTR pszTarLine,int &nTarLen)
//注:调用AfxMessageBox后再调用HUFFMAN类的函数将出错

// pszSouLine 待解压缩的字符串
// pszTarLine 解压缩后的字符串
// nTarLen 解压缩后的字符串的长度
{
	LPCSTR pszNowSouPtr = pszSouLine;
	int nNowSouSite = 0;
	nTarLen = 0;
	
	//nSouLen --;

	//unsigned char m_cBitContainer;
	//unsigned char m_cBitCounter = 0;
	int nNumOfTgSymb;

	// 在待压缩串的最大长度小于256的情况使用下面两行
	short sSouLen = (short)*(pszNowSouPtr);
	pszNowSouPtr ++;

	// 在待压缩串的最大长度大于256的情况使用下面两行
	//short sSouLen = *((short*)pszNowSouPtr);
	//pszNowSouPtr += sizeof(short);
	
	while ( sSouLen-- ) {
		nNumOfTgSymb = m_nNumOfRootNode;
		while ( m_OurTree[nNumOfTgSymb].m_nRight != -1 ) {   
			if ( m_cBitCounter == 0 ) {
				
				m_cBitContainer = *pszNowSouPtr;
				pszNowSouPtr ++;
				nNowSouSite ++;
				/*
				if ( nNowSouSite > nSouLen ) {
					// 正常情况下这里应永远执行不到
					*(pszTarLine++) = '\0';
					return;
				}
				*/
				m_cBitCounter = 8;
			}
			if ( m_cBitContainer & 0x80 )
				nNumOfTgSymb = m_OurTree[nNumOfTgSymb].m_nLeft;
			else
				nNumOfTgSymb = m_OurTree[nNumOfTgSymb].m_nRight;
			m_cBitContainer <<= 1;
			m_cBitCounter --;
		}             
		*pszTarLine  = (unsigned char)nNumOfTgSymb;
		pszTarLine ++;
		nTarLen ++;
	}
	*pszTarLine = '\0';
}

BOOL CompressFile()
// 测试用HUFFMAN压缩和解压缩
{
	char szEngWordDicName[] = "english.txt";
	char szHuffmanFreqDataName[] = "HuffFreq.dat";
	CHuffman huffman;
	
	BOOL bResult;
	
	/*bResult = huffman.CreateHuffmanFreqData(szEngWordDicName,szHuffmanFreqDataName);
	if ( !bResult ) {
		return FALSE;
	}*/

	bResult = huffman.ReadHuffmanFreqDate(szHuffmanFreqDataName);
	if ( !bResult ) {
		return FALSE;
	}

	huffman.BuildHufTree();
	
	char szSouStr[200];
	char szTarStr[200];
	char szSou2Str[200];
	FILE *fpInfo;
	fpInfo = fopen("CompInfo.txt","wb");
	if ( fpInfo == NULL ) {
		AfxMessageBox("Cann't Create File!");
		return FALSE;
	}
	CString strInfo;

	int nSouLen,nTarLen,nSou2Len;
	FILE *fpText = fopen(szEngWordDicName,"rb");
	LPSTR pszTep;
	int nMaxSouLen = 0;
	int nMaxTarLen = 0;
	do {
		fgets(szSouStr,200,fpText);
		if ( feof(fpText) ) break;

		pszTep = strchr(szSouStr,0x0d);
		if ( pszTep != NULL ) *pszTep = '\0';
		
		nSouLen = strlen(szSouStr);
		huffman.CompressString(szSouStr,nSouLen,szTarStr,nTarLen);
		memset(szTarStr+nTarLen,0x0,10);
		huffman.DeCompressString(szTarStr,szSou2Str,
								nSou2Len);
		if ( nSouLen > nMaxSouLen )
			nMaxSouLen = nSouLen;
		if ( nTarLen > nMaxTarLen )
			nMaxTarLen = nTarLen;

		// Write Compress Info
		strInfo.Format("Sou Len=%03d,TarLen=%03d\r\n",nSouLen,nTarLen);
		fputs(strInfo,fpInfo);
		
		if ( strcmp(szSouStr,szSou2Str) != 0 ) {
			ASSERT(FALSE);
		}
	} while ( TRUE );
	
	strInfo.Format("Max Sou Len=%03d,Max TarLen=%03d\r\n",
					nMaxSouLen,nMaxTarLen);
	fputs(strInfo,fpInfo);
	fclose(fpInfo);
	fclose(fpText);
	AfxMessageBox("Finish!");
	return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -