⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dicthash.cpp

📁 计算机英汉机器翻译系统中的英语词性标注方法实现
💻 CPP
字号:
#include "stdafx.h"
//#include "DictMent.h"
#include "Diction.h"
#include "PosTag.h"
#include "ChildFrm.h"
#include <stdio.h>
#include <string.h>
#include <memory.h>

// Records pszKeyword in the currently selected hash bitmap (m_pszHashTable).
// The keyword is hashed to a bit index modulo the application's
// m_nHashTableSize; the index is split into a byte offset and a position
// inside that byte, and m_narrayDector supplies the mask for that position.
// If the bit was already set, m_nSameHushWordsNum (a collision counter) is
// incremented before the bit is set again.
void DictSearch::SetHashTableValue(LPCSTR pszKeyword)
{
	CPosTagApp* pApp = (CPosTagApp*) AfxGetApp();
	const UINT nBitIndex = HashKey(pszKeyword, pApp->m_nHashTableSize);
	const UINT nByteIndex = nBitIndex / 8;
	const UINT nBitInByte = nBitIndex % 8;

	// Read the byte as unsigned so the mask test is not affected by sign.
	const UCHAR chCurrent = m_pszHashTable[nByteIndex];
	if ( (chCurrent & m_narrayDector[nBitInByte]) != 0 )
		m_nSameHushWordsNum++;

	m_pszHashTable[nByteIndex] |= (BYTE)m_narrayDector[nBitInByte];
}

// Tests whether the bit for pszKeyword is set in the currently selected hash
// bitmap (m_pszHashTable).
// Returns the masked byte value: zero when the bit is clear, non-zero (not
// necessarily 1) when it is set. A non-zero result only means the keyword is
// possibly present, since different keywords can hash to the same bit.
BOOL DictSearch::HashFindPossible(LPCSTR pszKeyword)
{
	CPosTagApp* pApp = (CPosTagApp*) AfxGetApp();
	const UINT nBitIndex = HashKey(pszKeyword, pApp->m_nHashTableSize);

	UCHAR chMasked = m_pszHashTable[nBitIndex / 8];
	chMasked &= m_narrayDector[nBitIndex % 8];
	return chMasked;
}

// Hashes a NUL-terminated keyword into the range [0, nHashLen).
// Each character is folded in as hash = hash * 33 + ch using unsigned
// wrap-around arithmetic; the result is then reduced modulo nHashLen.
// nHashLen must be non-zero (the modulo is not guarded).
inline UINT DictSearch::HashKey(LPCSTR pszKeyword,UINT nHashLen) const
{
	UINT nAccum = 0;
	for (LPCSTR pch = pszKeyword; *pch != '\0'; ++pch)
		nAccum = nAccum * 33 + *pch;
	return nAccum % nHashLen;
}

void DictSearch::SetHableTableSize()
{
	m_nHashTableOffset = 0;
	( (CPosTagApp*) AfxGetApp() ) ->m_nHashTableSize = 0xfffff;//0xa0000(7008);
}

BOOL DictSearch::AllocHashTableMemory()
{
	( (CPosTagApp* ) AfxGetApp() ) ->m_pszSysHashTable = NULL;
	( (CPosTagApp* ) AfxGetApp() ) ->m_pszAddtionHashTable = NULL;

	if ( ( (CPosTagApp* ) AfxGetApp() ) ->m_nHashTableSize > 0 ) {
		( (CPosTagApp* ) AfxGetApp() ) ->m_pszSysHashTable = (LPSTR)GlobalLock(GlobalAlloc(
						GMEM_MOVEABLE|GMEM_SHARE|GMEM_ZEROINIT,
						( (CPosTagApp* ) AfxGetApp() ) ->m_nHashTableSize/8 + 10));
		if ( ( (CPosTagApp* ) AfxGetApp() ) ->m_pszSysHashTable == NULL ) // 内存不够
			return FALSE;
		
		( (CPosTagApp* ) AfxGetApp() ) ->m_pszAddtionHashTable = (LPSTR)GlobalLock(GlobalAlloc(
						GMEM_MOVEABLE|GMEM_SHARE|GMEM_ZEROINIT,
						( (CPosTagApp*) AfxGetApp() ) ->m_nHashTableSize/8 + 10));
		if (( (CPosTagApp* ) AfxGetApp() ) -> m_pszAddtionHashTable == NULL ) // 内存不够
			return FALSE;
	}
	
	m_pszHashTable = ( (CPosTagApp* ) AfxGetApp() ) ->m_pszSysHashTable;		//另默认的值为m_pszSysHashTable
	
	( (CPosTagApp* ) AfxGetApp() ) ->m_pszUserHashTable = NULL;

	if ( ( (CPosTagApp* ) AfxGetApp() ) ->m_nHashTableSize > 0 ) {
		( (CPosTagApp* ) AfxGetApp() ) ->m_pszUserHashTable = (LPSTR)GlobalLock(GlobalAlloc(
						GMEM_MOVEABLE|GMEM_SHARE|GMEM_ZEROINIT,
						( (CPosTagApp* ) AfxGetApp() ) ->m_nHashTableSize/8 + 10));
		if ( ( (CPosTagApp* ) AfxGetApp() ) ->m_pszUserHashTable == NULL ) // 内存不够
			return FALSE;
	}

	bHaveAllocMemory = TRUE;
	return TRUE;
}

void DictSearch::FreeHashTableMemory()
{
	if ( ( (CPosTagApp* ) AfxGetApp() ) ->m_pszSysHashTable != NULL ) {
		GlobalUnlock(GlobalHandle(( (CPosTagApp* ) AfxGetApp() ) ->m_pszSysHashTable));
		GlobalFree(GlobalHandle(( (CPosTagApp* ) AfxGetApp() ) ->m_pszSysHashTable));
		( (CPosTagApp* ) AfxGetApp() ) ->m_pszSysHashTable = NULL;
	}
/*
	if ( m_pszAddtionHashTable != NULL ) {
		GlobalUnlock(GlobalHandle(m_pszAddtionHashTable));
		GlobalFree(GlobalHandle(m_pszAddtionHashTable));
		m_pszAddtionHashTable = NULL;
	}
	
	if ( m_pszUserHashTable != NULL ) {
		GlobalUnlock(GlobalHandle(m_pszUserHashTable));
		GlobalFree(GlobalHandle(m_pszUserHashTable));
		m_pszUserHashTable = NULL;
	}
	*/
}

// Builds the active hash bitmap from a dictionary text file.
//
// Every line that starts with '#' is treated as an entry: the text between
// the '#' and the first backslash is the keyword, which is registered through
// SetHashTableValue. All other lines are skipped. Entry lines that contain no
// backslash trip the ASSERT in debug builds and are skipped in release builds
// (previously they caused a NULL write).
//
// The loop is driven by the result of fgets, so a final line without a
// trailing newline is processed and a read error ends the loop instead of
// spinning forever on stale buffer contents.
//
// After the scan, a line with the dictionary name and the number of hash
// collisions is appended to DictRes\HashInfo.txt (debug aid).
//
// pszDictName: path of the dictionary file to read.
// Returns FALSE (after showing a message box) when the dictionary cannot be
// opened or the info file cannot be opened for appending; TRUE otherwise.
BOOL DictSearch::CreateSingleDictHashTable(LPSTR pszDictName)
{
	FILE *fpDict = fopen(pszDictName,"rb");
	if ( fpDict == NULL ) {
		CString strMsg;
		strMsg.Format("Cann't open file %s !",pszDictName);
		AfxMessageBox(strMsg);
		return FALSE;
	}

	// Debug statistic: restart the collision count for this dictionary.
	m_nSameHushWordsNum = 0;

	char szLine[MAX_DICTLINE_LEN];
	while ( fgets(szLine,MAX_DICTLINE_LEN,fpDict) != NULL ) {
		if ( szLine[0] != '#' )
			continue;

		LPSTR pszKeywordEnd = strchr(szLine+1,'\\');
		ASSERT( pszKeywordEnd != NULL );
		if ( pszKeywordEnd == NULL )
			continue;	// malformed entry: no keyword terminator
		*pszKeywordEnd = '\0';

		SetHashTableValue(szLine+1);
	}

	fclose(fpDict);

	// Debug statistic: append the collision count to the info file.
	char szInfoName[] = "DictRes\\HashInfo.txt";
	FILE *fpInfo = fopen(szInfoName,"ab");
	if ( fpInfo == NULL ) {
		CString strMsg;
		strMsg.Format("Cann't write file %s !",szInfoName);
		AfxMessageBox(strMsg);
		return FALSE;
	}
	fprintf(fpInfo,"Dict = %s,Same Hash Num = %d\n",
				pszDictName,m_nSameHushWordsNum);
	fclose(fpInfo);

	return TRUE;
}

// Builds the hash bitmap for one dictionary and appends it to an already
// open index data file.
//
// Steps: reset the table size, allocate the tables, fill the active table
// from szDicName, record the table size and the current file position in
// phHead, write nHashTableSize/8 + 1 bytes of the active table at that
// position, then release the system table again.
//
// fpIndexDat: open, writable index file positioned where the table belongs.
// phHead:     header to update (nHashTableSize, nHashTableOffset); it is not
//             written to the file here.
// szDicName:  path of the dictionary text file.
// Returns FALSE when the tables cannot be allocated (a message box is shown)
// or when the bitmap cannot be written completely; TRUE otherwise.
BOOL DictSearch::CreateHashTableAndAddToDat(FILE *fpIndexDat,
				 DictHeader &phHead,char *szDicName)
{
	SetHableTableSize();

	if ( AllocHashTableMemory() == FALSE ) {
		AfxMessageBox("No enough memory !");
		return FALSE;
	}

	//yys 98.5.14 Bgn
	// NOTE(review): the result is not propagated, as before.
	// CreateSingleDictHashTable also reports FALSE when only its debug log
	// cannot be written, so treating FALSE as fatal here could reject a
	// correctly built table - confirm before tightening.
	CreateSingleDictHashTable(szDicName);
	//yys 98.5.14 End

	phHead.nHashTableSize = ( (CPosTagApp* ) AfxGetApp() ) ->m_nHashTableSize;
	phHead.nHashTableOffset = ftell(fpIndexDat);

	// Report a short or failed write instead of silently claiming success.
	BOOL bWritten = TRUE;
	if ( phHead.nHashTableSize > 0 ) {
		if ( fwrite(m_pszHashTable,phHead.nHashTableSize/8+1,
					1,fpIndexDat) != 1 )
			bWritten = FALSE;
	}

	FreeHashTableMemory();
	return bWritten;
}

// Loads the hash bitmap stored in DictRes\LilyData.Dat into the active table.
//
// The file starts with a DictHeader; its nHashTableSize becomes the
// application-wide table size, the tables are (re)allocated for that size and
// nHashTableSize/8 + 1 bytes are read from nHashTableOffset into
// m_pszHashTable.
//
// Returns FALSE when the file cannot be opened (a message box is shown), the
// header or the bitmap cannot be read completely, or the tables cannot be
// allocated. The file is closed on every path.
BOOL DictSearch::LoadHushTable()
{
	char szHashTableName[] = "DictRes\\LilyData.Dat";
	FILE *fpHash = fopen(szHashTableName,"rb");
	if ( fpHash == NULL ) {
		CString strMsg;
		strMsg.Format("Cann't open file %s !",szHashTableName);
		AfxMessageBox(strMsg);
		return FALSE;
	}

	// A failed header read would otherwise leave hushHead uninitialised and
	// feed a garbage size into the allocation below.
	DictHeader hushHead;
	if ( fread(&hushHead,sizeof(DictHeader),1,fpHash) != 1 ) {
		fclose(fpHash);
		return FALSE;
	}

	CPosTagApp* pApp = (CPosTagApp*) AfxGetApp();
	pApp->m_nHashTableSize = hushHead.nHashTableSize;

	BOOL bLoaded = AllocHashTableMemory();
	if ( bLoaded != FALSE && pApp->m_nHashTableSize > 0 ) {
		if ( fseek(fpHash,hushHead.nHashTableOffset,SEEK_SET) != 0
			|| fread(m_pszHashTable,pApp->m_nHashTableSize/8 + 1,1,fpHash) != 1 )
			bLoaded = FALSE;
	}

	fclose(fpHash);

	return bLoaded;
}

//Load系统和系统附加词典数据
int DictSearch::LoadSysHushTableFromIndexDat(FILE *fpSysIndexDat,FILE *fpAddtionIndexDat)
{//for qlp
	DictHeader hushHead;
		
	fseek(fpSysIndexDat,0L,SEEK_SET);
	fread(&hushHead,sizeof(DictHeader),1,fpSysIndexDat);	
	( (CPosTagApp* ) AfxGetApp() ) ->m_nHashTableSize = hushHead.nHashTableSize;

	if ( !bHaveAllocMemory ){
		if ( AllocHashTableMemory() == FALSE )		//分配内存
			return 0;
	}

	
	if ( ( (CPosTagApp* ) AfxGetApp() ) ->m_nHashTableSize > 0 ) {
		fseek(fpSysIndexDat,hushHead.nHashTableOffset,SEEK_SET);
		fread(( (CPosTagApp* ) AfxGetApp() ) ->m_pszSysHashTable ,( (CPosTagApp* ) AfxGetApp() ) ->m_nHashTableSize/8 + 1,1,fpSysIndexDat);
	}

	fseek(fpAddtionIndexDat,0L,SEEK_SET);
	fread(&hushHead,sizeof(DictHeader),1,fpAddtionIndexDat);	
	( (CPosTagApp* ) AfxGetApp() ) ->m_nHashTableSize = hushHead.nHashTableSize;
	
	if ( ( (CPosTagApp* ) AfxGetApp() ) ->m_nHashTableSize > 0 ) {
		fseek(fpAddtionIndexDat,hushHead.nHashTableOffset,SEEK_SET);
		fread(( (CPosTagApp* ) AfxGetApp() ) ->m_pszAddtionHashTable,( (CPosTagApp* ) AfxGetApp() ) ->m_nHashTableSize/8 + 1,1,fpAddtionIndexDat);
	}

	return 1;
}

// Loads the user dictionary hash bitmap from its index file into the
// application's m_pszUserHashTable.
//
// The DictHeader at offset 0 is read, its nHashTableSize is stored as the
// application-wide table size, and nHashTableSize/8 + 1 bytes are read from
// nHashTableOffset. This function does not allocate: the user table must
// already exist (see AllocHashTableMemory).
//
// Returns TRUE on success (including a header that announces size 0).
// Returns FALSE when the header or the bitmap cannot be read completely, or
// when a bitmap is expected but the user table is not allocated.
BOOL DictSearch::LoadUserHushTableFromIndexDat(FILE *fpUserIndexDat)
{
	CPosTagApp* pApp = (CPosTagApp*) AfxGetApp();
	DictHeader hushHead;

	fseek(fpUserIndexDat,0L,SEEK_SET);
	if ( fread(&hushHead,sizeof(DictHeader),1,fpUserIndexDat) != 1 )
		return FALSE;
	pApp->m_nHashTableSize = hushHead.nHashTableSize;

	if ( pApp->m_nHashTableSize > 0 ) {
		if ( pApp->m_pszUserHashTable == NULL )
			return FALSE;
		if ( fseek(fpUserIndexDat,hushHead.nHashTableOffset,SEEK_SET) != 0 )
			return FALSE;
		if ( fread(pApp->m_pszUserHashTable,pApp->m_nHashTableSize/8 + 1,1,fpUserIndexDat) != 1 )
			return FALSE;
	}

	return TRUE;
}

//yys 98.5.14 Bgn
// Added the parameter szDicName.
//
// Builds the hash bitmap for the dictionary szDicName and writes it, preceded
// by a DictHeader, to DictRes\LilyData.Dat (the file is recreated).
//
// File layout: DictHeader at offset 0, followed by nHashTableSize/8 + 1 bytes
// of bitmap. The header is first written with nHashTableOffset = 0 to reserve
// its space, then rewritten in place once the real offset of the bitmap is
// known. The rewrite now happens on the same open handle instead of closing
// and reopening the file.
//
// Returns FALSE when the tables cannot be allocated, or (after showing a
// message box) when the output file cannot be opened or written completely.
// On success shows "Finish!" and returns TRUE. The allocated tables are left
// in memory in either case, as before.
BOOL DictSearch::CreateHashTable(char *szDicName)
//yys 98.5.14 End
{
	SetHableTableSize();
	if ( AllocHashTableMemory() == FALSE )
		return FALSE;

	//yys 98.5.14 Bgn
	// NOTE(review): the result is not checked, as before; the helper also
	// returns FALSE when only its debug log cannot be written.
	CreateSingleDictHashTable(szDicName);
	//yys 98.5.14 End

	char szHashTableName[] = "DictRes\\LilyData.Dat";
	FILE *fpHash = fopen(szHashTableName,"wb");
	if ( fpHash == NULL ) {
		CString strMsg;
		strMsg.Format("Cann't write file %s !",szHashTableName);
		AfxMessageBox(strMsg);
		return FALSE;
	}

	// NOTE(review): only the three fields below are set before the header is
	// written raw; any other DictHeader fields keep whatever the default
	// construction left in them - confirm that is acceptable.
	DictHeader hushHead;
	strcpy(hushHead.szMagic,szDictDataMagic);
	hushHead.nHashTableSize = ( (CPosTagApp* ) AfxGetApp() ) ->m_nHashTableSize;
	hushHead.nHashTableOffset = 0;

	// Pass 1: placeholder header, then the bitmap right behind it.
	BOOL bWritten = ( fwrite(&hushHead,sizeof(DictHeader),1,fpHash) == 1 ) ? TRUE : FALSE;
	hushHead.nHashTableOffset = ftell(fpHash);
	if ( bWritten && hushHead.nHashTableSize > 0 ) {
		if ( fwrite(m_pszHashTable,hushHead.nHashTableSize/8+1,1,fpHash) != 1 )
			bWritten = FALSE;
	}

	// Pass 2: overwrite the header now that the bitmap offset is known.
	if ( bWritten ) {
		if ( fseek(fpHash,0L,SEEK_SET) != 0
			|| fwrite(&hushHead,sizeof(DictHeader),1,fpHash) != 1 )
			bWritten = FALSE;
	}

	// fclose flushes buffered data, so its failure is a write failure too.
	if ( fclose(fpHash) != 0 )
		bWritten = FALSE;

	if ( !bWritten ) {
		CString strMsg;
		strMsg.Format("Cann't write file %s !",szHashTableName);
		AfxMessageBox(strMsg);
		return FALSE;
	}

	AfxMessageBox("Finish!");

	return TRUE;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -