⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 diction.cpp

📁 计算机英汉机器翻译系统中的英语词性标注方法实现
💻 CPP
📖 第 1 页 / 共 5 页
字号:
// Tail of the numeric codes for slot "Value"; the list starts above this view.
// NOTE(review): the layout (NAME_BEGIN / NAME / NAME_value / NAME_END) matches
// the text that CDictIndex::FillMap writes to its output define file, so these
// lines look generated rather than hand-written -- confirm before editing.
const WORD Value_9 = 423;
const WORD Value_10 = 424;
const WORD Value_11 = 425;
const WORD Value_12 = 426;
const WORD Value_13 = 427;
const WORD Value_14 = 428;
const WORD Value_15 = 429;
const WORD Value_16 = 430;
const WORD Value_17 = 431;
const WORD Value_18 = 432;
const WORD Value_19 = 433;
const WORD Value_20 = 434;
const WORD Value_30 = 435;
const WORD Value_40 = 436;
const WORD Value_50 = 437;
const WORD Value_60 = 438;
const WORD Value_70 = 439;
const WORD Value_80 = 440;
const WORD Value_90 = 441;
const WORD Value_100 = 442;
const WORD Value_1000 = 443;
const WORD Value_1000000 = 444;
const WORD Value_1000000000 = 445;
// Value_END carries the same code as the last Value entry above.
const WORD Value_END = 445;


//Followings are defines of slot Per
// Per_BEGIN and Per both carry the code of the slot itself (446); the value
// codes follow consecutively and Per_END repeats the last one (452).
const WORD Per_BEGIN = 446;
const WORD Per = 446;
const WORD Per_First = 447;
const WORD Per_Second = 448;
const WORD Per_Third = 449;
const WORD Per_1 = 450;
const WORD Per_2 = 451;
const WORD Per_3 = 452;
const WORD Per_END = 452;


#include "stdafx.h"
//#include "Generate.h"
//#include "UserDDlg.h"//del by xuned
#include "SenLink.h"
#include "direct.h"
#include "Huffman.h"
#include "DictMent.h"
#include "Diction.h"
//#include "RuleCode.h"

#include <string.h>
#include <memory.h>
#include <stdio.h>
#include <ctype.h>
#include <math.h>
//qlp add 5.27
#include "PosTag.h"
#include "ChildFrm.h"
#include "DICTMENT.H"
#include "Bplus.h"

// Lexicon search object; declared here, defined in another translation unit.
extern DictSearch g_objLexSearch;

// Following defined in WordRes.cpp
extern void DictWordInit(DictNode* pDictNode);
extern void LxhFreeDictNode(DictNode* pDictNode);

//yys 98.5.26 Bgn
// Build log: sLog is the log file path and fpLog its stream.
// CDictIndex::BuildIndexInit opens fpLog on sLog in "w" mode.
char *sLog = "DictRes\\Build.log";
FILE *fpLog;
//yys 98.5.26 End

// NOTE(review): no use of _OutDict is visible in this part of the file.
#define _OutDict
// NOTE(review): presumably the index descriptor of the English-Chinese
// dictionary (IX_DESC comes from a project header) -- confirm.
IX_DESC g_EcDict;
void CreateDictInd(LPSTR lpInput,LPSTR lpInd);
// Forward declaration of the line reader defined later in this file.
extern LPSTR myfgets(LPSTR pszString,int n,FILE *fp);

// Constructs a dictionary over the data file `dfname` (record length
// DIC_DTA_ITEM_LEN) with one index, named "Word", stored in `ifname`.
// If the index manager reports that it had to create the index file, all
// indexes are rebuilt. When `isMode` is moClosed the dictionary is closed
// again before the constructor returns.
Dictionary::Dictionary(char *dfname, char *ifname, int isMode)
	: IsamMgr(dfname, DIC_DTA_ITEM_LEN)
{
	// Keep a private copy of the index file name (terminator included).
	const size_t nNameBytes = strlen(ifname) + 1;
	idxfname = new char[nNameBytes];
	memcpy(idxfname, ifname, nNameBytes);

	// Single index keyed on the word text.
	ifile[0] = new IndexMgr(idxfname, DIC_WORD_LEN, 1);
	idxname[0] = "Word";
	buffer = new char[DIC_WORD_LEN];
	noidxs = 1;

	if ( ifile[0]->created_idx() )
		rebuild_idxs();

	if ( isMode == moClosed ) {
		close();
	}
}

// Loads the record's offset field: copies the first DIC_OFFSET_LEN bytes of
// `block` into m_pszOffset.
void Dictionary::read_data(void *block)
{
	const char *pszRecord = (const char *)block;
	memcpy((void *)m_pszOffset, pszRecord, DIC_OFFSET_LEN);
}

// Stores the record's offset field: copies DIC_OFFSET_LEN bytes from
// m_pszOffset to the start of `block`.
void Dictionary::write_data(void *block)
{
	char *pszRecord = (char *)block;
	memcpy(pszRecord, (void *)m_pszOffset, DIC_OFFSET_LEN);
}

// Prepares the key buffer for index `idxno`.
//   idxno == 0 : the buffer is zeroed and filled from m_pszWord
//                (DIC_WORD_LEN bytes); a debug assertion fires if the
//                prepared key differs from m_pszWord.
//   otherwise  : the record number `recno` is written as a 4-byte key.
void Dictionary::fill_buffer(int idxno, long recno)
{
	char *pszKey = buffer;

	if ( idxno != 0 ) {
		prep(recno, pszKey, 4);
		return;
	}

	memset(buffer, 0, DIC_WORD_LEN);
	prep(m_pszWord, pszKey, DIC_WORD_LEN);
	if ( memcmp(m_pszWord, pszKey, DIC_WORD_LEN) != 0 ) {
		ASSERT(FALSE);
	}
}

// Resets the record fields by feeding read_data() an all-zero record of
// DIC_DTA_ITEM_LEN bytes.
void Dictionary::clear_buf(void)
{
	char *block = new char[DIC_DTA_ITEM_LEN];
	memset(block, 0, DIC_DTA_ITEM_LEN);
	read_data(block);
	// Allocated with new[], so it must be released with delete[]
	// (scalar delete on an array is undefined behaviour).
	delete [] block;
}

// Returns the length of the key word held in m_pszWord: the index of the
// first '\0', or DIC_WORD_LEN when the field contains no terminator.
int Dictionary::GetKeyWordLen()
{
	// The counter is declared outside the loop so that it is still in scope
	// for the return statement under standard C++ scoping rules (the old
	// form relied on the pre-standard "for" scope extension).
	int nLen = 0;
	while ( nLen < DIC_WORD_LEN && m_pszWord[nLen] != '\0' )
		nLen ++;

	return nLen;
}

// Unpacks a 6-byte index entry produced by CompressIndexOffsetInfo.
//   pszSouOffset : packed entry; bytes 0-3 hold the offset and bytes 4-5 the
//                  length, each with the low-order byte first
//   lOffset      : receives the 32-bit offset
//   nLen         : receives the 16-bit length
void DecompressIndexOffsetInfo(LPSTR pszSouOffset,long &lOffset,int &nLen)
{
	// Length: bytes 4 and 5, widened to a LONG with a zero high word.
	const WORD wLength = MAKEWORD((BYTE)pszSouOffset[4],(BYTE)pszSouOffset[5]);
	nLen = MAKELONG(wLength,0);

	// Offset: low word from bytes 0-1, high word from bytes 2-3.
	const WORD wOffsetLow = MAKEWORD(pszSouOffset[0],pszSouOffset[1]);
	const WORD wOffsetHigh = MAKEWORD(pszSouOffset[2],pszSouOffset[3]);
	lOffset = MAKELONG(wOffsetLow,wOffsetHigh);
}

void CompressIndexOffsetInfo(long lOffset,int nLen,LPSTR pszTarOffset)
// nKeywordOrgLen 英文关键字的压缩前长度
{
	ASSERT( lOffset < 0xffffff );
	ASSERT( nLen < 0xffff );
	pszTarOffset[0] = LOBYTE(LOWORD(lOffset));
	pszTarOffset[1] = HIBYTE(LOWORD(lOffset));
	pszTarOffset[2] = LOBYTE(HIWORD(lOffset));
	pszTarOffset[3] = HIBYTE(HIWORD(lOffset));
	pszTarOffset[4] = LOBYTE(LOWORD(nLen));
	pszTarOffset[5] = HIBYTE(LOWORD(nLen));

#ifdef _DEBUG
	long lResOffset;
	int nResLen;
	DecompressIndexOffsetInfo(pszTarOffset,lResOffset,nResLen);
	ASSERT( lResOffset == lOffset );
	ASSERT( nLen == nResLen );
#endif
}

// Writes `nLen` bytes of `pszIndexData` at the current position of
// `fpIndexDat` and returns the position (as reported by ftell) at which the
// data begins. The result of fwrite is not checked.
long WriteIndexData(FILE *fpIndexDat,LPCTSTR pszIndexData,int nLen)
{
	const long lStartPos = ftell(fpIndexDat);
	fwrite(pszIndexData, 1, nLen, fpIndexDat);
	return lStartPos;
}

// Reads `nLen` bytes starting at absolute file offset `lOffset` of
// `fpIndexDat` into `pszIndexData`. The results of fseek and fread are not
// checked and no terminator is appended.
void ReadIndexData(FILE *fpIndexDat,LPSTR pszIndexData,long lOffset,int nLen)
{
	fseek(fpIndexDat, lOffset, SEEK_SET);
	fread(pszIndexData, 1, nLen, fpIndexDat);
}

// Default constructor. Puts the members that BuildIndexExitInit reads into a
// defined "nothing built yet" state, so that cleanup does not act on
// indeterminate values when BuildIndexInit was never run or failed early.
CDictIndex::CDictIndex()
{
	m_nSlotNameNum = 0;
	m_nWordInfoBuffSize = 0;
	m_pszWordInfoBuff = NULL;
}

// Destructor: intentionally empty. The name maps and the word-info buffer
// are released by BuildIndexExitInit, not here.
CDictIndex::~CDictIndex()
{
}

// Reads one line with fgets and cuts it at the first carriage return (0x0d),
// which also removes the line feed that follows it.
//   pszString : destination buffer of at least `n` bytes
//   n         : buffer size handed to fgets
//   fp        : input stream
// Returns pszString. When nothing could be read (end of file or read error)
// the buffer is set to the empty string, so callers never see stale or
// uninitialised contents; they can tell the cases apart with feof/ferror.
LPSTR myfgets(LPSTR pszString,int n,FILE *fp)
{
	if ( fgets(pszString,n,fp) == NULL ) {
		// fgets leaves the buffer untouched (or indeterminate) on failure.
		if ( n > 0 )
			pszString[0] = '\0';
		return pszString;
	}

	LPSTR pszTep = strchr(pszString,0x0d);
	if ( pszTep != NULL ) {
		*pszTep = '\0';
	}
	return pszString;
}

// Processes the section of the definition file whose name is currently held
// in m_szSegmentName.
//
// For the section itself it writes the "//Followings are defines of slot",
// NAME_BEGIN and NAME constants to fpOutDefine and a "NAME code" line to
// fpCodeTable, then consumes m_nDefineValue for it. Every following line of
// fpInDefine up to the next "[section]" header is one entry: it gets the next
// code, is written to both output files and is stored in mapName as an
// ObWord holding that code. On reaching the next header, NAME_END is written
// and m_szSegmentName is replaced by the new section name.
//
//   mapName     : receives entry text -> ObWord(code)
//   fpInDefine  : definition file being read
//   fpOutDefine : generated constant definitions
//   fpCodeTable : generated code table
//   bWrtSegName : TRUE to prefix entry names with the section name in both
//                 output files (entries of section "AddQualfr" are then left
//                 out of fpOutDefine); FALSE to write the entry name alone,
//                 which is only used for the NoValSlot section of DEFINE.TXT
//
// Returns TRUE when the section ended at end of file, at a read error, or at
// a well-formed header; FALSE when a header line has no closing ']'.
BOOL CDictIndex::FillMap(CMapStringToOb &mapName,
						 FILE *fpInDefine,
						 FILE *fpOutDefine,
						 FILE *fpCodeTable,
						 BOOL bWrtSegName)
{
	CString strOutDefine;
	strOutDefine.Format("\r\n\r\n//Followings are defines of slot %s\r\n",
		m_szSegmentName);
	fputs(strOutDefine,fpOutDefine);

	strOutDefine.Format("const WORD %s_BEGIN = %d;\r\n",
		m_szSegmentName,m_nDefineValue);
	fputs(strOutDefine,fpOutDefine);

	CString strCodeTable;

	strCodeTable.Format("%s %d\r\n",
				m_szSegmentName,m_nDefineValue);
	fputs(strCodeTable,fpCodeTable);

	strOutDefine.Format("const WORD %s = %d;\r\n",
				m_szSegmentName,m_nDefineValue);
	fputs(strOutDefine,fpOutDefine);

	// Decided once, from the section name on entry.
	const BOOL bIsQualfr =
		( strcmp(m_szSegmentName,"AddQualfr") == 0 ) ? TRUE : FALSE;

	m_nDefineValue ++;

	const int MAX_DEFINE_LINE_LEN = 100;
	char szLine[MAX_DEFINE_LINE_LEN];
	CString strLine;
	ObWord *pObject;

	do {
		// Start from an empty line so a failed read is never mistaken for data.
		szLine[0] = '\0';
		myfgets(szLine,MAX_DEFINE_LINE_LEN,fpInDefine);

		// A read error would otherwise repeat the same line forever.
		if ( feof(fpInDefine) || ferror(fpInDefine) )
			break;

		if ( szLine[0] == '[' ) {
			LPSTR pszTep = strchr(szLine,']');
			if ( pszTep == NULL ) {
				// Malformed header: report it instead of writing through NULL.
				ASSERT(FALSE);
				return FALSE;
			}
			*pszTep = '\0';

			strOutDefine.Format("const WORD %s_END = %d;\r\n",
				m_szSegmentName,m_nDefineValue-1);
			fputs(strOutDefine	,fpOutDefine);

			strcpy(m_szSegmentName,szLine+1);
			break;
		}

		// The map key and the code table keep the entry text as read.
		strLine = szLine;
		if ( bWrtSegName ) {
			strCodeTable.Format("%s=%s %d\r\n",
				m_szSegmentName,szLine,m_nDefineValue);
		} else {
			strCodeTable.Format("%s %d\r\n",
				szLine,m_nDefineValue);
		}
		fputs(strCodeTable,fpCodeTable);

		// Before writing to the generated header, turn every '/' into '_'.
		// The scan stops at the terminator, so an empty line is safe.
		for ( char *pLine = szLine; *pLine != '\0'; pLine ++ ) {
			if ( *pLine == '/' )
				*pLine = '_';
		}

		if ( bWrtSegName ) {
			if ( bIsQualfr == FALSE ) {
				strOutDefine.Format("const WORD %s_%s = %d;\r\n",
					m_szSegmentName,szLine,m_nDefineValue);
				fputs(strOutDefine,fpOutDefine);
			}
		} else {
			strOutDefine.Format("const WORD %s = %d;\r\n",
				szLine,m_nDefineValue);
			fputs(strOutDefine,fpOutDefine);
		}

		pObject = new ObWord;
		pObject->GiveVolue(m_nDefineValue);
		m_nDefineValue ++;
		mapName.SetAt( strLine, (ObWord*)pObject );
	} while ( TRUE );
	return TRUE;
}

// Reads the entries of the current section of the definition file into both
// a map and an array.
//
// Every line up to the next "[section]" header is appended to arrayName and
// stored in mapName as an ObWord holding its zero-based position. The
// position of the entry "AddQualfr" is remembered in m_nQualfrCode. On
// reaching the next header, m_szSegmentName is replaced by the new name.
//
//   mapName     : receives entry text -> ObWord(position)
//   arrayName   : receives the entry texts in file order
//   nNameNum    : receives the number of entries read
//   fpInDefine  : definition file being read
//   fpOutDefine : not used by this function
//
// Returns TRUE when the section ended at end of file, at a read error, or at
// a well-formed header; FALSE when a header line has no closing ']'.
BOOL CDictIndex::FillMapWithArray(CMapStringToOb &mapName,
								CStringArray &arrayName,
								int &nNameNum,FILE *fpInDefine,
								FILE *fpOutDefine)
{
	char szLine[MAX_DEFINE_LINE_LEN];
	nNameNum = 0;
	CString strLine;
	ObWord *pObject;
	do {
		// Start from an empty line so a failed read is never mistaken for data.
		szLine[0] = '\0';
		myfgets(szLine,MAX_DEFINE_LINE_LEN,fpInDefine);

		// A read error would otherwise repeat the same line forever.
		if ( feof(fpInDefine) || ferror(fpInDefine) )
			break;

		if ( szLine[0] == '[' ) {
			LPSTR pszTep = strchr(szLine,']');
			if ( pszTep == NULL ) {
				// Malformed header: report it instead of writing through NULL.
				ASSERT(FALSE);
				return FALSE;
			}
			*pszTep = '\0';

			strcpy(m_szSegmentName,szLine+1);
			break;
		}

		nNameNum ++;
		strLine = szLine;
		arrayName.Add(strLine);
		pObject = new ObWord;
		pObject->GiveVolue(nNameNum-1);
		mapName.SetAt( strLine,(ObWord*)pObject );

		if ( strcmp(strLine,"AddQualfr") == 0 )
			m_nQualfrCode = nNameNum-1;

	} while ( TRUE );

	return TRUE;
}

// Prepares an index build: parses the definition file into the name maps,
// generates the constant header and the code table, and allocates the
// word-info buffer.
//
//   pszInDefineName  : original definition file (DEFINE.TXT)
//   pszOutDefineName : generated definition header (DictDef.h)
//   pszCodeTable     : generated code table file
//
// The first line of the definition file must be a "[section]" header; the
// sections are then read in this order: category, head, slot names,
// no-value slots, ambiguity, and one value section per slot name.
//
// Returns TRUE on success. Returns FALSE when the log or any of the three
// files cannot be opened, when a section header is malformed, or when the
// buffer cannot be allocated. The three files opened here are always closed
// before returning; the global log stream fpLog is left open.
BOOL CDictIndex::BuildIndexInit(LPSTR pszInDefineName,
								LPSTR pszOutDefineName,
								LPSTR pszCodeTable)
{
	//yys 5.26
	fpLog = fopen(sLog , "w");
	if( !fpLog ){
		AfxMessageBox("Cann't Creat file!",MB_OK);
		return FALSE;
	}

	FILE *fpInDefine = fopen(pszInDefineName,"rb");
	if ( fpInDefine == NULL ) {
		CString strMsg;
		strMsg.Format(" 无法打开文件 %s !",pszInDefineName);
		AfxMessageBox(strMsg);
		return FALSE;
	}

	FILE *fpOutDefine = fopen(pszOutDefineName,"wb");
	if ( fpOutDefine == NULL ) {
		CString strMsg;
		strMsg.Format("无法创建文件 %s !",pszOutDefineName);
		AfxMessageBox(strMsg);
		fclose(fpInDefine);
		return FALSE;
	}

	FILE *fpCodeTable = fopen(pszCodeTable,"wb");
	// This check must test the stream that was just opened.
	if ( fpCodeTable == NULL ) {
		CString strMsg;
		strMsg.Format("无法创建文件 %s !",pszCodeTable);
		AfxMessageBox(strMsg);
		fclose(fpInDefine);
		fclose(fpOutDefine);
		return FALSE;
	}
	fputs("#ifndef _DICTDEF_H",fpOutDefine);
	fputc('\n',fpOutDefine);
	fputs("#define _DICTDEF_H",fpOutDefine);

	m_nDefineValue = 1;

	// The first line only supplies the name of the first section.
	char szLine[MAX_DEFINE_LINE_LEN];
	szLine[0] = '\0';
	myfgets(szLine,MAX_DEFINE_LINE_LEN,fpInDefine);
	LPSTR pszTep = strchr(szLine,']');
	if ( pszTep == NULL ) {
		// Empty file or malformed header: fail instead of writing through NULL.
		ASSERT(FALSE);
		fclose(fpInDefine);
		fclose(fpOutDefine);
		fclose(fpCodeTable);
		return FALSE;
	}
	*pszTep = '\0';
	strcpy(m_szSegmentName,szLine+1);

	// Each call consumes one section and leaves the next section's name in
	// m_szSegmentName; stop at the first section that fails to parse.
	BOOL bOk = FillMap(m_mapCate,fpInDefine,fpOutDefine,fpCodeTable,TRUE)
		&& FillMap(m_mapHead,fpInDefine,fpOutDefine,fpCodeTable,TRUE)
		&& FillMapWithArray(m_mapSlotName,m_arraySlotName,
				m_nSlotNameNum,fpInDefine,fpOutDefine)
		&& FillMap(m_mapNoValueSlot,fpInDefine,fpOutDefine,fpCodeTable,FALSE)
		&& FillMap(m_mapAmbig,fpInDefine,fpOutDefine,fpCodeTable,TRUE);

	// One value section per slot name.
	for ( int Loop=0;bOk && Loop<m_nSlotNameNum;Loop++ ) {
		bOk = FillMap(m_mapSlotValue[Loop],fpInDefine,fpOutDefine,fpCodeTable,TRUE);
	}
	fputs("#endif",fpOutDefine);
	fclose(fpInDefine);
	fclose(fpOutDefine);
	fclose(fpCodeTable);

	if ( !bOk )
		return FALSE;

	// Allocate the word-info buffer.
	m_nWordInfoBuffSize = MAX_RECORD_LEN;
	m_pszWordInfoBuff = (LPSTR)GlobalLock(GlobalAlloc(GMEM_MOVEABLE|GMEM_SHARE,
					   m_nWordInfoBuffSize));
	return ( m_pszWordInfoBuff != NULL ) ? TRUE : FALSE;
}

// Deletes every ObWord stored in `mapName` and then empties the map.
void CDictIndex::FreeMap(CMapStringToOb &mapName)
{
	CString strKey;
	for ( POSITION posNext = mapName.GetStartPosition(); posNext != NULL; ) {
		ObWord* pStored;
		// GetNextAssoc advances posNext and sets it to NULL after the last entry.
		mapName.GetNextAssoc( posNext, strKey, ( CObject*& )pStored );
		delete pStored;
	}
	mapName.RemoveAll();
}

void CDictIndex::BuildIndexExitInit()
// 释放保留字数组占用的空间
{
	FreeMap(m_mapCate);
	FreeMap(m_mapHead);
	FreeMap(m_mapAmbig);
	FreeMap(m_mapNoValueSlot);
	
	m_arraySlotName.RemoveAll();
	for ( int Loop=0;Loop<m_nSlotNameNum;Loop++ ) {
		FreeMap(m_mapSlotValue[Loop]);
	}

	GlobalUnlock(GlobalHandle(m_pszWordInfoBuff));
	GlobalFree(GlobalHandle(m_pszWordInfoBuff));
}

void DecodeWordRule(LPSTR pszCodedRule,LPSTR pszOrgRule)
// 对编码后的规则进行解码
// pszCodedRule 编码后的规则
// pszOrgRule 解码后的规则
// 注:
// 编码后的规则的格式:
// 规则个数(1 byte),规则左部的长度(1 byte),规则左部,
// 规则右部的长度(1 byte),规则右部,...
{
	LPSTR pszCodePtr = pszCodedRule;
	LPSTR pszOrgPtr = pszOrgRule;

	UCHAR ucRuleNum = *pszCodePtr;
	pszCodePtr ++;

	UCHAR ucLen;
	for ( char Loop = 0;Loop<ucRuleNum;Loop++ ) {
		strcpy(pszOrgPtr,"@");

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -