⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dictment.cpp

📁 计算机英汉机器翻译系统中的英语词性标注方法实现
💻 CPP
📖 第 1 页 / 共 3 页
字号:
#include "stdafx.h"
//#include "DictMent.h"
#include "Diction.h"

#include <stdio.h>
#include <string.h>
#include <memory.h>

// Builds an empty word item: no English text, no Chinese entries, no
// original-form / ambiguity data, and empty definition tables.
COneWord::COneWord()
{
	m_pFirstChinese = NULL;
	m_pLastChinese = NULL;
	m_nEnglishLen = 0;
	m_pszEnglish = NULL;
	m_nCurrReadLineNum = 0;
	
	m_pszOrig = NULL;
	m_nOrigLen = 0;

	m_pszAmbig = NULL;
	m_nAmbigLen = 0;

	m_bIsExistOrig = FALSE;
	m_bIsExistAmbig = FALSE;

	// Clear every definition table exactly once. The slot-name map used to
	// be skipped while the category map was cleared twice.
	m_mapCate.RemoveAll();
	m_mapHead.RemoveAll();
	m_mapSlotName.RemoveAll();
	m_mapNoValueSlot.RemoveAll();
	m_mapAmbig.RemoveAll();
	
	m_arraySlotName.RemoveAll();
	// The slot-name count is used as a loop bound by ExitInit(); give it a
	// defined value so cleanup is safe even if Init() never ran.
	m_nSlotNameNum = 0;
	for ( int Loop=0;Loop<MAX_SLOT_NAME_NUM;Loop++ ) {
		m_mapSlotValue[Loop].RemoveAll();
	}
}

// Destructor: performs no work of its own.
// NOTE(review): the buffers allocated by the Set* methods and the ObWord
// objects held in the maps are not released here - presumably the owner
// calls ExitInit() or another cleanup routine first; confirm.
COneWord::~COneWord()
{
}

// Reads one section of the definition file into mapName.
//
// Lines are consumed from fpDefine until end of file, a read error, or a
// line whose first character is '[' (that line is consumed as well). Each
// line, with its line terminator stripped, becomes a key whose value is a
// new ObWord carrying the zero-based position of the line in the section.
//
// mapName   map that receives the entries (it takes ownership of the ObWords)
// fpDefine  open definition file, positioned at the start of the section
// Returns TRUE in all cases.
BOOL COneWord::FillMap(CMapStringToOb &mapName,FILE *fpDefine)
{
	const int MAX_DEFINE_LINE_LEN = 100;
	char szLine[MAX_DEFINE_LINE_LEN];
	CString strLine;
	int nIndex = 0;

	// Test the fgets() result itself: on EOF or a read error the buffer
	// contents are unspecified and must not be parsed.
	while ( fgets(szLine,MAX_DEFINE_LINE_LEN,fpDefine) != NULL ) {
		if ( szLine[0] == '[' )
			break;

		// Cut at the first CR or LF. A line without a CR (LF-only file,
		// unterminated last line, over-long line) used to trip an ASSERT and
		// then write through a NULL pointer in release builds.
		szLine[strcspn(szLine,"\r\n")] = '\0';

		strLine = szLine;

		// SetAt() overwrites the value of an existing key; release the old
		// object first so a repeated name does not leak it.
		CObject *pOld = NULL;
		if ( mapName.Lookup(strLine,pOld) )
			delete pOld;

		ObWord *pObject = new ObWord;
		pObject->GiveVolue(nIndex);
		nIndex ++;
		mapName.SetAt( strLine,pObject );
	}
	return TRUE;
}

// Reads one section of the definition file into both a map and an array.
//
// Lines are consumed from fpDefine until end of file, a read error, or a
// line whose first character is '[' (that line is consumed as well). Each
// line, with its line terminator stripped, is appended to arrayName and
// stored in mapName with a new ObWord carrying its zero-based position.
//
// mapName    map that receives name -> ObWord (it takes ownership)
// arrayName  array that receives the names in file order
// nNameNum   set to the number of lines stored
// fpDefine   open definition file, positioned at the start of the section
// Returns TRUE in all cases.
BOOL COneWord::FillMapWithArray(CMapStringToOb &mapName,
								CStringArray &arrayName,
								int &nNameNum,FILE *fpDefine)
{
	char szLine[MAX_DEFINE_LINE_LEN];
	CString strLine;

	nNameNum = 0;
	// Test the fgets() result itself: on EOF or a read error the buffer
	// contents are unspecified and must not be parsed.
	while ( fgets(szLine,MAX_DEFINE_LINE_LEN,fpDefine) != NULL ) {
		if ( szLine[0] == '[' )
			break;

		// Cut at the first CR or LF. A line without a CR used to trip an
		// ASSERT and then write through a NULL pointer in release builds.
		szLine[strcspn(szLine,"\r\n")] = '\0';

		nNameNum ++;

		strLine = szLine;
		arrayName.Add(strLine);

		// SetAt() overwrites the value of an existing key; release the old
		// object first so a repeated name does not leak it.
		CObject *pOld = NULL;
		if ( mapName.Lookup(strLine,pOld) )
			delete pOld;

		ObWord *pObject = new ObWord;
		pObject->GiveVolue(nNameNum-1);
		mapName.SetAt( strLine,pObject );
	}

	return TRUE;
}

// Initialization: loads the definition tables from the file pszDefineName.
//
// The first line of the file is skipped. The following sections are then
// read in order: categories, heads, slot names, value-less slots,
// ambiguity names, and finally one value section per slot name.
//
// pszDefineName  path of the definition file
// Returns FALSE (after showing a message box) if the file cannot be opened
// or declares more slot names than the per-slot value table can hold;
// TRUE otherwise.
BOOL COneWord::Init(LPSTR pszDefineName)
{
	FILE *fpDefine = fopen(pszDefineName,"rb");
	if ( fpDefine == NULL ) {
		CString strMsg;
		strMsg.Format("Cann't Open %s ",pszDefineName);
		AfxMessageBox(strMsg);
		return FALSE;
	}
	
	// Skip the first line.
	char szLine[MAX_DEFINE_LINE_LEN];
	fgets(szLine,MAX_DEFINE_LINE_LEN,fpDefine);

	FillMap(m_mapCate,fpDefine);
	FillMap(m_mapHead,fpDefine);
	FillMapWithArray(m_mapSlotName,m_arraySlotName,m_nSlotNameNum,fpDefine);
	FillMap(m_mapNoValueSlot,fpDefine);
	FillMap(m_mapAmbig,fpDefine);
	
	// m_mapSlotValue holds MAX_SLOT_NAME_NUM maps (the size the constructor
	// iterates over); refuse a file that would index past the end of it.
	if ( m_nSlotNameNum > MAX_SLOT_NAME_NUM ) {
		CString strMsg;
		strMsg.Format("Too many slot names in %s (%d, max %d)",
			pszDefineName,m_nSlotNameNum,static_cast<int>(MAX_SLOT_NAME_NUM));
		AfxMessageBox(strMsg);
		fclose(fpDefine);
		// No slot-value map has been filled yet; zero the count so the
		// cleanup below never walks the slot-value array out of bounds.
		m_nSlotNameNum = 0;
		ExitInit();
		return FALSE;
	}

	for ( int Loop=0;Loop<m_nSlotNameNum;Loop++ ) {
		FillMap(m_mapSlotValue[Loop],fpDefine);
	}
	
	fclose(fpDefine);
	return TRUE;
}

// Deletes every object stored in mapName and then empties the map.
void COneWord::FreeMap(CMapStringToOb &mapName)
{
	CString strKey;
	ObWord* pEntry;

	POSITION posNext = mapName.GetStartPosition();
	while ( posNext != NULL ) {
		// GetNextAssoc() advances posNext and sets it to NULL after the
		// last association.
		mapName.GetNextAssoc( posNext,strKey,(CObject*&)pEntry );
		delete pEntry;
	}

	mapName.RemoveAll();
}

void COneWord::ExitInit()
// 释放保留字数组占用的空间
{
	FreeMap(m_mapCate);
	FreeMap(m_mapHead);
	FreeMap(m_mapSlotName);
	FreeMap(m_mapNoValueSlot);
	FreeMap(m_mapAmbig);
	
	m_arraySlotName.RemoveAll();
	for ( int Loop=0;Loop<m_nSlotNameNum;Loop++ ) {
		FreeMap(m_mapSlotValue[Loop]);
	}
}

// Stores a NUL-terminated copy of the English headword in this word item.
//
// pszEnglish   start of the English text (need not be NUL-terminated)
// nEnglishLen  number of bytes to copy
// Returns TRUE on success. If nEnglishLen is not positive, reports
// "ERROR 1" (to the error log when logging is enabled, otherwise in a
// message box, and additionally to the target dictionary when that option
// is set) and returns FALSE.
BOOL COneWord::SetEnglishOfWordItem(LPSTR pszEnglish,int nEnglishLen)
{
	if ( nEnglishLen > 0 ) {
		// Copy the English entry into the structure.
		m_nEnglishLen = nEnglishLen;
		m_pszEnglish = new char[m_nEnglishLen+1];
		memcpy(m_pszEnglish,pszEnglish,m_nEnglishLen);
		m_pszEnglish[m_nEnglishLen] = '\0';
		return TRUE;
	}

	// Missing English text: build and route the error message.
	CString strErr;
	strErr.Format("ERROR 1,在%s的%d行(无英文)",
		m_pszDicName,m_nCurrReadLineNum);

	if ( !m_bIsWriteLog ) {
		AfxMessageBox(strErr);
	} else {
		fputs(strErr,m_fpErrLogFile);
		fputs("\r\n",m_fpErrLogFile);
	}

	if ( m_bIsWriteErrToDic ) {
		fputs(strErr,m_fpTarDic);
		fputs("\r\n",m_fpTarDic);
	}
	return FALSE;
}

// Stores the Chinese text of the current Chinese entry (m_pCurrChinese).
//
// pszChinese   start of the Chinese text (need not be NUL-terminated)
// nChineseLen  number of bytes to copy; a non-positive value records an
//              empty entry (length 0, NULL text)
// Always returns TRUE.
BOOL COneWord::SetChiTextOfWordItem(LPSTR pszChinese,int nChineseLen)
{
	if ( nChineseLen > 0 ) {
		// Copy the Chinese entry into the structure.
		m_pCurrChinese->m_nChineseLen = nChineseLen;
		char *pszCopy = new char[m_pCurrChinese->m_nChineseLen+1];
		m_pCurrChinese->m_pszChinese = pszCopy;
		memcpy(pszCopy,pszChinese,m_pCurrChinese->m_nChineseLen);
		pszCopy[m_pCurrChinese->m_nChineseLen] = '\0';
		return TRUE;
	}

	// No text supplied.
	m_pCurrChinese->m_nChineseLen = 0;
	m_pCurrChinese->m_pszChinese = NULL;
	return TRUE;
}

// Stores the category (Cate) of the current Chinese entry and validates it
// against the category table m_mapCate.
//
// pszCate   start of the category text (need not be NUL-terminated)
// nCateLen  number of bytes to copy
// Returns TRUE if the category is non-empty and present in m_mapCate.
// Otherwise reports "ERROR 3" (empty) or "ERROR 20" (unknown value) - to
// the error log when logging is enabled, else in a message box, and also
// to the target dictionary when that option is set - and returns FALSE.
// In the "ERROR 20" case the copied text stays stored in the entry.
BOOL COneWord::SetCateOfWordItem(LPSTR pszCate,int nCateLen)
{
	if ( nCateLen <= 0 ) {
		CString strMsg;
		strMsg.Format("ERROR 3,在%s的%d行: Cate空",
			m_pszDicName,m_nCurrReadLineNum);
		if ( m_bIsWriteLog ) {
			fputs(strMsg,m_fpErrLogFile);
			fputs("\r\n",m_fpErrLogFile);
		} else
			AfxMessageBox(strMsg);

		if ( m_bIsWriteErrToDic ) {
			fputs(strMsg,m_fpTarDic);
			fputs("\r\n",m_fpTarDic);
		}
		return FALSE;
	}
	m_pCurrChinese->m_pszCate = new char[nCateLen+1];
	memcpy(m_pCurrChinese->m_pszCate,pszCate,nCateLen);
	m_pCurrChinese->m_pszCate[nCateLen] = '\0';

	CObject* pFound = NULL;
	if ( !m_mapCate.Lookup(m_pCurrChinese->m_pszCate,pFound) ) {
		// Not found. The format must consume the dictionary name with %s:
		// the previous "%d ... %s" format was fed (name, line, value), which
		// handed an int to %s.
		CString strMsg;
		strMsg.Format("ERROR 20,在%s的%d行: Cate的值%s非法",
			m_pszDicName,m_nCurrReadLineNum,m_pCurrChinese->m_pszCate);
		if ( m_bIsWriteLog ) {
			fputs(strMsg,m_fpErrLogFile);
			fputs("\r\n",m_fpErrLogFile);
		} else
			AfxMessageBox(strMsg);

		if ( m_bIsWriteErrToDic ) {
			fputs(strMsg,m_fpTarDic);
			fputs("\r\n",m_fpTarDic);
		}
		return FALSE;
	}
	return TRUE;
}

// Stores the head (Head) of the current Chinese entry and validates it
// against the head table m_mapHead.
//
// pszHead   start of the head text (need not be NUL-terminated)
// nHeadLen  number of bytes to copy
// Returns TRUE if the head is non-empty and present in m_mapHead.
// Otherwise reports "ERROR 4" (empty) or "ERROR 21" (unknown value) to the
// error log when logging is enabled, else in a message box, and returns
// FALSE. In the "ERROR 21" case the copied text stays stored in the entry.
BOOL COneWord::SetHeadOfWordItem(LPSTR pszHead,int nHeadLen)
{
	if ( nHeadLen <= 0 ) {
		// The format must consume the dictionary name with %s; the previous
		// format passed the name pointer to %d.
		CString strMsg;
		strMsg.Format("ERROR 4,在%s的%d行: Head空",
			m_pszDicName,m_nCurrReadLineNum);
		if ( m_bIsWriteLog ) {
			fputs(strMsg,m_fpErrLogFile);
			fputs("\r\n",m_fpErrLogFile);
		} else
			AfxMessageBox(strMsg);

		if ( m_bIsWriteErrToDic ) {
			// NOTE(review): only a blank line is written to the target
			// dictionary here; the message write is disabled - confirm this
			// is intentional.
//			fputs(strMsg,m_fpTarDic);
			fputs("\r\n",m_fpTarDic);
		}
		return FALSE;
	}
	m_pCurrChinese->m_pszHead = new char[nHeadLen+1];
	memcpy(m_pCurrChinese->m_pszHead,pszHead,nHeadLen);
	m_pCurrChinese->m_pszHead[nHeadLen] = '\0';

	CObject* pFound = NULL;
	if ( !m_mapHead.Lookup(m_pCurrChinese->m_pszHead,pFound) ) {
		// Not found. The previous "%d ... %s" format was fed
		// (name, line, value), which handed an int to %s.
		CString strMsg;
		strMsg.Format("ERROR 21,在%s的%d行: Head的值%s非法",
			m_pszDicName,m_nCurrReadLineNum,m_pCurrChinese->m_pszHead);
		if ( m_bIsWriteLog ) {
			fputs(strMsg,m_fpErrLogFile);
			fputs("\r\n",m_fpErrLogFile);
		} else
			AfxMessageBox(strMsg);

		if ( m_bIsWriteErrToDic ) {
			// NOTE(review): message write to the target dictionary is
			// disabled here as well - confirm this is intentional.
//			fputs(strMsg,m_fpTarDic);
			fputs("\r\n",m_fpTarDic);
		}
		return FALSE;
	}
	return TRUE;
}

// Parses the "original word" part of a dictionary line of the form
//     <orig>[<style>]            followed by CR, or
//     <orig>[<style>],A...       followed by an ambiguity specification.
//
// The text before '[' is copied into m_pszOrig / m_nOrigLen, the bracketed
// tag ([ed] [ing] [is] [am] [are] [s] [ed1] [ed2]) selects m_nWordStyle,
// and a trailing ",A..." part is handed to SetAmbig().
//
// pszLeft  NUL-terminated remainder of the current line
// Returns TRUE on success. On a missing '[' ("ERROR 5"), an unknown tag
// ("ERROR 6") or unexpected trailing text ("ERROR 7") the error is
// reported - to the error log when logging is enabled, else in a message
// box, and also to the target dictionary when that option is set - and
// FALSE is returned. Also returns FALSE when SetAmbig() fails.
// m_bIsExistOrig is set to TRUE in every case.
//
// The three error formats consume the dictionary name with %s; they used
// to pass the name pointer to %d and left the line number unused.
BOOL COneWord::SetOrigWord(LPSTR pszLeft)
{
	m_bIsExistOrig = TRUE;

	LPSTR pszTep;
	pszTep = strchr(pszLeft,'[');
	if ( pszTep == NULL ) {
		CString strMsg;

		strMsg.Format("ERROR 5,在%s的%d行: 找不到 [ ",
			m_pszDicName,m_nCurrReadLineNum);
		if ( m_bIsWriteLog ) {
			fputs(strMsg,m_fpErrLogFile);
			fputs("\r\n",m_fpErrLogFile);
		} else
			AfxMessageBox(strMsg);

		if ( m_bIsWriteErrToDic ) {
			fputs(strMsg,m_fpTarDic);
			fputs("\r\n",m_fpTarDic);
		}
		return FALSE;
	}
	// Everything before '[' is the original word.
	m_nOrigLen = pszTep - pszLeft;
	m_pszOrig = new char[m_nOrigLen+1];
	memcpy(m_pszOrig,pszLeft,m_nOrigLen);
	m_pszOrig[m_nOrigLen] = '\0';
	
	// Read the entry type tag and step past it.
	pszLeft = pszTep;
	if ( strncmp(pszLeft,"[ed]",4) == 0 ) {
		m_nWordStyle = STYLE_ED;
		pszLeft += 4;
	} else if ( strncmp(pszLeft,"[ing]",5) == 0 ) {
		m_nWordStyle = STYLE_ING;
		pszLeft += 5;
	} else if ( strncmp(pszLeft,"[is]",4) == 0 ) {
		m_nWordStyle = STYLE_IS;
		pszLeft += 4;
	} else if ( strncmp(pszLeft,"[am]",4) == 0 ) {
		m_nWordStyle = STYLE_AM;
		pszLeft += 4;
	} else if ( strncmp(pszLeft,"[are]",5) == 0 ) {
		m_nWordStyle = STYLE_ARE;
		pszLeft += 5;
	} else if ( strncmp(pszLeft,"[s]",3) == 0 ) {
		m_nWordStyle = STYLE_S;
		pszLeft += 3;
	} else if ( strncmp(pszLeft,"[ed1]",5) == 0 ) {
		m_nWordStyle = STYLE_ED1;
		pszLeft += 5;
	} else if ( strncmp(pszLeft,"[ed2]",5) == 0 ) {
		m_nWordStyle = STYLE_ED2;
		pszLeft += 5;
	} else {
		CString strMsg;
		strMsg.Format("ERROR 6,在%s的%d行: []中词条类型错",
			m_pszDicName,m_nCurrReadLineNum);
		if ( m_bIsWriteLog ) {
			fputs(strMsg,m_fpErrLogFile);
			fputs("\r\n",m_fpErrLogFile);
		} else
			AfxMessageBox(strMsg);

		if ( m_bIsWriteErrToDic ) {
			fputs(strMsg,m_fpTarDic);
			fputs("\r\n",m_fpTarDic);
		}
		return FALSE;
	}
	
	if ( pszLeft[0] == ',' && pszLeft[1] == 'A' ) { // an ambiguity part follows
		pszLeft++;
		if ( !SetAmbig(pszLeft) )
			return FALSE;
	} else if ( pszLeft[0] == 0x0d ) {
		// End of line (CR) right after the tag: nothing more to parse.
	} else {
		CString strMsg;
		strMsg.Format("ERROR 7,在%s的%d行: 本行尾的内容未知",
			m_pszDicName,m_nCurrReadLineNum);
		if ( m_bIsWriteLog ) {
			fputs(strMsg,m_fpErrLogFile);
			fputs("\r\n",m_fpErrLogFile);
		} else
			AfxMessageBox(strMsg);

		if ( m_bIsWriteErrToDic ) {
			fputs(strMsg,m_fpTarDic);
			fputs("\r\n",m_fpTarDic);
		}
		return FALSE;
	}
	return TRUE;
}

// Parses an ambiguity specification of the form "...=<value>" followed by
// CR. The text between '=' and the CR is copied into m_pszAmbig /
// m_nAmbigLen and validated against the ambiguity table m_mapAmbig.
//
// pszLeft  NUL-terminated remainder of the current line
// Returns TRUE if the value is present in m_mapAmbig. On a missing '='
// ("ERROR 8"), a missing CR ("ERROR 9") or an unknown value ("ERROR 22")
// the error is reported - to the error log when logging is enabled, else
// in a message box, and also to the target dictionary when that option is
// set - and FALSE is returned. m_bIsExistAmbig is set to TRUE in every case.
//
// The three error formats consume the dictionary name with %s; they used
// to pass the name pointer to %d and left the line number unused.
BOOL COneWord::SetAmbig(LPSTR pszLeft)
{
	m_bIsExistAmbig = TRUE;
	
	LPSTR pszTep = strchr(pszLeft,'=');
	if ( pszTep == NULL ) {
		CString strMsg;
		strMsg.Format("ERROR 8,在%s的%d行: 兼类内容不全",
			m_pszDicName,m_nCurrReadLineNum);
		if ( m_bIsWriteLog ) {
			fputs(strMsg,m_fpErrLogFile);
			fputs("\r\n",m_fpErrLogFile);
		} else
			AfxMessageBox(strMsg);

		if ( m_bIsWriteErrToDic ) {
			fputs(strMsg,m_fpTarDic);
			fputs("\r\n",m_fpTarDic);
		}
		return FALSE;
	}
	// The value runs from just after '=' up to the CR ending the line.
	pszLeft = pszTep + 1;
	pszTep = strchr(pszLeft,0x0d );
	if ( pszTep == NULL ) {
		CString strMsg;
		strMsg.Format("ERROR 9,在%s的%d行",
			m_pszDicName,m_nCurrReadLineNum);
		if ( m_bIsWriteLog ) {
			fputs(strMsg,m_fpErrLogFile);
			fputs("\r\n",m_fpErrLogFile);
		} else
			AfxMessageBox(strMsg);

		if ( m_bIsWriteErrToDic ) {
			fputs(strMsg,m_fpTarDic);
			fputs("\r\n",m_fpTarDic);
		}
		return FALSE;
	}
	m_nAmbigLen = pszTep - pszLeft;
	m_pszAmbig = new char[m_nAmbigLen + 1];
	memcpy(m_pszAmbig,pszLeft,m_nAmbigLen);
	m_pszAmbig[m_nAmbigLen] = '\0';

	CObject* pFound = NULL;
	if ( !m_mapAmbig.Lookup(m_pszAmbig,pFound) ) {
		// Not found in the ambiguity table.
		CString strMsg;
		strMsg.Format("ERROR 22,在%s的%d行: Ambig的值非法",
			m_pszDicName,m_nCurrReadLineNum);
		if ( m_bIsWriteLog ) {
			fputs(strMsg,m_fpErrLogFile);
			fputs("\r\n",m_fpErrLogFile);
		} else
			AfxMessageBox(strMsg);

		if ( m_bIsWriteErrToDic ) {
			fputs(strMsg,m_fpTarDic);
			fputs("\r\n",m_fpTarDic);
		}
		return FALSE;
	}
	return TRUE;
}

Slot *COneWord::MakeSlot(BOOL bIsTranRule,LPSTR pszSlotName,
						 int nSlotNameLen,LPSTR pszSlotValue,
						 int nSlotValueLen)
{
	Slot *pSlot = new Slot;
	pSlot->m_bIsTranRule = bIsTranRule;

	pSlot->m_pszSlotName = new char[nSlotNameLen+1];
	memcpy(pSlot->m_pszSlotName,pszSlotName,nSlotNameLen);
	pSlot->m_pszSlotName[nSlotNameLen] = '\0';

	if ( nSlotValueLen > 0 ) {
		pSlot->m_pszSlotValue = new char[nSlotValueLen+1];
		memcpy(pSlot->m_pszSlotValue,pszSlotValue,nSlotValueLen);
		pSlot->m_pszSlotValue[nSlotValueLen] = '\0';
	} else {
		pSlot->m_pszSlotValue = NULL;
	}
	

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -