📄 diction.cpp
字号:
int nEngHuffLen;
int nEngOrgLen;
int nWordsNum;
long lDataSite;
int nWordInfoLen;
BOOL bIsEven = FALSE;
BOOL bResult;
do {
bResult = ReadALineFromEngHuff(fpEnglishHuff,szEngHuff,nEngHuffLen,
nEngOrgLen,nWordsNum,lDataSite,nWordInfoLen);
if ( bResult == FALSE )
break;
bIsEven = !bIsEven;
if ( bIsEven ) continue;
// 第0字节为词典中以该单词为首词的词组中的单词的最大个数
pszWordInfoBuff[0] = (UCHAR)nWordsNum;
nWordInfoLen ++;
ReadIndexData(fpTempDat,pszWordInfoBuff+1,lDataSite,nWordInfoLen);
long lNewDataSite = WriteIndexData(fpIndexDat,pszWordInfoBuff,
nWordInfoLen);
CompressIndexOffsetInfo(lNewDataSite,nWordInfoLen,
obDiction.m_pszOffset);
if ( nMaxKeyLen < nEngHuffLen )
nMaxKeyLen = nEngHuffLen; // 字典中的最大的关键字的长度
if ( nEngHuffLen > DIC_WORD_LEN ) {// 将超过最大词长的关键字的超过部分截断
nEngHuffLen = DIC_WORD_LEN;
nExceedMaxNum ++; // 词典中超过允许最大关键字的词的个数
}
memset(obDiction.m_pszWord,0x0,DIC_WORD_LEN);
memcpy(obDiction.m_pszWord,szEngHuff,nEngHuffLen);
bResult = obDiction.insert();
// ASSERT(bResult == IM_OK); // 词典中有重复的词组
} while ( TRUE );
bIsEven = TRUE;
fseek(fpEnglishHuff,0L,SEEK_SET);
do {
bResult = ReadALineFromEngHuff(fpEnglishHuff,szEngHuff,nEngHuffLen,
nEngOrgLen,nWordsNum,lDataSite,nWordInfoLen);
if ( bResult == FALSE )
break;
bIsEven = !bIsEven;
if ( bIsEven ) continue;
// 第0字节为词典中以该单词为首词的词组中的单词的最大个数
pszWordInfoBuff[0] = (UCHAR)nWordsNum;
nWordInfoLen ++;
ReadIndexData(fpTempDat,pszWordInfoBuff+1,lDataSite,nWordInfoLen);
long lNewDataSite = WriteIndexData(fpIndexDat,pszWordInfoBuff,
nWordInfoLen);
CompressIndexOffsetInfo(lNewDataSite,nWordInfoLen,
obDiction.m_pszOffset);
if ( nEngHuffLen > DIC_WORD_LEN ) {// 将超过最大词长的关键字的超过部分截断
if ( nMaxKeyLen < nEngHuffLen )
nMaxKeyLen = nEngHuffLen; // 字典中的最大的关键字的长度
nEngHuffLen = DIC_WORD_LEN;
nExceedMaxNum ++; // 词典中超过允许最大关键字的词的个数
}
memset(obDiction.m_pszWord,0x0,DIC_WORD_LEN);
memcpy(obDiction.m_pszWord,szEngHuff,nEngHuffLen);
obDiction.insert();
} while ( TRUE );
GlobalUnlock(GlobalHandle(pszWordInfoBuff));
GlobalFree(GlobalHandle(pszWordInfoBuff));
fclose(fpEnglishHuff);
fclose(fpTempDat);
fclose(fpIndexDat);
remove(pszEngHuffName);
remove(szTempDatName);
// 更新数据文件的文件头信息
if ( (fpIndexDat = fopen(szDictDatName,"r+b") ) == NULL ) {
CString strMsg;
strMsg.Format("Cann't write file %s !",szDictDatName);
AfxMessageBox(strMsg);
return FALSE;
}
fseek(fpIndexDat,0L,SEEK_SET);
fwrite(&phHead,sizeof(DictHeader),1,fpIndexDat);
fclose(fpIndexDat);
/*
fprintf(fpLog,"字典中最长的英文词长=%d;\n允许的最大英文词长=%d;\n超过允许的词的个数=%d;\n[注] 词的超过部分将被截掉!\n",
nMaxKeyLen,DIC_WORD_LEN,nExceedMaxNum);
*/
return TRUE;
}
// Rebuilds the index of the user-defined dictionary.
//
// The work is done by BuildIndexStepOne / StepTwo / StepThree, with a
// CSenLink::SortMain pass after step one (descending) and after step two
// (ascending). All intermediate files live under "DictRes\" relative to the
// current working directory; the two sorted intermediates are removed again.
//
// Parameters:
//   szUserDicName - user dictionary file name; handed unchanged to step one
//                   and step three.
//   szIndexName   - base name of the index; "DictRes\" is prepended to it.
//                   Must not be NULL and must fit the 1024-byte path buffer.
//
// Returns TRUE when every stage succeeded (a message box announces
// completion), FALSE as soon as any stage fails or szIndexName is unusable.
BOOL BuildUserIndex(char *szUserDicName , char *szIndexName )
{
    // NOTE(review): GetModuleFileName yields the executable's full path
    // *including* the file name, so this _chdir presumably fails and leaves
    // the working directory untouched. Kept as-is because the relative
    // "DictRes\" paths below may rely on the current behaviour - confirm
    // whether the file name was meant to be stripped first.
    char sExeFilePath[256] = { 0 };   // zeroed so the buffer is always terminated
    GetModuleFileName(NULL,sExeFilePath,255);
    _chdir(sExeFilePath);

    char szInDefineName[] = "DictRes\\define.txt";
    char szOutDefineName[] = "DictRes\\DictDefU.h";
    char szCodeTableName[] = "DictRes\\CodeTabU.txt";
    char szEngInfoName[] = "DictRes\\EngInfoU.txt";
    char szEngSortInfoName[] = "DictRes\\SEngInfo.txt";
    char szHuffmanFreqDataName[] = "DictRes\\HufFreqU.dat";
    char szEngHuffName[] = "DictRes\\EngHuffU.txt";
    char szEngSortHuffName[] = "DictRes\\SEngHuff.txt";

    // Build "DictRes\<szIndexName>", refusing names that would overflow.
    const char szDictResPrefix[] = "DictRes\\";
    char szDicIndexName[1024];
    if ( szIndexName == NULL )
        return FALSE;
    if ( strlen(szIndexName) > sizeof(szDicIndexName) - sizeof(szDictResPrefix) )
        return FALSE;
    strcpy(szDicIndexName , szDictResPrefix);
    strcat(szDicIndexName , szIndexName);

    BOOL bResult;
    CSenLink *pSenLink;

    // Step one of the original index build.
    bResult = BuildIndexStepOne(szInDefineName,szOutDefineName,
                                szDicIndexName,szEngInfoName,
                                szCodeTableName,szUserDicName);
    if ( !bResult )
        return FALSE;

    // FALSE: sort szEngInfoName in descending order into szEngSortInfoName.
    pSenLink = new CSenLink;
    bResult = pSenLink->SortMain(szEngInfoName,szEngSortInfoName,FALSE);
    delete pSenLink;
    if ( !bResult )
        return FALSE;

    // Step two of the original index build.
    bResult = BuildIndexStepTwo(szEngSortInfoName,
                                szHuffmanFreqDataName,szEngHuffName);
    if ( !bResult )
        return FALSE;
    remove(szEngSortInfoName);   // sorted intermediate is no longer needed

    // TRUE: sort szEngHuffName in ascending order into szEngSortHuffName.
    pSenLink = new CSenLink;
    bResult = pSenLink->SortMain(szEngHuffName,szEngSortHuffName,TRUE);
    delete pSenLink;
    if ( !bResult ) {
        // Previously this result was overwritten unchecked and step three ran
        // on whatever the failed sort left behind.
        remove(szEngSortHuffName);
        return FALSE;
    }

    // Step three of the original index build.
    bResult = BuildIndexStepThree(szEngSortHuffName,szDicIndexName,szUserDicName);
    remove(szEngSortHuffName);
    if ( !bResult )
        return FALSE;

    AfxMessageBox("建索引完成!\n");

    // NOTE(review): this function no longer opens a log file (that code was
    // commented out), so fpLog must be declared elsewhere - confirm that it
    // is really this function's job to close it. Guarded so that a stream
    // that was never opened is not passed to fclose.
    if ( fpLog != NULL )
        fclose(fpLog);
    return TRUE;
}
// Default constructor: resets every field of the node to its empty state
// (pointers to NULL, lengths / counters / codes to 0, flags to FALSE).
DictNode::DictNode()
{
    // ---- English entry, original form, ambiguity data ----
    m_pszEnglish = NULL;
    m_pszOrig = NULL;
    m_pszAmbig = NULL;
    m_nEnglishLen = 0;
    m_nOrigLen = 0;
    m_nAmbigLen = 0;
    m_bIsExistOrig = FALSE;
    m_bIsExistAmbig = 0;
    m_nWordStyle = 0;

    // ---- list of Chinese entries ----
    m_pFirstChin = NULL;
    m_nChinNum = 0;

    // ---- yys 98.4.26: word-level attributes ----
    // When m_pszEnglish is a single word: the maximum length of the phrases
    // whose first word is this word.
    m_byMaxPhraseLen = 0;
    // Word form:
    //   0: original, 1: -ed1, 2: ed2, 3: -ing, 4: -s, 5: ed1,ed2, 6: er,
    //   7: est, 8: undetermined "is" or "has", 9: undetermined "would" or
    //   "had", 10: unknown word, 11: number
    m_byForm = 0;
    // (a former "int m_byDe" field, 1: possessive, is no longer set here)
    m_nLxhCate = NULL;
    m_nLxhCateLen = 0;
    m_byGenetive = 0;           // 1: possessive
    m_byNumber = 0;             // 0: singular, 1: plural

    // ---- neighbouring word nodes ----
    m_pPrevWordNode = NULL;     // previous word node
    m_pNextWordNode = NULL;     // next word node

    // ---- attributes taken from the dictionary ----
    m_byDictForm = 0;
    m_byDictGenetive = 0;       // 1: possessive
    m_byDictPerson = 0;         // 1: first, 2: second, 3: third person
    m_byDictNumber = 0;         // 0: singular, 1: plural
    m_cDictTense = 0;           // tense: 0 present, 1 past, 2 future, 3 past future
    m_cDictAspect = 0;          // aspect: 0 simple, 1 progressive, 2 perfect, 3 perfect progressive
    m_cDictVoice = 0;           // 0: active, 1: passive
    m_cDictNegative = 0;        // 0: affirmative, 1: negative

    // ---- CDQ: tagging results ----
    m_pszPostag = NULL;         // POS tag produced by the statistical algorithm
    m_pszBaseNPtag = NULL;      // BaseNP tagging result

    // ---- xuned: translation selection and rules ----
    m_pCurrSele = NULL;         // dictionary line currently chosen for the translation
    m_pszTranslation = NULL;    // translation after generation or selection
    m_nTranslationLen = 0;
    m_pszParseRule = NULL;      // parse rule of an intermediate parse-tree node
    m_nParseRule = 0;
    m_pszTransferRule = NULL;   // transfer/generation rule of an intermediate parse-tree node
    m_nTransferRule = 0;
    m_pszDynamicInfo = NULL;    // information generated dynamically during translation
    m_nDynamicInfo = 0;

    // ---- xuned: position in the sentence and in the parse tree ----
    m_bIsSentBegin = FALSE;     // is this node the start of a sentence
    m_bIsSentEnd = FALSE;       // is this node the end of a sentence
    m_bIsLeafNode = FALSE;      // is this node a leaf of the parse tree
    m_pParent = NULL;           // parent node
    m_pChild = NULL;            // child node
    m_pLeftBrother = NULL;      // left sibling
    m_pRightBrother = NULL;     // right sibling
    m_pLeftCousin = NULL;       // left cousin
    m_pRightCousin = NULL;      // right cousin
}
// Destructor: intentionally does nothing. Nothing the node points to is
// released here; the call to FreeNode below is disabled.
// NOTE(review): nodes allocated by AppendEmptyChin are not deleted by this
// destructor - confirm they are released elsewhere.
DictNode::~DictNode()
{
//FreeNode(m_pDictNode);
}
// Appends a new DictChin node at the tail of the list that starts at
// m_pFirstChin and returns a pointer to the new node.
// The new node's m_pNextChin is NULL. m_nChinNum is set to 1 when the list
// was empty and incremented otherwise.
DictChin *DictNode::AppendEmptyChin()
{
    DictChin *pFresh = new DictChin;
    pFresh->m_pNextChin = NULL;

    // Empty list: the new node becomes the head (yys 98.4.30).
    if ( m_pFirstChin == NULL ) {
        m_pFirstChin = pFresh;
        m_nChinNum = 1;
        return pFresh;
    }

    // Otherwise walk to the last node and hook the new one behind it.
    DictChin *pTail = m_pFirstChin;
    while ( pTail->m_pNextChin != NULL )
        pTail = pTail->m_pNextChin;
    pTail->m_pNextChin = pFresh;
    m_nChinNum ++;
    return pFresh;
}
// Returns the Chinese-entry node at position nPointedIndex (counted from 0)
// of the list that starts at m_pFirstChin.
//
// Returns NULL when the list has fewer than nPointedIndex + 1 nodes; the walk
// stops at the end of the list instead of dereferencing a NULL link.
// A negative index returns the head of the list, as it always did.
DictChin *DictNode::GetPointedChin(int nPointedIndex)
{
    DictChin *pCurrChin = m_pFirstChin;
    for ( int nStep = 0; nStep < nPointedIndex && pCurrChin != NULL; nStep++ )
        pCurrChin = pCurrChin->m_pNextChin;
    return pCurrChin;
}
void DictNode::DeletePointedChin(int nPointedIndex)
// 删除链表中指定位置的中文结点
// 从 0 算起
{
DictChin *pLastChin = NULL;
DictChin *pCurrChin;
DictChin *pNextChin;
if ( m_pFirstChin == NULL )
return;
if ( nPointedIndex > 0 )
pLastChin = GetPointedChin(nPointedIndex-1);
pCurrChin = GetPointedChin(nPointedIndex);
pNextChin = pCurrChin->m_pNextChin;
delete pCurrChin;
if ( pLastChin != NULL )
pLastChin->m_pNextChin = pNextChin;
if ( nPointedIndex == 0 )
m_pFirstChin = pNextChin;
m_nChinNum --;
}
// Constructor: clears the node pointer, enables searching of both the system
// and the user dictionary, marks memory as not yet allocated, fetches the
// user index name into szUserDictName and fills the bit-mask table.
//
// The dictionary-object and index-file members that used to be reset here
// were removed ("qlp delete 5.27").
DictSearch::DictSearch()
{
    m_pDictNode = NULL;
    bSearchSysDict = TRUE;
    bSearchUserDict = TRUE;
    bHaveAllocMemory = FALSE;
    GetUserIndexName( szUserDictName );

    // m_narrayDector[n] = 2^n for n = 0 .. BYTES_BIT_NUM (inclusive).
    // Computed with an integer shift rather than (int)pow(2,n): the result is
    // exact and does not go through floating point.
    // NOTE(review): the inclusive bound assumes m_narrayDector holds at least
    // BYTES_BIT_NUM + 1 elements - confirm against the class declaration.
    for ( int nBit = 0; nBit <= BYTES_BIT_NUM; nBit++ )
        m_narrayDector[nBit] = 1 << nBit;

    m_nSameHushWordsNum = 0;
}
// Destructor: intentionally empty. The resets of the dictionary-object
// members below are disabled.
DictSearch::~DictSearch()
{
/* m_obDiction = NULL;
m_obSysDiction = NULL;
m_obAddtionDiction = NULL;
m_obUserDiction = NULL;
*/
}
// Creates the Dictionary object for the index whose base name is
// pszDicIndexName.
//
// "<base>.idx" is first opened for update to verify that it is accessible,
// then a Dictionary is constructed from "<base>.dta" and "<base>.idx".
//
// Parameters:
//   pszDicIndexName - base path of the index files, without extension.
//   m_fpIndexDat    - passed by value, so the caller never sees what is
//                     stored in it; used only as scratch for the probe.
//
// Returns the newly allocated Dictionary (the caller owns it), or NULL when
// the base name is NULL, is too long for the file-name buffers, or the index
// file cannot be opened (a message box is shown in the last two cases).
Dictionary *DictSearch::DictInit( LPSTR pszDicIndexName ,
                                  FILE *m_fpIndexDat )
{
    char szDictIndexName[MAX_FILENAME_LEN];   // index file name
    char szDictDtaName[MAX_FILENAME_LEN];     // second-level index file name

    if ( pszDicIndexName == NULL )
        return NULL;

    // Both extensions are four characters; sizeof(".idx") also counts the
    // terminating NUL, so this guards every strcpy/strcat below.
    if ( strlen(pszDicIndexName) + sizeof(".idx") > sizeof(szDictIndexName) ) {
        CString strMsg;
        strMsg.Format("Cann't open file %s !",pszDicIndexName );
        AfxMessageBox(strMsg);
        return NULL;
    }

    strcpy(szDictIndexName,pszDicIndexName);
    strcat(szDictIndexName,".idx");
    strcpy(szDictDtaName,pszDicIndexName);
    strcat(szDictDtaName,".dta");

    // "r+b" as used elsewhere in this file; the former "r + b" (with spaces)
    // is not a mode string defined by the C standard.
    m_fpIndexDat = fopen(szDictIndexName , "r+b");
    if ( !m_fpIndexDat ) {
        CString strMsg;
        strMsg.Format("Cann't open file %s !",szDictIndexName );
        AfxMessageBox(strMsg);
        return NULL;
    }
    // The handle cannot reach the caller (by-value parameter), so close it
    // here instead of leaking one open file per call.
    fclose(m_fpIndexDat);
    m_fpIndexDat = NULL;

    Dictionary *pNewDiction = new Dictionary(szDictDtaName,szDictIndexName,0);
    return pNewDiction;
}
// Opens the user dictionary whose base name is pszUserDicIndexName.
//
// "<base>.dat" is opened for update and stored in the application object's
// m_fpUserIndexDat; a Dictionary built from "<base>.dta" and "<base>.idx" is
// stored in m_obUserDiction; finally the user hash table is loaded from the
// opened data file via LoadUserHushTableFromIndexDat.
//
// Returns TRUE on success. Returns FALSE when the base name is NULL, is too
// long for the file-name buffers, the data file cannot be opened (a message
// box is shown in the last two cases), or loading the hash table fails. In
// that last case the file and the Dictionary stay stored in the application
// object.
BOOL DictSearch::InitUserDict( char *pszUserDicIndexName )
{
    char szUserDictIndexName[MAX_FILENAME_LEN];   // index file name
    char szUserDictDtaName[MAX_FILENAME_LEN];     // second-level index file name
    char szUserDictDatName[MAX_FILENAME_LEN];     // data file name

    if ( pszUserDicIndexName == NULL )
        return FALSE;

    // All three extensions are four characters; sizeof(".idx") also counts
    // the terminating NUL, so this guards every strcpy/strcat below.
    if ( strlen(pszUserDicIndexName) + sizeof(".idx") > sizeof(szUserDictIndexName) ) {
        CString strMsg;
        strMsg.Format("Cann't open file %s ! ", pszUserDicIndexName);
        AfxMessageBox(strMsg);
        return FALSE;
    }

    strcpy(szUserDictIndexName,pszUserDicIndexName);
    strcat(szUserDictIndexName,".idx");
    strcpy(szUserDictDtaName,pszUserDicIndexName);
    strcat(szUserDictDtaName,".dta");
    strcpy(szUserDictDatName,pszUserDicIndexName);
    strcat(szUserDictDatName,".dat");

    // The user-dictionary state lives on the application object.
    CPosTagApp *pApp = (CPosTagApp *) AfxGetApp();

    pApp->m_fpUserIndexDat = fopen(szUserDictDatName,"r+b");
    if ( pApp->m_fpUserIndexDat == NULL ) {
        CString strMsg;
        strMsg.Format("Cann't open file %s ! ", szUserDictDatName);
        AfxMessageBox(strMsg);
        return FALSE;
    }

    pApp->m_obUserDiction = new Dictionary(szUserDictDtaName,szUserDictIndexName,0);

    if ( !LoadUserHushTableFromIndexDat(pApp->m_fpUserIndexDat) )
        return FALSE;
    return TRUE;
}
// Releases the user dictionary held by the application object: closes
// m_fpUserIndexDat and deletes m_obUserDiction when they are set, resetting
// each to NULL afterwards. Always returns TRUE.
BOOL DictSearch::UserDictExit()
{
    CPosTagApp *pApp = (CPosTagApp *) AfxGetApp();

    if ( pApp->m_fpUserIndexDat != NULL ) {
        fclose(pApp->m_fpUserIndexDat);
        pApp->m_fpUserIndexDat = NULL;
    }

    if ( pApp->m_obUserDiction != NULL ) {
        delete pApp->m_obUserDiction;
        pApp->m_obUserDiction = NULL;
    }

    return TRUE;
}
BOOL DictSearch::SearchWordInit(LPSTR pszSysDicIndexName,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -