⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 childfrm.cpp

📁 计算机英汉机器翻译系统中的英语词性标注方法实现
💻 CPP
📖 第 1 页 / 共 2 页
字号:
		{
			for ( j=0;j<viterbiAnn[i].GetSize()-1;j++ )
			{
				if ( viterbiPro[i][0]<viterbiPro[i][j+1] )
				{
					viterbiPro[i][0]=viterbiPro[i][j+1];
					viterbiAnn[i][0]=viterbiAnn[i][j+1];
					viterbiPos[i][0]=viterbiPos[i][j+1];
				}
			}
			posAnnotate.Add(viterbiAnn[i][0]);
//			fprintf(fpOutput,viterbiAnn[i][0]);
		}
		else
		{
			for ( j=0;j<viterbiAnn[i].GetSize();j++ )
			{
				if ( viterbiAnn[i][j]==viterbiPos[i+1][0] )
				{
					posAnnotate.Add(viterbiAnn[i][j]);
//					fprintf(fpOutput,viterbiAnn[i][j]);
					viterbiPos[i][0]=viterbiPos[i][j];
					break;
				}
			}
		}
	}
//    fclose(fpOutput);
	fclose(fpInput2);

	for (  l=0;l<200;l++ )
	{
		viterbiPos[l].RemoveAll();
		viterbiAnn[l].RemoveAll();
	}

	CurNode = g_objMorSent.m_pWordFirst;

	for ( l=posAnnotate.GetSize()-1;l>=0;l-- )
	{
		strcpy(crTmp,posAnnotate[l]);
		CurNode->m_pszPostag = new char[5];
		strcpy(CurNode->m_pszPostag, crTmp);
		strTmp2=CurNode->m_pszEnglish;
		strTmp2=strTmp2+"/"+crTmp+" ";
		fputs(strTmp2,fpOutput1);
		CurNode = CurNode->m_pNextWordNode;	
	}
/*
	CString signTmp;
	CStringArray CTemp;
	DictChin *pDicChin1;
	int nCate,n;
	CMTNodeList *MTSenNodeList;
	MTSenNodeList= new CMTNodeList;

	CMTNode *MTCurNode;
	MTCurNode = new CMTNode;
	MTSenNodeList->m_pSentBegin = MTCurNode;
	MTCurNode->m_pListPrev=NULL;
	for ( l=posAnnotate.GetSize()-1;l>=0;l-- )
	{
		MTCurNode->m_psWord=CurNode->m_pszEnglish;
		MTCurNode->m_psAmbig=CurNode->m_pszAmbig;
//最佳系统词性序列输出
		strcpy(crTmp,posAnnotate[l]);
		while ( CurNode!=NULL )
		{
			pDicChin1 = CurNode->m_pFirstChin;
			signTmp="";
			while ( pDicChin1 != NULL )
			{
				nCate=pDicChin1->m_nCate; //得到词性值后,获得词性
			   	strTmp1=NumChangeCate(nCate);//获得词性后再赋给strTmp2
				strcpy(Test,strTmp1);
				strTmp2=CateChangePenn(Test);
				if  ( strstr(strTmp2,crTmp) != NULL )
				{
					CTemp.Add(strTmp1);
					n=CTemp.GetSize();
					MTCurNode->m_psPOS = CTemp[n-1].GetBuffer(CTemp[n-1].GetLength());
					CTemp[n-1].ReleaseBuffer();
					MTCurNode->m_pListNext = new CMTNode;
					MTCurNode->m_pListNext->m_pListPrev = MTCurNode;
					MTCurNode = MTCurNode->m_pListNext;

					strTmp2=CurNode->m_pszEnglish;
					strTmp2=strTmp2+"/"+strTmp1;
					signTmp=strTmp2;
					fputs(strTmp2,fpOutput1);
					fputs(" ",fpOutput1);
					break;
				}
				else
					pDicChin1 = pDicChin1->m_pNextChin;
			}
			if ( strcmp(signTmp,"")==0 )
			{
				strTmp1=PennContrast(crTmp);

				CTemp.Add(strTmp1);
				n=CTemp.GetSize();
				MTCurNode->m_psPOS = CTemp[n-1].GetBuffer(CTemp[n-1].GetLength());
				CTemp[n-1].ReleaseBuffer();
				MTCurNode->m_pListNext = new CMTNode;
				MTCurNode->m_pListNext->m_pListPrev = MTCurNode;
				MTCurNode = MTCurNode->m_pListNext;

				strTmp2=CurNode->m_pszEnglish;
				strTmp2=strTmp2+"/"+strTmp1;
				fputs(strTmp2,fpOutput1);
				fputs(" ",fpOutput1);
			}
			break;
		}		
		CurNode = CurNode->m_pNextWordNode;	

	}

	MTCurNode->m_pListPrev->m_pListNext = NULL; 
	MTSenNodeList->m_pSentEnd = MTCurNode->m_pListPrev;
*/
	fputs("\n",fpOutput1);

	posAnnotate.RemoveAll();	

	for ( j=0;j<200;j++ )
	{
		for ( l=0;l<44;l++ )
		{
			viterbiPro[j][l]=0;
		}
	}

	fclose(fpInput3);
	fclose(fpInput4);
}

// Looks up a tag in the contrast table "cdqprg\conmarker.txt" and returns the
// neighbouring column of the matching line.
//
// The search key is built as " " + crTmp + "  " (one leading space, two
// trailing spaces). At most 44 lines of the table are scanned. For every line
// that contains the key, everything up to and including the first double
// space on that line is discarded and the result is what remains up to the
// next double space. If several lines match, the last match wins.
//
// Parameters:
//   crTmp - NUL-terminated tag text to search for.
// Returns:
//   The extracted column text, or an empty string when no line matches or the
//   table file cannot be opened (an error message box is shown in that case).
CString CChildFrame::PennContrast(char crTmp[200])
{
	CString strKey,strLine,strResult;
	const int c_nMaxLineLen=150;
	const int c_nTableLines=44;
	char szOneLine[c_nMaxLineLen];

	char szPennTableName[]="cdqprg\\conmarker.txt";
	FILE* fpInput=fopen(szPennTableName,"rb");
	if ( fpInput == NULL )
	{
		char stErrorMsg[200];
		sprintf(stErrorMsg,"Error Open %s !",szPennTableName);
		AfxMessageBox(stErrorMsg);
		// Nothing can be read from a NULL stream; bail out instead of
		// passing it to fgets()/fclose().
		return strResult;
	}

	strKey=" ";
	strKey=strKey+crTmp;
	strKey=strKey+"  ";

	for ( int i=0; i<c_nTableLines; i++ )
	{
		// Stop at end of file so the buffer is never examined without
		// having been filled by a successful read.
		if ( fgets(szOneLine,c_nMaxLineLen,fpInput) == NULL )
			break;

		strLine=szOneLine;
		if ( strstr(strLine,strKey) != NULL )
		{
			// Drop the text up to and including the first double space...
			strLine=strLine.Right(strLine.GetLength()-strLine.Find("  ")-2);
			// ...and keep what precedes the next one.
			strResult=strLine.Left(strLine.Find("  "));
		}
	}

	fclose(fpInput);
	return strResult;
}
/*
CString CateChangePenn(char Test[10])
{
	CString posTmp;
	CString linTmp,strTmp;
	const int c_nMaxLineLen=150;
	char szOneLine[c_nMaxLineLen];

	char szPennTableName[]="cdqprg\\conmarker.txt";
	FILE* fpInput;
	fpInput=fopen(szPennTableName,"rb");
	if ( fpInput == NULL )
	{
		char stErrorMsg[200];
		sprintf(stErrorMsg,"Error Open %s !",szPennTableName);
		AfxMessageBox(stErrorMsg);
	}

	strTmp=" ";
	strTmp=strTmp+Test;
	strTmp=strTmp+"  ";
		
	for ( int i=0; i<44; i++ )
	{//映射为Penn Treebank 标注符号
		fgets(szOneLine,c_nMaxLineLen,fpInput);
		linTmp=szOneLine;
		if ( strstr(linTmp,strTmp) != NULL )
		{
			linTmp=linTmp.Left(linTmp.Find("  "));
			posTmp=posTmp+linTmp.Right(linTmp.GetLength()-1);
			continue;
		}
	}
	
	fclose(fpInput);
	return posTmp;
}

*/
// Collects the candidate part-of-speech entries for one word and appends them
// to arDictContent. Every entry appended by this function itself has the form
// "<tag>  1\n"; entries read from the dictionary file are appended as stored.
//
// Lookup order:
//   1. The word is searched in the index "cdqprg\DicyInd.idx"; when find_key()
//      reports a hit, the entries are read from "cdqprg\dicindex.txt" at the
//      record position stored in the index entry (see ReadDictData).
//   2. Otherwise WordRestore() is asked for the word:
//      a. If the node carries a category string (m_nLxhCate) without '!', a
//         tag is guessed from that string, the capitalisation of the word and
//         the substrings "ing" / "ed" / "s" occurring anywhere in the word.
//      b. Else, if the node has no m_pFirstChin list, the word is treated as
//         unknown and NewWordContent() supplies the entries.
//      c. Else every m_nCate value in the m_pFirstChin list is mapped to a
//         tag through "dictres\CodeTab.txt" (line number m_nCate, text between
//         the first '=' and the following space) and added once.
//
// Parameters:
//   OneWord       - the word to look up.
//   arDictContent - receives the entries; existing content is kept.
//
// A message box is shown and nothing is appended when one of the two data
// files cannot be opened.
void  CChildFrame::Find(CString OneWord, CStringArray& arDictContent)
{
	const int c_nMaxLineLen=1500;
	// Zero-initialised so that a category number <= 0 (no line read at all)
	// never makes the code below parse uninitialised memory.
	char szOneLine[c_nMaxLineLen]="";
	char vocable[150];
	ENTRY    entry;

	// The word is copied into two fixed-size buffers below. A word that does
	// not fit cannot be looked up safely, so it is handled like an unknown
	// word instead of overflowing the buffers.
	if ( OneWord.GetLength() >= static_cast<int>(sizeof(vocable)) ||
		 OneWord.GetLength() >= static_cast<int>(sizeof(entry.key)) )
	{
		NewWordContent(arDictContent);
		return;
	}

	char dicindexFileName[]="cdqprg\\dicindex.txt";
	FILE* fpFile=fopen(dicindexFileName,"rb");
	if ( fpFile == NULL )
	{
		char stErrorMsg[200];
		sprintf(stErrorMsg,"Error Open %s !",dicindexFileName);
		AfxMessageBox(stErrorMsg);
		return;
	}

	char CodeTabFileName[]="dictres\\CodeTab.txt";
	FILE* fpInput=fopen(CodeTabFileName,"rb");
	if ( fpInput == NULL )
	{
		char stErrorMsg[200];
		sprintf(stErrorMsg,"Error Open %s !",CodeTabFileName);
		AfxMessageBox(stErrorMsg);
		fclose(fpFile);		// do not leak the file opened above
		return;
	}

	CString strTmp;
	int n_Cate;
	IX_DESC g_DictInd;
	open_index("cdqprg\\DicyInd.idx", &g_DictInd, 0);

	strcpy(vocable,OneWord);
	strcpy(entry.key, OneWord);

	if( find_key(&entry, &g_DictInd) )
	{
		ReadDictData(entry.recptr,fpFile,arDictContent);
	}
	else
	{
		// The word is not in the dictionary generated from the corpus, so
		// its possible categories are taken from the system dictionary.
		struct DictNode *NewDicNode = WordRestore(vocable,SEARCH_ALLCH);

		if ( NewDicNode == NULL )
		{
			// NOTE(review): WordRestore is not visible here; a NULL result
			// is treated as "unknown word" rather than dereferenced.
			NewWordContent(arDictContent);
		}
		else if( NewDicNode->m_nLxhCate != NULL &&
			strstr(NewDicNode->m_nLxhCate,"!")==NULL )
		{
			// The category string is only inspected, so it is read in place
			// instead of being copied into the fixed-size word buffer.
			const char* pszCate = NewDicNode->m_nLxhCate;
			// Cast: passing a negative char to isupper() is undefined.
			const bool bCapitalized =
				isupper(static_cast<unsigned char>(OneWord[0])) != 0;
			const char* pszTag;

			if ( strstr(pszCate,"v")!=NULL && !bCapitalized )
			{
				if ( strstr(OneWord,"ing")!=NULL )
					pszTag="VBG";
				else if ( strstr(OneWord,"ed")!=NULL )
				{
					// Two candidates: VBD is added here, VBN below.
					arDictContent.Add("VBD  1\n");
					pszTag="VBN";
				}
				else if ( strstr(OneWord,"s")!=NULL )
					pszTag="VBZ";
				else
				{
					// Two candidates: VB is added here, VBP below.
					arDictContent.Add("VB  1\n");
					pszTag="VBP";
				}
			}
			else if ( strcmp(pszCate,"ns")==0 )
				pszTag="PRP$";
			else if ( bCapitalized )
				pszTag = ( strstr(OneWord,"s")!=NULL ) ? "NNPS" : "NNP";
			else
				pszTag = ( strstr(OneWord,"s")!=NULL ) ? "NNS" : "NN";

			strTmp=pszTag;
			strTmp=strTmp+"  1\n";
			arDictContent.Add(strTmp);
		}
		else if ( NewDicNode->m_pFirstChin == NULL )
		{
			NewWordContent(arDictContent);
		}
		else
		{
			// NOTE(review): the loop advances the node's own m_pFirstChin
			// instead of a local cursor, so the node's list head is NULL
			// afterwards. Kept as in the original; confirm that nothing else
			// relies on the list after this call.
			while(NewDicNode->m_pFirstChin != NULL )
			{
				// The category number selects a line of the code table.
				n_Cate=NewDicNode->m_pFirstChin->m_nCate;

				// Loop counters are declared outside the for statements so
				// the code is valid under both the standard for-scope rule
				// and the legacy Visual C++ one.
				int nLine;
				for(nLine=0;nLine<n_Cate;nLine++)
				{
					if ( fgets(szOneLine,c_nMaxLineLen,fpInput) == NULL )
						break;	// short file: keep the last line read
				}
				fseek(fpInput,0L,SEEK_SET);	// rewind for the next category

				// Tag = text between the first '=' and the following space.
				strTmp=szOneLine;
				strTmp=strTmp.Right(strTmp.GetLength()-strTmp.Find('=')-1);
				strTmp=strTmp.Left(strTmp.Find(' '));
				strTmp=strTmp+"  1\n";

				// Add the entry only if it is not present yet.
				BOOL bKnown=FALSE;
				int nIdx;
				for(nIdx=0;nIdx<arDictContent.GetSize();nIdx++)
				{
					if ( strcmp(arDictContent[nIdx],strTmp)==0 )
					{
						bKnown=TRUE;
						break;
					}
				}
				if ( !bKnown )
					arDictContent.Add(strTmp);

				NewDicNode->m_pFirstChin = NewDicNode->m_pFirstChin->m_pNextChin;
			}
		}

	}

	fclose(fpFile);
	fclose(fpInput);
	close_index(&g_DictInd);

}

// Handling of an unknown word: every tag listed in a fixed section of
// "dictres\CodeTab.txt" is appended to arDictContent as "<tag>  1\n".
//
// The first 25 lines of the table are skipped and up to 92 following lines
// are used. For each of those lines the tag is the text between the first '='
// and the following space.
//
// Parameters:
//   arDictContent - receives the entries; existing content is kept.
//
// A message box is shown and nothing is appended when the table cannot be
// opened. Reading stops at end of file, so a table shorter than expected
// yields fewer entries instead of repeated or uninitialised ones.
void CChildFrame::NewWordContent(CStringArray& arDictContent)
{
	const int c_nMaxLineLen=1500;
	const int c_nSkipLines=25;	// lines preceding the tag section
	const int c_nTagLines=92;	// number of lines in the tag section
	char szOneLine[c_nMaxLineLen];
	CString strTmp;

	char CodeTabFileName[]="dictres\\CodeTab.txt";
	FILE* fpInput=fopen(CodeTabFileName,"rb");
	if ( fpInput == NULL )
	{
		char stErrorMsg[200];
		sprintf(stErrorMsg,"Error Open %s !",CodeTabFileName);
		AfxMessageBox(stErrorMsg);
		return;
	}

	// Declared outside the for statements so that reusing the counter is
	// valid under the standard for-scope rule as well as the legacy one.
	int i;

	for ( i=0;i<c_nSkipLines;i++ )
	{
		if ( fgets(szOneLine,c_nMaxLineLen,fpInput) == NULL )
		{
			// The table ends before the tag section starts.
			fclose(fpInput);
			return;
		}
	}

	for ( i=0;i<c_nTagLines;i++ )
	{
		if ( fgets(szOneLine,c_nMaxLineLen,fpInput) == NULL )
			break;

		// Tag = text between the first '=' and the following space.
		strTmp=szOneLine;
		strTmp=strTmp.Right(strTmp.GetLength()-strTmp.Find('=')-1);
		strTmp=strTmp.Left(strTmp.Find(' '));
		strTmp=strTmp+"  1\n";
		arDictContent.Add(strTmp);
	}
	fclose(fpInput);
}

// Splits a '/'-separated list of categories and appends each item to
// arDictContent as "<item>  1\n", i.e. every category of a system-dictionary
// word gets the value 1.
//
// Parameters:
//   strTmp        - categories separated by '/'; a string without '/' is
//                   appended as one single item.
//   arDictContent - receives the entries; existing content is kept.
void CChildFrame::ChangeWordContent(CString strTmp,CStringArray& arDictContent)
{
	for (;;)
	{
		if ( strstr(strTmp,"/") == NULL )
			break;

		// Emit the item in front of the first separator...
		const int nSlash = strTmp.Find('/');
		CString strItem = strTmp.Left(nSlash);
		strItem += "  1\n";
		arDictContent.Add(strItem);

		// ...and continue with whatever follows the separator.
		strTmp = strTmp.Mid(nSlash+1);
	}

	// The remainder after the last separator is the final item.
	strTmp += "  1\n";
	arDictContent.Add(strTmp);
}

// Maps a numeric category to its textual tag using "DictRes\CodeTab.txt".
//
// The first nCate lines of the table are read; the tag is taken from the last
// line read, as the text between the first '=' and the following space.
//
// Parameters:
//   nCate - 1-based line number of the wanted category in the table.
// Returns:
//   The tag text. An empty string is returned when the table cannot be opened
//   (a message box is shown) or when nCate <= 0. If the table has fewer than
//   nCate lines, the last line that could be read is used.
CString CChildFrame::NumChangeCate(int nCate)
{
	char szCodeTableName[]="DictRes\\CodeTab.txt";
	CString strCate;
	const int c_nMaxLineLen=1500;
	// Zero-initialised so that nCate <= 0 parses an empty line instead of
	// uninitialised memory.
	char szOneLine[c_nMaxLineLen]="";

	FILE* fpInput1=fopen(szCodeTableName,"rb");
	if ( fpInput1 == NULL )
	{
		char stErrorMsg[200];
		sprintf(stErrorMsg,"Error Open %s !",szCodeTableName);
		AfxMessageBox(stErrorMsg);
		// A NULL stream must not reach fgets()/fclose().
		return strCate;
	}

	// Advance to the line that describes the category.
	for ( int i=0;i<nCate;i++ )
	{
		if ( fgets(szOneLine,c_nMaxLineLen,fpInput1) == NULL )
			break;
	}
	fclose(fpInput1);

	// Tag = text between the first '=' and the following space.
	CString strLine(szOneLine);
	strCate=strLine.Right(strLine.GetLength()-strLine.Find('=')-1);
	strCate=strCate.Left(strCate.Find(' '));
	return strCate;
}

// Reads one dictionary record from fpFile and appends its entry lines to
// arDictContent.
//
// Record layout as consumed by this function, starting at byte offset e:
//   line 1 : flag line; only its first character is examined
//   line 2 : present only when the flag is not '0' (per the original comment
//            it holds the base form of the word); it is skipped
//   next   : the number of entry lines, parsed with atoi()
//   then   : that many entry lines, each appended unchanged
//
// Parameters:
//   e             - byte offset of the record inside fpFile.
//   fpFile        - open dictionary data file.
//   arDictContent - receives the entry lines; existing content is kept.
// Returns:
//   TRUE when the whole record was read. FALSE when the stream is NULL, the
//   seek fails or the file ends before the record is complete; entries read
//   up to that point remain in arDictContent.
BOOL CChildFrame::ReadDictData(long e,FILE* fpFile,CStringArray& arDictContent)
{
	const int c_nMaxLineLen=1500;
	char szOneLine[c_nMaxLineLen];

	if ( fpFile == NULL || fseek(fpFile,e,SEEK_SET) != 0 )
		return FALSE;

	if ( fgets(szOneLine,c_nMaxLineLen,fpFile) == NULL )
		return FALSE;

	// The original test `!strcmp(first, "0") == 0` parses as
	// `(!strcmp(...)) == 0`, i.e. "the first character is NOT '0'".
	if ( szOneLine[0] != '0' )
	{
		// Skip the extra line that precedes the count in this record form.
		if ( fgets(szOneLine,c_nMaxLineLen,fpFile) == NULL )
			return FALSE;
	}

	if ( fgets(szOneLine,c_nMaxLineLen,fpFile) == NULL )
		return FALSE;
	const int nCount=atoi(szOneLine);

	for ( int i=0;i<nCount;i++ )
	{
		// Stop on a truncated record instead of appending stale lines.
		if ( fgets(szOneLine,c_nMaxLineLen,fpFile) == NULL )
			return FALSE;
		arDictContent.Add(szOneLine);
	}

	return TRUE;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -