⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 childfrm.cpp

📁 计算机英汉机器翻译系统中的英语词性标注方法实现
💻 CPP
📖 第 1 页 / 共 2 页
字号:
// ChildFrm.cpp : implementation of the CChildFrame class
//

#include "stdafx.h"
#include "PosTag.h"

#include "ChildFrm.h"
#include "MainFrm.h"
#include "PosTagDoc.h"
#include "PosTagView.h"
#include "test.h"
#include "Mtnodelist.h"
#include "MTNodeStu.h"
#include "math.h"
#include "Bplus.h"
#include "setnumdialog.h"

//CString CateChangePenn(char Test[10]);



// Sentence object defined in another translation unit; BestSequence() walks
// its word list starting at m_pWordFirst.
extern SentenceType	g_objMorSent;
// Declared here, defined elsewhere.
extern DictNode* WordRestore(char *inputword, int nSearchMod);
// Upper bound of the per-sentence loop in CChildFrame::OnPostag().
int sentenceNum=1;
// In debug builds, route `new` through DEBUG_NEW and record this file's name
// in THIS_FILE.
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

/////////////////////////////////////////////////////////////////////////////
// CChildFrame

// Dynamic-creation support for CChildFrame, with CMDIChildWnd as base class.
IMPLEMENT_DYNCREATE(CChildFrame, CMDIChildWnd)

// Message map: the ID_POSTAG command is handled by CChildFrame::OnPostag().
// The {{AFX_MSG_MAP markers below are left exactly as generated.
BEGIN_MESSAGE_MAP(CChildFrame, CMDIChildWnd)
	//{{AFX_MSG_MAP(CChildFrame)
	ON_COMMAND(ID_POSTAG, OnPostag)
	//}}AFX_MSG_MAP
END_MESSAGE_MAP()

/////////////////////////////////////////////////////////////////////////////
// CChildFrame construction/destruction

// Default constructor. The body performs no member initialization.
CChildFrame::CChildFrame()
{
	// TODO: add member initialization code here
}

// Destructor. The body is empty: nothing is released here.
CChildFrame::~CChildFrame()
{
}

// Called before the frame window is created. No class or style changes are
// made to cs here; the base-class implementation decides the result.
BOOL CChildFrame::PreCreateWindow(CREATESTRUCT& cs)
{
	// TODO: adjust the window class or styles through cs if ever needed.
	const BOOL bBaseResult = CMDIChildWnd::PreCreateWindow(cs);
	return bBaseResult;
}

/////////////////////////////////////////////////////////////////////////////
// CChildFrame diagnostics

#ifdef _DEBUG
// Debug-only validity check; forwards to the CMDIChildWnd implementation.
void CChildFrame::AssertValid() const
{
	CMDIChildWnd::AssertValid();
}

// Debug-only dump; forwards to the CMDIChildWnd implementation with dc.
void CChildFrame::Dump(CDumpContext& dc) const
{
	CMDIChildWnd::Dump(dc);
}

#endif //_DEBUG

/////////////////////////////////////////////////////////////////////////////
// CChildFrame message handlers

// Writes "<pszBase><pszExt>" into pszDest (capacity nDestSize characters).
// Returns false and leaves pszDest empty when an argument is NULL or the
// combined name would not fit, instead of writing past the buffer.
static bool BuildDictFileName(char* pszDest, size_t nDestSize,
							  const char* pszBase, const char* pszExt)
{
	if ( pszDest == NULL || nDestSize == 0 )
		return false;
	pszDest[0] = '\0';
	if ( pszBase == NULL || pszExt == NULL )
		return false;
	if ( strlen(pszBase) + strlen(pszExt) >= nDestSize )
		return false;
	strcpy(pszDest,pszBase);
	strcat(pszDest,pszExt);
	return true;
}

// Opens the system and additional dictionaries and loads the hash table.
//
// pszSysDicIndexName     - base path of the system dictionary; ".idx", ".dta"
//                          and ".dat" are appended to it.
// pszHuffmanFreqDataName - not used by this function.
//
// The additional dictionary base path is fixed to "DictRes\\AddDict".
// On success the application object holds both open .dat files and two newly
// allocated Dictionary objects. Returns true when
// LoadSysHushTableFromIndexDat() reports a non-zero result; returns false
// when a file name is invalid, a .dat file cannot be opened, or the load
// reports zero.
bool CChildFrame::SearchWordInit1(LPSTR pszSysDicIndexName,
								LPSTR pszHuffmanFreqDataName)
{
	char szSysDictIndexName[MAX_FILENAME_LEN]; // index file name
	char szSysDictDtaName[MAX_FILENAME_LEN]; // secondary index file name
	char szSysDictDatName[MAX_FILENAME_LEN]; // data file name
	//yys 98.5.22 Bgn
	char szAddtionDictIndexName[MAX_FILENAME_LEN]; // index file name
	char szAddtionDictDtaName[MAX_FILENAME_LEN]; // secondary index file name
	char szAddtionDictDatName[MAX_FILENAME_LEN]; // data file name

	const char pszAddtionDicIndexName[] = "DictRes\\AddDict";	// additional system dictionary base name
	DictSearch qsearch;

	// Build all six file names with bounds checking.
	const bool bNamesOk =
		BuildDictFileName(szSysDictIndexName,sizeof(szSysDictIndexName),pszSysDicIndexName,".idx") &&
		BuildDictFileName(szAddtionDictIndexName,sizeof(szAddtionDictIndexName),pszAddtionDicIndexName,".idx") &&
		BuildDictFileName(szSysDictDtaName,sizeof(szSysDictDtaName),pszSysDicIndexName,".dta") &&
		BuildDictFileName(szAddtionDictDtaName,sizeof(szAddtionDictDtaName),pszAddtionDicIndexName,".dta") &&
		BuildDictFileName(szSysDictDatName,sizeof(szSysDictDatName),pszSysDicIndexName,".dat") &&
		BuildDictFileName(szAddtionDictDatName,sizeof(szAddtionDictDatName),pszAddtionDicIndexName,".dat");
	if ( !bNamesOk )
	{
		AfxMessageBox("Dictionary file name is missing or too long!");
		return false;
	}

	CPosTagApp* pApp = (CPosTagApp*) AfxGetApp();

	// Open the system data file first; the additional one is only attempted
	// when the first open succeeded.
	bool bFilesOpen = false;
	pApp->m_fpSysIndexDat = fopen(szSysDictDatName,"r+b");
	if ( pApp->m_fpSysIndexDat != NULL )
	{
		pApp->m_fpAddtionIndexDat = fopen(szAddtionDictDatName,"r+b");
		if ( pApp->m_fpAddtionIndexDat != NULL )
		{
			bFilesOpen = true;
		}
		else
		{
			// Do not leak the handle that was already opened.
			fclose(pApp->m_fpSysIndexDat);
			pApp->m_fpSysIndexDat = NULL;
		}
	}
	if ( !bFilesOpen )
	{
		CString strMsg;
		strMsg.Format("Cann't open file %s or %s ! ",
			szSysDictDatName,szAddtionDictDatName);
		AfxMessageBox(strMsg);
		return false;
	}

	pApp->m_obSysDiction = new Dictionary(szSysDictDtaName,szSysDictIndexName,0);
	pApp->m_obAddtionDiction = new Dictionary(szAddtionDictDtaName,szAddtionDictIndexName,0);

	// NOTE(review): when the load below reports 0 the files and dictionaries
	// stay allocated, as before; confirm who is expected to release them.
	const int found = qsearch.LoadSysHushTableFromIndexDat(pApp->m_fpSysIndexDat,pApp->m_fpAddtionIndexDat);
	return found != 0;
}

void CChildFrame::OnPostag() 
{
	// TODO: Add your command handler code here
	if ( SearchWordInit1("DictRes\\EcDict","DictRes\\HuffFreq.dat") == TRUE )
	{
		bIsInit=TRUE;
	}

	
	if ( !bIsInit )	
	{
		AfxMessageBox("Please Initializing!");
		return;
	}

	char szSour[3000];
	CPosTagView* pView;
	int nCurrLineIndex;
	CString strLine;
	int nLen,Len;


//	char Re_CompareFileName[]="cdqprg\\re_compare.txt";	//44个词性标注符号
//	FILE* fpOutput1;
	fpOutput1=fopen(Re_CompareFileName,"wb");
	if ( fpOutput1 == NULL )
	{
		char stErrorMsg[200];
		sprintf(stErrorMsg,"Error Open %s !",Re_CompareFileName);
		AfxMessageBox(stErrorMsg);
		return;
	}
	

	pView=(CPosTagView*)GetActiveView( ); 
	nCurrLineIndex = pView->GetRichEditCtrl().LineFromChar(-1);// 取当前行号
	for (m_nCurrLineIndex=0;m_nCurrLineIndex<sentenceNum;m_nCurrLineIndex++)
	{

	nLen = pView->GetRichEditCtrl().GetLine(nCurrLineIndex,strLine.GetBuffer(300));
	//Test
	if (nLen<=2)
	break;

	char cTemp;
	cTemp=strLine.GetBuffer(300)[nLen-2];
	cTemp=strLine.GetBuffer(300)[nLen-1];
	//Test
	Len = nLen;

	if ( strLine.GetBuffer(300)[Len-2] == 13 )
	{
		strLine.GetBuffer(300)[Len-2] = '\0';
		strLine.ReleaseBuffer();
	}
	else
	{
		while (strLine.GetBuffer(300)[Len-2] != 13 )
		{
			strLine.GetBuffer(300)[Len]='\0';
			CString strLineTemp;
			strLine.ReleaseBuffer();
			nCurrLineIndex = nCurrLineIndex+1;
			nLen = pView->GetRichEditCtrl().GetLine(nCurrLineIndex,strLineTemp.GetBuffer(300));	
			if ( strLineTemp.GetBuffer(300)[nLen-2] == 13 )
			{
				strLineTemp.GetBuffer(300)[nLen-2] = '\0';
				strLineTemp.ReleaseBuffer();
				Len = Len+nLen;
				strLine+=strLineTemp;
				break;
			}
			else
			{
				strLineTemp.GetBuffer(300)[nLen] = '\0';
				strLineTemp.ReleaseBuffer();
				Len = Len+nLen;
				strLine+=strLineTemp;
			}
		}
	}
	strcpy(szSour,strLine);	
	nCurrLineIndex = nCurrLineIndex+1;	
	MorphorAnalyze(szSour);
	BestSequence();
	}
	fclose(fpOutput1);
	delete ( (CPosTagApp* ) AfxGetApp() ) ->m_obSysDiction;
	delete ( (CPosTagApp* ) AfxGetApp() ) ->m_obAddtionDiction;
	AfxMessageBox("OK!"); 	
}


void  CChildFrame::BestSequence()
{
	CString linTmp1,linTmp2,linTmp3;
	CString strTmp1,strTmp2,strTmp3;
	CString posTmp1,posTmp2;
	char crTmp[200];
//	char Test[10];
	double n_prob;
	double posPro[10];
	double b[44];
	double beginPro[44];
	double viterbiPro[200][44];
	double posPairPro[44][44];
	CStringArray viterbiPos[200];	//记录第I个单词中最大viterbiPro值所对应的前一个单词的词性

	CStringArray viterbiAnn[200];	//记录第I(0~499)个单词的J(0~44)个词性
	CStringArray wordContent;
	CStringArray posAnnotate;
	CStringArray possibleAnn;
	int i,j,l,h,s,t;
	int ncount;
	int count[200][8],nowcount[8];
	const int c_nMaxLineLen=1500;
	char szOneLine[c_nMaxLineLen];

	char szPennTableName[]="cdqprg\\conmarker.txt";
	char divertFileName[]="cdqprg\\divmatrix.txt";	//转移概率矩阵
	char beginFileName[]="cdqprg\\beginstate.txt";	//初始状态分布
	
	CStringArray npos;

	FILE* fpInput2;
	fpInput2=fopen(szPennTableName,"rb");
	if ( fpInput2 == NULL )
	{
		char stErrorMsg[200];
		sprintf(stErrorMsg,"Error Open %s !",szPennTableName);
		AfxMessageBox(stErrorMsg);
		return;
	}

	FILE* fpInput3;
	fpInput3=fopen(divertFileName,"rb");
	if ( fpInput3 == NULL )
	{
		char stErrorMsg[200];
		sprintf(stErrorMsg,"Error Open %s !",divertFileName);
		AfxMessageBox(stErrorMsg);
		return;
	}
	for ( i=0;i<44;i++)
	{//转移概率赋值
		fgets(szOneLine,c_nMaxLineLen,fpInput3);		
		linTmp3=szOneLine;
		for ( int j=0;j<44;j++ )
		{
			strTmp3=linTmp3.Left(linTmp3.Find("  "));
			posPairPro[i][j]=atof(strTmp3);
			linTmp3=linTmp3.Right(linTmp3.GetLength()-linTmp3.Find("  ")-2);
		}
	}

	FILE* fpInput4;
	fpInput4=fopen(beginFileName,"rb");
	if ( fpInput4 == NULL )
	{
		char stErrorMsg[200];
		sprintf(stErrorMsg,"Error Open %s !",beginFileName);
		AfxMessageBox(stErrorMsg);
		return;
	}
	for ( i=0;i<44;i++ )
	{//初始状态赋值
		fgets(szOneLine,c_nMaxLineLen,fpInput4);		
		linTmp3=szOneLine;
		strTmp3=linTmp3.Right(linTmp3.GetLength()-linTmp3.Find(' ')-1);
		beginPro[i]=atof(strTmp3);
	}
/*
	char viterbiFileName[]="viterbi.txt";
	FILE* fpOutput;
	fpOutput=fopen(viterbiFileName,"wb");
	if ( fpOutput == NULL )
	{
		char stErrorMsg[200];
		sprintf(stErrorMsg,"Error Open %s !",viterbiFileName);
		AfxMessageBox(stErrorMsg);
		return;
	}
*/	struct DictNode *CurNode;
	int m=0;	
	CurNode = g_objMorSent.m_pWordFirst;
	while ( CurNode!=NULL )
	{
		i=0;
		strTmp1 = CurNode->m_pszEnglish;
		
		Find(strTmp1,wordContent);	//单词内容(词性、发射概率)都存于arDictContent数组中
									//取CurNode.m_pFirstChin.m_nCate
									//及CurNode.m_pNextChin.m_nCate....的值赋予arDictContent数组中,
									//若有多个词性,需分别赋值且每个词性的发射概率都为1。
		if (strcmp(strTmp1,"There")!=0 &&
			strcmp(strTmp1,"there")!=0)
		{
/*			if ( strcmp(strTmp1,CurNode->m_pszOrig)==NULL )
			{//只在单词与原型一致时使用系统词性(')
			if ( strcmp(strTmp1,CurNode->m_pszOrig)==NULL )
			{//只在单词与原型不一致时使用系统词性(")
*/				if ( CurNode->m_pszAmbig != 0 )
				{
					strTmp3 = CurNode->m_pszAmbig;
					i=1;
					posTmp2="";
					posTmp1 = strTmp3.Left(strTmp3.Find('/'));
					if ( strchr(posTmp1,'v')!=0 && strchr(posTmp1,'a')==0 )
						posTmp2="v/";
					else
						posTmp2=posTmp1+"/";
					strTmp3 = strTmp3.Right(strTmp3.GetLength()-strTmp3.Find('/')-1);
					while ( strstr(strTmp3,"/") != NULL )
					{
						posTmp1 = strTmp3.Left(strTmp3.Find('/'));
						if ( strchr(posTmp1,'v')!=0 && strchr(posTmp1,'a')==0 )
						{
							if (strstr(posTmp2,"v/")==0)
							{
								posTmp2=posTmp2+"v/";
							}
							else
							{
								strTmp3 = strTmp3.Right(strTmp3.GetLength()-strTmp3.Find('/')-1);
								break;
							}
						}
						else
						{
							posTmp2=posTmp2+posTmp1+"/";
						}
						strTmp3 = strTmp3.Right(strTmp3.GetLength()-strTmp3.Find('/')-1);
						i=i+1;
					}
					posTmp1=strTmp3;
					if ( strchr(posTmp1,'v')!=0 && strchr(posTmp1,'a')==0 )
					{
						if (strstr(posTmp2,"v/")==0)
						{
							posTmp2=posTmp2+"v";
							i=i+1;
						}
						else
							posTmp2=posTmp2.Left(posTmp2.GetLength()-1);
					}
					else
					{
						posTmp2=posTmp2+posTmp1;
						i=i+1;
					}
				}
				if (i>wordContent.GetSize())
				{
					wordContent.RemoveAll();
					ChangeWordContent(posTmp2,wordContent);
				}
//			}
		}
		t=0;
		for ( i=0;i<wordContent.GetSize();i++ )
		{
			strTmp2=wordContent[i];
			posTmp1=strTmp2.Right(strTmp2.GetLength()-strTmp2.Find(" ")-1);
			n_prob=atof(posTmp1);
			posTmp1=strTmp2.Left(strTmp2.Find(" "));
			posTmp1=" "+posTmp1+"  ";
			j=0;
			while ( !feof(fpInput2) )
			{
				fgets(szOneLine,c_nMaxLineLen,fpInput2);	
				linTmp2=szOneLine;
				if ( strstr(linTmp2,posTmp1)==0 )
				{
					j=j+1;
				}
				else
				{
					b[j]=n_prob;
					posTmp2=linTmp2.Right(linTmp2.GetLength()-linTmp2.Find(" ")-1);
					posTmp2=posTmp2.Left(posTmp2.Find("  "));
					break;
				}
			}
			fseek(fpInput2,0L,SEEK_SET); 

			s=0;
			h=0;
			if ( m==0 )
			{
				if (beginPro[j]==0 )
				{
					posPro[s]=0;
					viterbiPro[m][j]=0;
				}
				else
				{
					posPro[s]=exp(log(beginPro[j])+log(b[j]));
					viterbiPro[m][j]=posPro[s];
				}
				s=s+1;
				npos.Add(posTmp2);
				count[m][t]=j;
				t=t+1;
				possibleAnn.Add( posTmp2 );
			}
			else
			{
				for ( int l=0;l<viterbiAnn[m-1].GetSize();l++ )
				{
					if ( viterbiPro[m-1][l]==0 || posPairPro[count[m-1][l]][j]==0  )
					{
						posPro[s]=0;
					}
					else
					{
						posPro[s]=exp(log(viterbiPro[m-1][l])+log(posPairPro[count[m-1][l]][j])+log(b[j]));
					}
					s=s+1;
					npos.Add(viterbiAnn[m-1][l]);
					nowcount[h]=j;
					h=h+1;
					possibleAnn.Add(posTmp2);
				}
			}
			if ( possibleAnn.GetSize() != 1 )
			{
				for (  l=1;l<possibleAnn.GetSize();l++ )
				{
					if ( posPro[0]<posPro[l] )
					{
						posPro[0]=posPro[l];
						nowcount[0]=nowcount[l];
						possibleAnn[0]=possibleAnn[l];
						npos[0]=npos[l];
					}
				}				
				h=0;
			}
			viterbiAnn[m].Add(possibleAnn[0]);
			ncount=viterbiAnn[m].GetSize()-1;
			viterbiPro[m][ncount]=posPro[0];
			if ( m != 0 )
			{
				count[m][ncount]=nowcount[0];
			}
			viterbiPos[m].Add(npos[0]);
//			fprintf(fpOutput,"viterbiPro[%d][%d]=%7.6e %s %s\n",m,ncount,viterbiPro[m][ncount],viterbiAnn[m][ncount],viterbiPos[m][ncount]);
			possibleAnn.RemoveAll();
			npos.RemoveAll();
		}
		wordContent.RemoveAll();

		CurNode = CurNode->m_pNextWordNode;
		m=m+1;
	}


	for ( i=m-1;i>=0;i-- )
	{//确定最佳词性序列
		if ( i==m-1 )

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -