📄 result.cpp
字号:
// Result.cpp: implementation of the CResult class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "Result.h"
#include <string.h>
#include <stdio.h>
#include <math.h>
#include "..\\Utility\\Utility.h"
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
CResult::CResult()
{
	//Allocate the result table: MAX_SEGMENT_NUM rows of MAX_WORDS entries each
	m_pResult=new PWORD_RESULT[MAX_SEGMENT_NUM];
	int nRow=0;
	while(nRow<MAX_SEGMENT_NUM)
	{
		m_pResult[nRow]=new WORD_RESULT[MAX_WORDS];
		nRow++;
	}

	//Load the core dictionary and the lexical context used by the POS tagger
	m_dictCore.Load("data\\coreDict.dct");
	m_POSTagger.LoadContext("data\\lexical.ctx");
	/*
	Alternative resources, kept disabled:
	m_dictCore.Load("data\\Dict.dct");
	m_POSTagger.LoadContext("data\\trainTest.ctx");
	*/
	/*
	One-off dictionary patch, kept disabled:
	m_dictCore.AddItem("十分",'d'*256,500);
	m_dictCore.AddItem("十分",'m'*256,-500);
	m_dictCore.Save("data\\coreDict.dct");
	*/
	m_POSTagger.SetTagType();

	//Configure the three unknown-word recognizers
	m_Person.Configure("data\\Person",TT_PERSON);//person names
	m_Transliteration.Configure("data\\Trans",TT_TRANS);//transliterated names
	m_Place.Configure("data\\Place",TT_PLACE);//place names

	//Default modes
	m_nOperateType=2;//0:Only Segment;1: First Tag; 2:Second Type
	m_nOutputFormat=0;//0:PKU criterion;1:973 criterion; 2: XML criterion
}
CResult::~CResult()
{
	//Release every row of the result table allocated in the constructor
	for(int i=0;i<MAX_SEGMENT_NUM;i++)
	{
		delete [] m_pResult[i];
	}
	//The row table itself was created with new[], so it must be released
	//with delete[]; a scalar delete here is undefined behaviour.
	delete [] m_pResult;
}
//Copy the NUL-terminated string sText to pTail and return the position of the
//new terminating NUL, so successive appends need not rescan the output.
static char *ResultAppend(char *pTail,const char *sText)
{
	strcpy(pTail,sText);
	return pTail+strlen(sText);
}
//Render a tagged word sequence into sResult as text.
//  pItem            : word array; rendering stops at the first entry whose
//                     sWord is empty or whose nHandle is CT_SENTENCE_END
//  sResult          : caller-supplied output buffer; it is reset to "" first
//                     and must be large enough for the whole rendering
//                     (no size is passed in, so none can be checked here)
//  bFirstWordIgnore : when true, rendering starts at pItem[1]
//The layout depends on m_nOutputFormat:
//  0: "word/POS " (the "/POS" part is omitted when the POS string is empty)
//  1: "word\" followed by "[POS]" when the POS string is not empty
//  2: "<src>word</src>", wrapped in <any type="POS">...</any> when the POS
//     string is not empty
//Always returns true.
//Pieces are appended straight to sResult instead of being formatted into a
//WORD_MAXLENGTH-sized temporary, which could overflow once the decoration
//was added to a long word.
bool CResult::Output(PWORD_RESULT pItem, char *sResult,bool bFirstWordIgnore)
{
	int i=0;
	char sPOS[3];
	char *pTail=sResult;//Always points at the terminating NUL of sResult
	sPOS[2]=0;
	sResult[0]=0;
	if(bFirstWordIgnore)//Ignore first valid
		i=1;
	while(pItem[i].sWord[0]!=0&&pItem[i].nHandle!=CT_SENTENCE_END)//Not sentence ending flag
	{
		//Get the POS string
		if(m_nOutputFormat!=0)//Not PKU format
			PKU2973POS(pItem[i].nHandle,sPOS);
		else//PKU format: the handle packs two tag characters, high byte first
		{
			sPOS[0]=pItem[i].nHandle/256;
			sPOS[1]=pItem[i].nHandle%256;
		}
		sPOS[m_nOperateType]=0;//Truncate the POS string according to the operate type
		bool bNeedPOS=(sPOS[0]!=0);

		if(m_nOutputFormat==0)//PKU format
		{
			pTail=ResultAppend(pTail,pItem[i].sWord);
			if(bNeedPOS)
			{
				pTail=ResultAppend(pTail,"/");
				pTail=ResultAppend(pTail,sPOS);
			}
			pTail=ResultAppend(pTail," ");
		}
		else if(m_nOutputFormat==1)//973 format
		{
			pTail=ResultAppend(pTail,pItem[i].sWord);
			pTail=ResultAppend(pTail,"\\");
			if(bNeedPOS)
			{
				pTail=ResultAppend(pTail,"[");
				pTail=ResultAppend(pTail,sPOS);
				pTail=ResultAppend(pTail,"]");
			}
		}
		else if(m_nOutputFormat==2)//XML format
		{
			if(bNeedPOS)
			{
				pTail=ResultAppend(pTail,"<any type=\"");
				pTail=ResultAppend(pTail,sPOS);
				pTail=ResultAppend(pTail,"\">");
			}
			pTail=ResultAppend(pTail,"<src>");
			pTail=ResultAppend(pTail,pItem[i].sWord);
			pTail=ResultAppend(pTail,"</src>");
			if(bNeedPOS)
			{
				pTail=ResultAppend(pTail,"</any>");
			}
		}
		i++;
	}
	return true;
}
//Run the whole pipeline on one sentence: segmentation, a first POS tagging
//pass with person/transliteration/place recognition on every candidate,
//optimum re-segmentation, a second POS tagging pass, and finally Sort().
//  sSentence : sentence text handed to the segmenter
//  nCount    : passed through to Segment() and OptimumSegmet()
//Always returns true. When _ICT_DEBUG is set, intermediate results are
//printed to stdout.
bool CResult::Processing(char *sSentence,unsigned int nCount)
{
	int nSeg;//Index of the candidate segmentation being handled
#if _ICT_DEBUG
	char *sDebugText=new char[MAX_SENTENCE_LEN*2];//Scratch text for the debug dumps
#endif

	m_Seg.Segment(sSentence,m_dictCore,nCount);
	m_nResultCount=m_Seg.m_nSegmentCount;//Remember how many candidates were produced

	//First pass: tag each candidate, then run the three recognizers on it
	nSeg=0;
	while(nSeg<m_Seg.m_nSegmentCount)
	{
		m_POSTagger.POSTagging(m_Seg.m_pWordSeg[nSeg],m_dictCore,m_dictCore);
#if _ICT_DEBUG
		Output(m_Seg.m_pWordSeg[nSeg],sDebugText);
		printf("POS Tag%d:%s\n",nSeg+1,sDebugText);
#endif
		m_Person.Recognition(m_Seg.m_pWordSeg[nSeg],m_Seg.m_graphOptimum,m_Seg.m_graphSeg,m_dictCore);
		m_Transliteration.Recognition(m_Seg.m_pWordSeg[nSeg],m_Seg.m_graphOptimum,m_Seg.m_graphSeg,m_dictCore);
		m_Place.Recognition(m_Seg.m_pWordSeg[nSeg],m_Seg.m_graphOptimum,m_Seg.m_graphSeg,m_dictCore);
		nSeg++;
	}
#if _ICT_DEBUG
	printf("After person recognition.\n");
#endif

	//Second pass: re-segment on the optimum graph and tag the new candidates
	m_Seg.OptimumSegmet(nCount);
	nSeg=0;
	while(nSeg<m_Seg.m_nSegmentCount)
	{
		m_POSTagger.POSTagging(m_Seg.m_pWordSeg[nSeg],m_dictCore,m_dictCore);
#if _ICT_DEBUG
		Output(m_Seg.m_pWordSeg[nSeg],sDebugText);
		printf("POS Tag%d:%s\n",nSeg+1,sDebugText);
#endif
		nSeg++;
	}
#if _ICT_DEBUG
	printf("After Sorting.\n");
#endif

	Sort();//Order the candidates and fill m_pResult / m_dResultPossibility

#if _ICT_DEBUG
	nSeg=0;
	while(nSeg<m_Seg.m_nSegmentCount)
	{
		Output(m_pResult[nSeg],sDebugText);
		printf("POS Tag%d(P=Exp(%f)):%s\n",nSeg+1,m_dResultPossibility[nSeg],sDebugText);
		nSeg++;
	}
	delete [] sDebugText;
#endif
	return true;
}
//Sort the segmentation and POS results by their possibility, highest first.
//Each candidate in m_Seg.m_pWordSeg is scored with ComputePossibility(); the
//candidates are then passed through Adjust() into m_pResult in descending
//score order, and the matching scores are stored in m_dResultPossibility.
//Always returns true.
bool CResult::Sort()
{
	ELEMENT_TYPE dPossibility[MAX_SEGMENT_NUM],dTemp;
	int nIndex[MAX_SEGMENT_NUM],nTemp;//Index
	//Loop counters are declared once at function scope: the original reused
	//a for-init variable after its loop, which only compiles with the old
	//non-conforming MSVC for-scope rule.
	int i,j;
	//The local arrays hold MAX_SEGMENT_NUM entries, so never walk past that
	int nCount=m_Seg.m_nSegmentCount;
	if(nCount>MAX_SEGMENT_NUM)
		nCount=MAX_SEGMENT_NUM;
	memset(dPossibility,0,sizeof(dPossibility));
	//Init the possibility
	for(i=0;i<nCount;i++)
	{//Computing the possibility
		dPossibility[i]=ComputePossibility(m_Seg.m_pWordSeg[i]);
		nIndex[i]=i;//Record the index
	}
	//Simple exchange sort, descending by possibility
	for(i=0;i<nCount;i++)
		for(j=i+1;j<nCount;j++)
		{
			if(dPossibility[i]<dPossibility[j])
			{//Swap the position and value
				nTemp=nIndex[i];
				dTemp=dPossibility[i];
				nIndex[i]=nIndex[j];
				dPossibility[i]=dPossibility[j];
				nIndex[j]=nTemp;
				dPossibility[j]=dTemp;
			}
		}
	for(i=0;i<nCount;i++)
	{//Adjust the segmentation and POS result and store them in the final result array,
	 //most probable candidate first
		Adjust(m_Seg.m_pWordSeg[nIndex[i]],m_pResult[i]);
		m_dResultPossibility[i]=dPossibility[i];
	}
	return true;
}
//Score one segmentation/POS result.
//Walks pItem until the first entry with an empty sWord and accumulates:
//  - the entry's dValue (the original comment describes it as logP(Wi|Ti)),
//  - for every entry that has a successor, log(context possibility of the
//    tag pair + 1) minus log(frequency of the current tag + 1), both read
//    from m_POSTagger.m_context (described originally as logP(Ti|Ti-1)).
//Returns the accumulated sum.
ELEMENT_TYPE CResult::ComputePossibility(PWORD_RESULT pItem)
{
	ELEMENT_TYPE dLogSum=0;
	for(int k=0;pItem[k].sWord[0]!=0;k++)
	{
		//Contribution of the word itself
		dLogSum+=pItem[k].dValue;
		if(pItem[k+1].sWord[0]==0)//Last word: no tag transition to add
			continue;
		//Contribution of the transition from this tag to the next one
		dLogSum+=log((double)(m_POSTagger.m_context.GetContextPossibility(0,pItem[k].nHandle,pItem[k+1].nHandle)+1));
		dLogSum-=log((double)(m_POSTagger.m_context.GetFrequency(0,pItem[k].nHandle)+1));
	}
	return dLogSum;
}
//Adjust the result with some rules
bool CResult::Adjust(PWORD_RESULT pItem,PWORD_RESULT pItemRet)
{
int i=0,j=0;
unsigned int nLen;
char sSurName[10],sSurName2[10],sGivenName[10];
bool bProcessed=false;//Have been processed
while(pItem[i].sWord[0]!=0)
{
nLen=strlen(pItem[i].sWord);
bProcessed=false;
//Rule1: adjust person name
if(pItem[i].nHandle==28274&&ChineseNameSplit(pItem[i].sWord,sSurName,sSurName2,sGivenName,m_Person.m_dict)&&strcmp(pItem[i].sWord,"叶利钦")!=0)//'nr'
{//Divide name into surname and given name
if(sSurName[0])
{
strcpy(pItemRet[j].sWord,sSurName);
pItemRet[j++].nHandle=28274;
}
if(sSurName2[0])
{
strcpy(pItemRet[j].sWord,sSurName2);
pItemRet[j++].nHandle=28274;
}
if(sGivenName[0])
{
strcpy(pItemRet[j].sWord,sGivenName);
pItemRet[j++].nHandle=28274;
}
bProcessed=true;
}
//Rule2 for overlap words ABB 一段段、一片片
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -