📄 seg12_13(file).c
字号:
{
nStartPos=i;
i=GetFrom(p,pWordItems,nStartPos,dictCore,dictUnknown);
GetBestPOS(p);
switch(p->m_tagType)
{
case TT_NORMAL:
j=1;
while(p->m_nBestTag[j]!=-1&&j<p->m_nCurLength)
{
pWordItems[j+nStartPos-1].nHandle=p->m_nBestTag[j];
if(pWordItems[j+nStartPos-1].dValue>0&&IsExist(dictCore,pWordItems[j+nStartPos-1].sWord,-1))
pWordItems[j+nStartPos-1].dValue=GetFrequency(dictCore,pWordItems[j+nStartPos-1].sWord,p->m_nBestTag[j]);
j+=1;
}
break;
case TT_PERSON:
PersonRecognize(p,dictUnknown);
break;
case TT_PLACE:
case TT_TRANS_PERSON:
PlaceRecognize(p,dictCore,dictUnknown);
break;
default:
break;
}
Reset(p,TRUE);
}
return TRUE;
}
/*
 * ISpan - put a Span into its default (TT_NORMAL) state.
 *
 * p: span to initialise; whatever it held before is ignored.
 *
 * Allocates p->m_context with malloc() and, when that succeeds, initialises
 * it with IContextState().  On allocation failure m_context is left NULL.
 */
void ISpan(pSpan p)
{
	/*
	 * Pick the default tag type first.  The begin tag below is derived from
	 * it; deriving it from whatever happened to be in m_tagType beforehand
	 * (the span normally comes straight from malloc) gave an indeterminate
	 * begin tag.
	 */
	p->m_tagType=TT_NORMAL;
	/* Begin tag for TT_NORMAL is 0 (any other tag type would use 100). */
	p->m_nTags[0][0]=0;
	p->m_nTags[0][1]=-1;

	p->m_context=(pContextState)malloc(sizeof(struct ContextState));
	if(p->m_context!=NULL)
		IContextState(p->m_context);

	p->m_dFrequency[0][0]=0;
	p->m_nCurLength=1;
	p->m_nUnknownIndex=0;
	p->m_nStartPos=0;
	p->m_nWordPosition[1]=0;
	p->m_sWords[0][0]=0;
}
/*
 * AtomSegment - cut sSentence into atoms stored in p.
 *
 * p:         segmentation graph; m_sAtom, m_nAtomLength, m_nAtomPOS and
 *            m_nAtomCount are overwritten.
 * sSentence: NUL-terminated input.  Bytes with the high bit set are treated
 *            as the lead byte of a two-byte character.
 *
 * A leading SENTENCE_BEGIN marker and every SENTENCE_END marker become atoms
 * of their own.  Characters typed CT_CHINESE, CT_INDEX, CT_DELIMITER or
 * CT_OTHER are one atom each; consecutive characters of any other type are
 * merged into a single atom.
 *
 * Returns TRUE.
 *
 * NOTE(review): the number of atoms is not checked against the capacity of
 * the m_sAtom / m_nAtomLength / m_nAtomPOS arrays, whose sizes are not
 * visible here - confirm the caller bounds the sentence length.
 */
int AtomSegment(pSegGraph p,char *sSentence)
{
	unsigned int i=0,j=0;
	unsigned int nCurType,nNextType;
	/* The sentence and the markers never change here: measure them once. */
	size_t nSentenceLen=strlen(sSentence);
	size_t nBeginLen=strlen(SENTENCE_BEGIN);
	size_t nEndLen=strlen(SENTENCE_END);
	char sChar[3];

	sChar[2]=0;
	p->m_sAtom[j][0]=0;
	p->m_nAtomLength[j]=0;

	if(strncmp(sSentence,SENTENCE_BEGIN,nBeginLen)==0)
	{
		strcpy(p->m_sAtom[j],SENTENCE_BEGIN);
		p->m_nAtomLength[j]=nBeginLen;
		p->m_nAtomPOS[j]=CT_SENTENCE_BEGIN;
		i+=p->m_nAtomLength[j];
		j+=1;
		p->m_sAtom[j][0]=0;
		p->m_nAtomLength[j]=0;
	}

	while(i<nSentenceLen)
	{
		if(strncmp(sSentence+i,SENTENCE_END,nEndLen)==0)
		{
			strcpy(p->m_sAtom[j],SENTENCE_END);
			p->m_nAtomLength[j]=nEndLen;
			p->m_nAtomPOS[j]=CT_SENTENCE_END;
			i+=p->m_nAtomLength[j];
			j+=1;
			p->m_sAtom[j][0]=0;
			p->m_nAtomLength[j]=0;
			continue;
		}

		/* Fetch one character: one byte, or two when the high bit is set. */
		sChar[0]=sSentence[i];
		sChar[1]=0;
		i+=1;
		/*
		 * Test the high bit through unsigned char: plain char may be
		 * unsigned, in which case "sChar[0]<0" is never true.  The
		 * i<nSentenceLen guard keeps a dangling lead byte at the very end
		 * of the input from moving i past the terminator.
		 */
		if((unsigned char)sChar[0]>=0x80&&i<nSentenceLen)
		{
			sChar[1]=sSentence[i];
			i+=1;
		}
		strcat(p->m_sAtom[j],sChar);

		nCurType=charType((unsigned char *)sChar);
		/* A '.' directly followed by a digit is kept inside the number. */
		if(sChar[0]=='.'&&(charType((unsigned char *)sSentence+i)==CT_NUM||(*(sSentence+i)>='0'&&*(sSentence+i)<='9')))
			nCurType=CT_NUM;
		p->m_nAtomPOS[j]=nCurType;

		if(nCurType==CT_CHINESE||nCurType==CT_INDEX||nCurType==CT_DELIMITER||nCurType==CT_OTHER)
		{
			/* These types always form a single-character atom. */
			p->m_nAtomLength[j]=strlen(p->m_sAtom[j]);
			j+=1;
			p->m_sAtom[j][0]=0;
		}
		else
		{
			/* Close the atom when the next character has another type or
			 * the input is exhausted; otherwise keep accumulating. */
			nNextType=255;
			if(i<nSentenceLen)
				nNextType=charType((unsigned char *)(sSentence+i));
			if(nNextType!=nCurType||i==nSentenceLen)
			{
				p->m_nAtomLength[j]=strlen(p->m_sAtom[j]);
				j+=1;
				p->m_sAtom[j][0]=0;
			}
		}
	}
	p->m_nAtomCount=j;
	return TRUE;
}
/*
 * GenerateWordNet - build the word graph for sSentence in p->m_segGraph.
 *
 * p:             segmentation graph; its atom tables are refilled through
 *                AtomSegment() and edges are written with SetElement().
 * sSentence:     sentence to segment.
 * dictCore:      dictionary queried with GetMaxMatch() and GetHandle().
 * bOriginalFreq: zero     -> edges carry log-scaled values and no word text;
 *                non-zero -> edges carry the raw value and the word string.
 *
 * Step 1 adds one edge (i, i+1) per atom.  Step 2 starts at every atom,
 * appends following atoms to sWord while GetMaxMatch() keeps succeeding, and
 * adds an edge (i, j) each time the match returned equals sWord exactly.
 *
 * Always returns TRUE.
 *
 * NOTE(review): sWord has WORD_MAXLENGTH bytes, yet the strcpy()/strcat()
 * calls below never check the length of the atoms copied into it.
 * NOTE(review): nMatchFreq/nMatchHandle hold 20 entries; nothing in this
 * function limits how many GetHandle() writes - confirm its contract.
 */
int GenerateWordNet(pSegGraph p,char *sSentence,pDictionary dictCore,int bOriginalFreq)
{
unsigned int j,i=0;
char sWord[WORD_MAXLENGTH]="",sWordMatch[WORD_MAXLENGTH];
int nHandleTemp,k,nPOS;
int nMatchFreq[20],nMatchHandle[20],nTotalFreq,nMatchCount;
double dValue=0;
/* Start from a clean state: no atoms, and an emptied graph if one exists. */
p->m_nAtomCount=0;
if(p->m_segGraph){
SetEmpty(p->m_segGraph);
}
AtomSegment(p,sSentence);
/* Step 1: one edge per atom, from node i to node i+1. */
for(i=0;i<p->m_nAtomCount;i++)
{
if(p->m_nAtomPOS[i]==CT_CHINESE)
{
if(!bOriginalFreq)
SetElement(p->m_segGraph,i,i+1,log(MAX_FREQUENCE),0,0);
else
SetElement(p->m_segGraph,i,i+1,0,0,p->m_sAtom[i]);
}
else
{
/* Non-Chinese atom: choose a POS code and, for numbers and strings,
 * replace the text with a placeholder word. */
strcpy(sWord,p->m_sAtom[i]);
dValue=MAX_FREQUENCE;
switch(p->m_nAtomPOS[i])
{
case CT_INDEX:
case CT_NUM:
/* -27904 == -('m'*256); presumably a packed POS tag - TODO confirm. */
nPOS=-27904;
strcpy(sWord,"未##数");
dValue=0;
break;
case CT_DELIMITER:
/* 30464 == 'w'*256; presumably a packed POS tag - TODO confirm. */
nPOS=30464;
break;
case CT_LETTER:
nPOS=-'n'*256-'x';
dValue=0;
strcpy(sWord,"未##串");
break;
case CT_SINGLE:
/* Only characters from "+-1234567890": handled like a number;
 * anything else is handled like a letter string. */
if(GetCharCount("+-1234567890",p->m_sAtom[i])==(int)strlen(p->m_sAtom[i]))
{
nPOS=-27904;
strcpy(sWord,"未##数");
}
else
{
nPOS=-'n'*256-'x';
strcpy(sWord,"未##串");
}
dValue=0;
break;
default:
/* Any other atom type is used directly as the POS value. */
nPOS=p->m_nAtomPOS[i];
break;
}
if(!bOriginalFreq)
SetElement(p->m_segGraph,i,i+1,0,nPOS,0);
else
SetElement(p->m_segGraph,i,i+1,dValue,nPOS,sWord);
}
}
/* Step 2: multi-atom words.  For each start atom i, grow sWord one atom
 * at a time; j is the end node of the candidate edge. */
i=0;
while(i<p->m_nAtomCount)
{
strcpy(sWord,p->m_sAtom[i]);
j=i+1;
/* Special case: when this atom is "月" and the next one is "份",
 * the end node starts one position further on. */
if(strcmp(sWord,"月")==0&&strcmp(p->m_sAtom[i+1],"份")==0)
j+=1;
/* Presumably GetMaxMatch() is non-zero while the dictionary still has an
 * entry matching sWord, returned in sWordMatch - verify against its
 * definition. */
while(j<=p->m_nAtomCount&&GetMaxMatch(dictCore,sWord,sWordMatch,&nHandleTemp))
{
if(strcmp(sWordMatch,sWord)==0)
{
/* Exact dictionary hit: sum the frequencies of all its entries. */
nTotalFreq=0;
GetHandle(dictCore,sWord,&nMatchCount,nMatchHandle,nMatchFreq);
for(k=0;k<nMatchCount;k++)
{
nTotalFreq+=nMatchFreq[k];
}
/* A 4-byte word beginning with "年" or "月" that follows an all-number
 * atom, and whose remaining bytes CC_Find() locates in the list below,
 * gets no edge and ends the extension for this start atom.  The 2-byte
 * strncmp assumes a two-byte character encoding - TODO confirm. */
if(strlen(sWord)==4&&i>=1&&(IsAllNum((unsigned char *)p->m_sAtom[i-1])
||IsAllChineseNum(p->m_sAtom[i-1]))&&(strncmp(sWord,"年",2)==0
||strncmp(sWord,"月",2)==0))
{
if(CC_Find("末内中底前间初",sWord+2))
break;
}
/* A single entry keeps its handle on the edge; several entries
 * store handle 0. */
if(nMatchCount==1)
{
if(!bOriginalFreq)
SetElement(p->m_segGraph,i,j,-log(nTotalFreq+1)+log(MAX_FREQUENCE),nMatchHandle[0],0);
else
SetElement(p->m_segGraph,i,j,nTotalFreq,nMatchHandle[0],sWord);
}
else
{
if(!bOriginalFreq)
SetElement(p->m_segGraph,i,j,-log(nTotalFreq+1)+log(MAX_FREQUENCE),0,0);
else
SetElement(p->m_segGraph,i,j,nTotalFreq,0,sWord);
}
}
/* Append the next atom and advance the end node.  When j equals
 * m_nAtomCount this reads the slot just past the last atom. */
strcat(sWord,p->m_sAtom[j++]);
}
i+=1;
}
return TRUE;
}
/*
 * USegGraph - release the graph storage attached to p.
 *
 * Frees the block p->m_segGraph points to and leaves the member NULL.
 * Only that one block is freed here.
 */
void USegGraph(pSegGraph p)
{
	void *pGraph=p->m_segGraph;

	/* Detach first, then release the detached block. */
	p->m_segGraph=NULL;
	free(pGraph);
}
/*
 * ISegGraph - give p a newly allocated dynamic array as its graph.
 *
 * The array comes from malloc(), is initialised with
 * IDynamicArray(..., FALSE), and then has SetRowFirst(..., TRUE) applied.
 */
void ISegGraph(pSegGraph p)
{
	pDynamicArray pGraph=(pDynamicArray)malloc(sizeof(struct DynamicArray));

	p->m_segGraph=pGraph;
	IDynamicArray(pGraph,FALSE);
	SetRowFirst(pGraph,TRUE);
}
/*
 * ShortPath - fill p->m_pWeight and p->m_pParent for every vertex from 1 to
 * m_nVertex-1, keeping up to m_nValueKind distinct weights per vertex.
 *
 * For each vertex the incoming edges (entries of m_apCost whose column is
 * the vertex) are combined with the weights already stored for their source
 * vertices and pushed into a work queue.  The queue is then read back with
 * Pop(); each new, larger weight takes the next slot of m_pWeight and its
 * predecessor (node, index) is pushed onto the matching m_pParent queue.
 *
 * Returns 1 on success, 0 if the work queue cannot be allocated.
 */
int ShortPath(pNShortPath p)
{
	unsigned int nPreNode,i,nIndex,nCurNode;
	double eWeight;
	/*
	 * GetElementValue() hands back the first edge through this pointer, so
	 * nothing is allocated for it (a malloc'ed block here was simply lost).
	 */
	PARRAY_CHAIN pEdgeList=NULL;
	pQueue queWork;

	for(nCurNode=1;nCurNode<p->m_nVertex;nCurNode++)
	{
		queWork=(pQueue)malloc(sizeof(struct Queue));
		if(queWork==NULL)
			return 0;
		IQueue(queWork);

		/* Locate the first edge that ends at nCurNode. */
		eWeight=GetElementValue(p->m_apCost,-1,nCurNode,NULL,&pEdgeList);
		while(pEdgeList!=0 && pEdgeList->col==nCurNode)
		{
			nPreNode=pEdgeList->row;
			eWeight=pEdgeList->value;
			for(i=0;i<p->m_nValueKind;i++)
			{
				if(nPreNode>0)
				{
					/* No further stored weights for this predecessor. */
					if(p->m_pWeight[nPreNode-1][i]==INFINITE_VALUE)
						break;
					Push(queWork,nPreNode,i,eWeight+p->m_pWeight[nPreNode-1][i]);
				}
				else
				{
					/* Vertex 0 has no stored weight: the edge stands alone. */
					Push(queWork,nPreNode,i,eWeight);
					break;
				}
			}
			pEdgeList=pEdgeList->next;
		}

		for(i=0;i<p->m_nValueKind;i++)
		{
			p->m_pWeight[nCurNode-1][i]=INFINITE_VALUE;
		}

		i=0;
		while(i<p->m_nValueKind&&Pop(queWork,&nPreNode,&nIndex,&eWeight,TRUE,TRUE)!=-1)
		{
			if(p->m_pWeight[nCurNode-1][i]==INFINITE_VALUE)
				p->m_pWeight[nCurNode-1][i]=eWeight;
			else if(p->m_pWeight[nCurNode-1][i]<eWeight)
			{
				/* A strictly larger weight opens the next slot. */
				i++;
				if(i==p->m_nValueKind)
					break;
				p->m_pWeight[nCurNode-1][i]=eWeight;
			}
			Push(&p->m_pParent[nCurNode-1][i],nPreNode,nIndex,0);
		}

		/*
		 * The work queue belongs to this iteration only: remove whatever is
		 * still queued, then release the queue itself.  The outputs of the
		 * drain are scratch; all three are reassigned before their next use.
		 */
		while(Pop(queWork,&nPreNode,&nIndex,&eWeight,TRUE,TRUE)!=-1)
			;
		free(queWork);
	}
	return 1;
}
/*
 * GetPaths - write the path(s) ending at (nNode, nIndex) into nResult.
 *
 * p:       solver whose m_pParent queues were filled by ShortPath();
 *          m_nResultCount is advanced by one per path written.
 * nNode:   end vertex of the paths.
 * nIndex:  which weight slot of nNode to trace back.
 * nResult: table of paths; row m_nResultCount receives the vertex sequence
 *          starting at vertex 0 and terminated by -1.
 * bBest:   non-zero stops after the first path.
 *
 * Nothing is written once m_nResultCount has reached MAX_SEGMENT_NUM.
 * The temporary queue used for the back-trace is emptied and freed on every
 * exit path.
 */
void GetPaths(pNShortPath p,unsigned int nNode,unsigned int nIndex,int **nResult,int bBest)
{
	pQueue queResult;
	int bFirstGet;
	unsigned int nCurNode,nCurIndex,nParentNode;
	unsigned int nParentIndex,nResultIndex=0;

	/* Check for room before allocating, so this exit owns nothing. */
	if(p->m_nResultCount>=MAX_SEGMENT_NUM)
		return ;
	queResult=(pQueue)malloc(sizeof(struct Queue));
	if(queResult==NULL)
		return ;
	IQueue(queResult);

	nResult[p->m_nResultCount][nResultIndex]=-1;
	Push(queResult,nNode,nIndex,0);
	nCurNode=nNode;
	nCurIndex=nIndex;
	while(!IsEmpty(queResult,FALSE))
	{
		/* Follow parent links back toward vertex 0, recording each step. */
		while(nCurNode>0)
		{
			if(Pop(&p->m_pParent[nCurNode-1][nCurIndex],&nParentNode,&nParentIndex,0,FALSE,TRUE)!=-1)
			{
				nCurNode=nParentNode;
				nCurIndex=nParentIndex;
			}
			if(nCurNode>0)
				Push(queResult,nCurNode,nCurIndex,0);
		}
		if(nCurNode==0)
		{
			/* Reached the start: emit vertex 0, then the recorded vertices. */
			nResult[p->m_nResultCount][nResultIndex++]=nCurNode;
			bFirstGet=TRUE;
			nParentNode=nCurNode;
			while(Pop(queResult,&nCurNode,&nCurIndex,0,FALSE,bFirstGet)!=-1)
			{
				nResult[p->m_nResultCount][nResultIndex++]=nCurNode;
				bFirstGet=FALSE;
				nParentNode=nCurNode;
			}
			nResult[p->m_nResultCount][nResultIndex]=-1;
			p->m_nResultCount+=1;
			if(p->m_nResultCount>=MAX_SEGMENT_NUM)
				goto cleanup;
			nResultIndex=0;
			nResult[p->m_nResultCount][nResultIndex]=-1;
			if(bBest)
				goto cleanup;
		}
		/* Discard recorded vertices that have no further parent to try. */
		Pop(queResult,&nCurNode,&nCurIndex,0,FALSE,TRUE);
		while((IsEmpty(queResult,FALSE))==FALSE&&(IsSingle(&p->m_pParent[nCurNode-1][nCurIndex])||IsEmpty(&p->m_pParent[nCurNode-1][nCurIndex],TRUE)))
		{
			Pop(queResult,&nCurNode,&nCurIndex,0,TRUE,TRUE);
			Pop(queResult,&nCurNode,&nCurIndex,0,FALSE,TRUE);
		}
		/* Continue from the next parent of the remaining vertex. */
		if(IsEmpty(queResult,FALSE)==FALSE&&IsEmpty(&p->m_pParent[nCurNode-1][nCurIndex],TRUE)==FALSE)
		{
			Pop(&p->m_pParent[nCurNode-1][nCurIndex],&nParentNode,&nParentIndex,0,FALSE,FALSE);
			nCurNode=nParentNode;
			nCurIndex=nParentIndex;
			if(nCurNode>0)
				Push(queResult,nCurNode,nCurIndex,0);
		}
	}

cleanup:
	/* Remove anything still queued, then release the queue itself. */
	while(Pop(queResult,&nCurNode,&nCurIndex,0,TRUE,TRUE)!=-1)
		;
	free(queResult);
}
/*
 * OutputPath - collect the computed paths into nResult.
 *
 * p:       solver on which ShortPath() has been run; m_nResultCount is
 *          reset to 0 here and then advanced by GetPaths().
 * nResult: table receiving one path per row.
 * bBest:   non-zero returns as soon as one path has been produced.
 * npCount: receives the number of paths written.  It is always assigned,
 *          including 0 when the last vertex has no finite weight.
 *
 * With fewer than two vertices the single path {0, 1} is reported.
 * Always returns 1.
 */
int OutputPath( pNShortPath p,int **nResult,int bBest,int *npCount)
{
	unsigned int i;

	p->m_nResultCount=0;
	/*
	 * Give the caller a defined count even if the loop below never runs;
	 * otherwise *npCount would keep whatever value it had before the call.
	 */
	*npCount=0;

	if(p->m_nVertex < 2)
	{
		nResult[0][0]=0;
		nResult[0][1]=1;
		*npCount=1;
		return 1;
	}

	/* Walk the weight slots of the last vertex while they are finite. */
	for(i=0;i<p->m_nValueKind&&p->m_pWeight[p->m_nVertex-2][i]<INFINITE_VALUE;i++)
	{
		GetPaths(p,p->m_nVertex-1,i,nResult,bBest);
		*npCount=p->m_nResultCount;
		if(nResult[i][0]!=-1&&bBest)
			return 1;
		if(p->m_nResultCount>=MAX_SEGMENT_NUM)
			return 1;
	}
	return 1;
}
/*
 * UNShortPath - release the storage held by an NShortPath.
 *
 * Frees each of the m_nVertex-1 rows of m_pWeight and m_pParent, then the
 * two row tables, then tears down m_apCost with UDynamicArray() and frees
 * it.  Every pointer released is left NULL.
 */
void UNShortPath(struct NShortPath *p)
{
	unsigned int nRow=0;

	/* Per-vertex rows first. */
	while(nRow<p->m_nVertex-1)
	{
		free(p->m_pWeight[nRow]);
		p->m_pWeight[nRow]=NULL;
		free(p->m_pParent[nRow]);
		p->m_pParent[nRow]=NULL;
		nRow++;
	}

	/* Then the row tables. */
	free(p->m_pWeight);
	p->m_pWeight=NULL;
	free(p->m_pParent);
	p->m_pParent=NULL;

	/* Finally the cost array. */
	UDynamicArray(p->m_apCost);
	free(p->m_apCost);
	p->m_apCost=NULL;
}
/*
 * INShortPath - set up an NShortPath over the cost array apCost.
 *
 * p:          solver to initialise.
 * apCost:     edge costs; stored in p->m_apCost (the pointer is kept, not
 *             copied).
 * nValueKind: number of weight slots kept per vertex.
 *
 * The vertex count is the larger of apCost->m_nCol+1 and apCost->m_nRow+1.
 * For each of the m_nVertex-1 non-start vertices a row of nValueKind parent
 * queues and a row of nValueKind weights is allocated.  The weights are
 * left unset here.
 */
void INShortPath(struct NShortPath *p,pDynamicArray apCost,unsigned int nValueKind)
{
	unsigned int i,k;

	p->m_apCost=apCost;
	p->m_nValueKind=nValueKind;
	p->m_nVertex=apCost->m_nCol+1;
	if(p->m_nVertex<apCost->m_nRow+1)
		p->m_nVertex=apCost->m_nRow+1;

	p->m_pParent=(pQueue*)malloc(sizeof(pQueue)*(p->m_nVertex-1));
	p->m_pWeight=(double**)malloc(sizeof(double*)*(p->m_nVertex-1));
	for(i=0;i<p->m_nVertex-1;i++)
	{
		p->m_pParent[i]=(pQueue)malloc(sizeof(struct Queue)*(nValueKind));
		/*
		 * Every queue of the row is used later as m_pParent[i][k], so each
		 * one must be initialised, not only the first.
		 */
		for(k=0;k<nValueKind;k++)
			IQueue(&p->m_pParent[i][k]);
		p->m_pWeight[i]=(double*)malloc(sizeof(double)*nValueKind);
	}
}
/*
 * IUnknowWord - initialise an unknown-word recogniser.
 *
 * Empties the flag string, then allocates and initialises the role-tagging
 * span (ISpan) and the recogniser's dictionary (IDictionary).
 */
void IUnknowWord(pUnknowWord p)
{
	pSpan pRoleTag=(pSpan)malloc(sizeof(struct Span));
	pDictionary pDict;

	p->m_sUnknownFlags[0]=0;

	p->m_roleTag=pRoleTag;
	ISpan(pRoleTag);

	pDict=(pDictionary)malloc(sizeof(struct Dictionary));
	p->m_dict=pDict;
	IDictionary(pDict);
}
/*
 * UUnknowWord - tear down an unknown-word recogniser.
 *
 * Runs USpan() on the role-tagging span and frees it, then runs
 * UDictionary() on the dictionary and frees it.  Both members end up NULL.
 */
void UUnknowWord(pUnknowWord p)
{
	/* Role-tagging span. */
	USpan(p->m_roleTag);
	free(p->m_roleTag);
	p->m_roleTag=NULL;

	/* Dictionary. */
	UDictionary(p->m_dict);
	free(p->m_dict);
	p->m_dict=NULL;
}
/*
 * Recognition - tag pWordSegResult and merge the unknown words found into
 * graphOptimum.
 *
 * POSTagging() is run on p->m_roleTag first.  Each unknown word it reports
 * is a pair of positions (m_nUnknownWords[n][0], m_nUnknownWords[n][1]);
 * these are converted to atom indices by accumulating graphSeg's atom
 * lengths.  If the word covers at least one atom and the value currently
 * stored for that edge is greater than the word's m_dWordsPossibility, the
 * edge is overwritten with that possibility, p->m_nPOS and
 * p->m_sUnknownFlags.
 *
 * Always returns TRUE.
 */
int Recognition(pUnknowWord p,PWORD_RESULT pWordSegResult, pDynamicArray graphOptimum,pSegGraph graphSeg,pDictionary dictCore)
{
	int nWord;
	int nPos=0,nAtom=0,nFirstAtom,nLastAtom,nOldPOS;
	double dOldValue;

	POSTagging(p->m_roleTag,pWordSegResult,dictCore,p->m_dict);

	/* nPos and nAtom carry over from one unknown word to the next. */
	for(nWord=0;nWord<p->m_roleTag->m_nUnknownIndex;nWord++)
	{
		/* Advance to the atom at which the word starts. */
		for(;(unsigned int)nAtom<graphSeg->m_nAtomCount&&nPos<p->m_roleTag->m_nUnknownWords[nWord][0];nAtom++)
			nPos+=graphSeg->m_nAtomLength[nAtom];
		nFirstAtom=nAtom;

		/* Advance to the atom just past the end of the word. */
		for(;(unsigned int)nAtom<graphSeg->m_nAtomCount&&nPos<p->m_roleTag->m_nUnknownWords[nWord][1];nAtom++)
			nPos+=graphSeg->m_nAtomLength[nAtom];
		nLastAtom=nAtom;

		if(nFirstAtom>=nLastAtom)
			continue;

		GetElement(graphOptimum,nFirstAtom,nLastAtom,&dOldValue,&nOldPOS,0);
		if(dOldValue>p->m_roleTag->m_dWordsPossibility[nWord])
			SetElement(graphOptimum,nFirstAtom,nLastAtom,p->m_roleTag->m_dWordsPossibility[nWord],p->m_nPOS,p->m_sUnknownFlags);
	}
	return TRUE;
}
/*
 * Configure - load the data files of an unknown-word recogniser and select
 * its tag type.
 *
 * p:           recogniser to configure.
 * sConfigFile: base path; "<base>.dct" is loaded into p->m_dict and
 *              "<base>.ctx" into p->m_roleTag.
 * type:        tag type applied with SetTagType(); it also selects m_nPOS
 *              and the placeholder text in m_sUnknownFlags.
 *
 * Returns TRUE on completion, FALSE if sConfigFile is NULL or too long for
 * the base path plus a 4-character extension to fit the local file-name
 * buffer (previously such a path overran the buffer).
 *
 * NOTE(review): the results of LoadDicFile() and LoadContext() are not
 * examined, so a missing data file still yields TRUE.
 */
int Configure(pUnknowWord p,char *sConfigFile,enum TAG_TYPE type)
{
	char sFilename[100];

	/* Both extensions are 4 characters; leave room for them and the NUL. */
	if(sConfigFile==NULL||strlen(sConfigFile)+4>=sizeof(sFilename))
		return FALSE;

	strcpy(sFilename,sConfigFile);
	strcat(sFilename,".dct");
	LoadDicFile(p->m_dict,sFilename,FALSE);

	strcpy(sFilename,sConfigFile);
	strcat(sFilename,".ctx");
	LoadContext(p->m_roleTag,sFilename);

	SetTagType(p->m_roleTag,type);
	switch(type)
	{
	case TT_PERSON:
	case TT_TRANS_PERSON:
		/* -28274 == -('n'*256+'r') */
		p->m_nPOS=-28274;
		strcpy(p->m_sUnknownFlags,"未##人");
		break;
	case TT_PLACE:
		/* -28275 == -('n'*256+'s') */
		p->m_nPOS=-28275;
		strcpy(p->m_sUnknownFlags,"未##地");
		break;
	default :
		/* m_sUnknownFlags is left as it was for other tag types. */
		p->m_nPOS=0;
		break;
	}
	return TRUE;
}
int IsGivenName(pUnknowWord p,char *sName)
{
char sFirstChar[3],sSecondChar[3];
double dGivenNamePossibility=0,dSingleNamePossibility=0;
if(strlen(sName)!=4)
return FALSE;
strncpy(sFirstChar,sName,2);
sFirstChar[2]=0;
strncpy(sSecondChar,sName+2,2);
sSecondChar[2]=0;
dGivenNamePossibility+=log((double)GetFrequency(p->m_dict,sFirstChar,2)+1.0)-log(GetContextFrequency(p->m_roleTag->m_context,0,2)+1.0);
dGivenNamePossi
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -