⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 seg12_13(file).c

📁 为自然语言处理领域的中文分词程序
💻 C
📖 第 1 页 / 共 5 页
字号:
   }   
   return 0;
}

/*
 * Releases every word string and every word-item array held by the
 * dictionary's index table, leaving each of the CC_NUM slots empty
 * (pWordItemHead == NULL, nCount == 0).
 *
 * The count is reset together with the pointer so that calling this
 * function again on the same dictionary does not walk a NULL array.
 * The dictionary object itself is not freed.
 */
void UDictionary(pDictionary p)
{
	int i,j;
	for(i=0;i<CC_NUM;i++)
	{
		/* A slot without an item array has nothing to release. */
		if(p->m_IndexTable[i].pWordItemHead!=NULL)
		{
			for(j=0;j<p->m_IndexTable[i].nCount;j++)
			{
				free(p->m_IndexTable[i].pWordItemHead[j].sWord);
				p->m_IndexTable[i].pWordItemHead[j].sWord=NULL;
			}
			free(p->m_IndexTable[i].pWordItemHead);
			p->m_IndexTable[i].pWordItemHead=NULL;
		}
		/* Keep the count in step with the (now absent) array. */
		p->m_IndexTable[i].nCount=0;
	}
}

/*
 * Puts a dictionary into its empty state: every one of the CC_NUM
 * index-table slots gets a zero item count and no item array.
 * Nothing is freed here, so any previous contents are simply dropped.
 */
void IDictionary(pDictionary p)
{
	int nSlot=0;
	while(nSlot<CC_NUM)
	{
		p->m_IndexTable[nSlot].nCount=0;
		p->m_IndexTable[nSlot].pWordItemHead=NULL;
		nSlot++;
	}
}


/*
 * Looks up the context node whose nKey equals nKey.
 *
 * A key of 0 is answered with the head node straight away whenever the
 * list is non-empty. Otherwise the list is walked while node keys are
 * smaller than nKey (presumably the list is kept in ascending key
 * order - confirm against the code that builds it).
 *
 * Returns TRUE with *pItemRet set to the matching node, or FALSE with
 * *pItemRet set to the last node visited before the stop position
 * (NULL when no node was skipped).
 */
int GetItem(pContextState p,int nKey,PMYCONTEXT *pItemRet)
{
	PMYCONTEXT pNode,pLast=NULL;

	if(p->m_pContext&&nKey==0)
	{
		*pItemRet=p->m_pContext;
		return TRUE;
	}

	for(pNode=p->m_pContext;pNode!=NULL&&pNode->nKey<nKey;pNode=pNode->next)
		pLast=pNode;

	if(pNode==NULL||pNode->nKey!=nKey)
	{
		/* No exact match: report the node just before the stop point. */
		*pItemRet=pLast;
		return FALSE;
	}

	*pItemRet=pNode;
	return TRUE;
}


int LoadContextState(pContextState p,char *sFilename)
{ 
	int i;
	FILE *fp;
	PMYCONTEXT pCur,pTemp,pPre=NULL;	
	if((fp=fopen(sFilename,"rb"))==NULL)
	{	
		return FALSE;
	}
	if(!p->m_pContext)
	{
		pCur=p->m_pContext;	
	    while(pCur!=NULL)
		{
			pTemp=pCur->next;
			for(i=0;i<p->m_nTableLen;i++)
			{
				free(pCur->aContextArray[i]);
				pCur->aContextArray[i]=NULL;
			}
			free(pCur->aContextArray);
			pCur->aContextArray=NULL;
			free(pCur->aTagFreq);
			pCur->aTagFreq=NULL;
			free(pCur);
			pCur=pTemp;
		}
		free(pCur);
		pCur=NULL;
	}	
	if(!p->m_pSymbolTable)
	{ 
		free(p->m_pSymbolTable);
		p->m_pSymbolTable=NULL;
	}
	fread(&p->m_nTableLen,sizeof(p->m_nTableLen),1,fp); 
	p->m_pSymbolTable=(int*)malloc( sizeof(int)*p->m_nTableLen); 
	fread(p->m_pSymbolTable,sizeof(int),p->m_nTableLen,fp); 
    while(!feof(fp))
	{
		pCur=(PMYCONTEXT)malloc(sizeof(MYCONTEXT));
		pCur->next=NULL;
		pCur->aContextArray=(int**)malloc(sizeof(int *)*p->m_nTableLen);
		pCur->aTagFreq=(int*)malloc(sizeof(int)*p->m_nTableLen);
		for(i=0;i<p->m_nTableLen;i++)
		{
			pCur->aContextArray[i]=(int *)malloc( sizeof(int)*p->m_nTableLen);
		}
		if(fread(&pCur->nKey,sizeof(int),1,fp)<1)
		{ 
			for(i=0;i<p->m_nTableLen;i++)
			{
				free(pCur->aContextArray[i]);
				pCur->aContextArray[i]=NULL;
			}
			free(pCur->aContextArray);
			pCur->aContextArray=NULL;
			free(pCur->aTagFreq);
			pCur->aTagFreq=NULL;
			free(pCur);
			pCur=NULL;
			break;
		}
        fread(&pCur->nTotalFreq,sizeof(int),1,fp); 
		fread(pCur->aTagFreq,sizeof(int),p->m_nTableLen,fp); 
		for(i=0;i<p->m_nTableLen;i++)
		{		    
			fread(pCur->aContextArray[i],sizeof(int),p->m_nTableLen,fp); 
		}
		if(pPre==NULL)
			p->m_pContext=pCur; 
		else
			pPre->next=pCur; 
		pPre=pCur;		
	}
	fclose(fp);
    return TRUE;
}

/*
 * Computes a smoothed transition value for the symbol pair
 * (nPrev -> nCur) within the context identified by nKey:
 *
 *   0.9 * pairFreq / prevFreq  +  0.1 * prevFreq / totalFreq
 *
 * Both symbols are first mapped to table indices with BinarySearch.
 * When the context is not found, either symbol is unknown (index -1),
 * or either frequency involved is zero, the fixed floor value 0.000001
 * is returned instead.
 */
double GetContextPossibility(pContextState p,int nKey, int nPrev, int nCur)
{
	PMYCONTEXT pCtx;
	int nCurIdx,nPrevIdx,nPairFreq,nPrevFreq;

	nCurIdx=BinarySearch(nCur,p->m_pSymbolTable,p->m_nTableLen);
	nPrevIdx=BinarySearch(nPrev,p->m_pSymbolTable,p->m_nTableLen);

	if(!GetItem(p,nKey,&pCtx))
		return 0.000001;
	if(nCurIdx==-1||nPrevIdx==-1)
		return 0.000001;

	nPrevFreq=pCtx->aTagFreq[nPrevIdx];
	if(nPrevFreq==0)
		return 0.000001;

	nPairFreq=pCtx->aContextArray[nPrevIdx][nCurIdx];
	if(nPairFreq==0)
		return 0.000001;

	return 0.9*(double)nPairFreq/(double)nPrevFreq+0.1*(double)nPrevFreq/(double)pCtx->nTotalFreq;
}


/*
 * Returns the tag frequency stored for nSymbol in the context with key
 * nKey. Yields 0 when the context does not exist or when BinarySearch
 * cannot find nSymbol in the symbol table (index -1).
 */
int GetContextFrequency(pContextState p,int nKey, int nSymbol)
{
	PMYCONTEXT pCtx;
	int nPos;

	if(GetItem(p,nKey,&pCtx))
	{
		nPos=BinarySearch(nSymbol,p->m_pSymbolTable,p->m_nTableLen);
		if(nPos!=-1)
			return pCtx->aTagFreq[nPos];
	}
	return 0;
}


/*
 * Releases everything a context state owns: every node of the context
 * list (row buffers, row-pointer array, tag-frequency vector and the
 * node itself) and the symbol table. Afterwards both m_pContext and
 * m_pSymbolTable are NULL, so the call may be repeated safely.
 *
 * Fixes over the earlier version:
 *  - the head node was freed a second time after the loop had already
 *    released it (double free);
 *  - the symbol table was only freed when the list was non-empty.
 */
void UContextState(pContextState p)
{
	int i;
	PMYCONTEXT pNext,pCur=p->m_pContext;

	while(pCur!=NULL)
	{
		/* Remember the successor before this node goes away. */
		pNext=pCur->next;
		for(i=0;i<p->m_nTableLen;i++)
		{
			free(pCur->aContextArray[i]);
		}
		free(pCur->aContextArray);
		free(pCur->aTagFreq);
		free(pCur);
		pCur=pNext;
	}
	/* Every node, including the head, is gone: only clear the pointer. */
	p->m_pContext=NULL;

	free(p->m_pSymbolTable);
	p->m_pSymbolTable=NULL;
}

/*
 * Marks a context state as empty: no context list and no symbol table.
 * Nothing is freed; use this on a state that owns no data yet.
 */
void IContextState(pContextState p)
{
	p->m_pContext=NULL;
	p->m_pSymbolTable=NULL;
}

/*
 * Searches the element chain for position (nRow, nCol) and returns its
 * value, or INFINITE_VALUE when no such element exists or the requested
 * position lies beyond the recorded m_nRow / m_nCol.
 *
 * pStart - node to begin the scan from; 0 means start at the head.
 * pRet   - optional; set to NULL on entry, then updated to each node
 *          stepped over, and finally to the matching node on success.
 *          On a miss it therefore holds the last node that was skipped.
 * A row or column of -1 matches any value in the final comparison.
 *
 * The scan order follows m_bRowFirst: (row, col) when set, otherwise
 * (col, row).
 */
double GetElementValue(pDynamicArray p,int nRow, int nCol,PARRAY_CHAIN pStart,PARRAY_CHAIN *pRet)
{
	PARRAY_CHAIN pNode=(pStart!=0)?pStart:p->m_pHead;
	int bKeepGoing;

	if(pRet!=0)
		*pRet=NULL;

	if(nRow>(int)p->m_nRow||nCol>(int)p->m_nCol)
		return INFINITE_VALUE;

	for(;pNode!=NULL;pNode=pNode->next)
	{
		if(p->m_bRowFirst)
			bKeepGoing=(nRow!=-1&&(int)pNode->row<nRow)||(nCol!=-1&&(int)pNode->row==nRow&&(int)pNode->col<nCol);
		else
			bKeepGoing=(nCol!=-1&&(int)pNode->col<nCol)||((int)pNode->col==nCol&&nRow!=-1&&(int)pNode->row<nRow);
		if(!bKeepGoing)
			break;
		if(pRet!=0)
			*pRet=pNode;
	}

	if(pNode==NULL)
		return INFINITE_VALUE;
	if((int)pNode->row!=nRow&&nRow!=-1)
		return INFINITE_VALUE;
	if((int)pNode->col!=nCol&&nCol!=-1)
		return INFINITE_VALUE;

	if(pRet!=0)
		*pRet=pNode;
	return pNode->value;
}

/*
 * Stores fValue / nPOS at position (nRow, nCol) of the sparse array.
 *
 * If an element already exists at that position only its value and POS
 * are overwritten (its word is left as it was). Otherwise a new node is
 * inserted at the sorted position - ordered by (row, col) when
 * m_bRowFirst is set, by (col, row) otherwise - carrying a private copy
 * of sWord, or no word when sWord is NULL.
 *
 * m_nRow / m_nCol are raised to cover the position once the store has
 * succeeded.
 *
 * Returns 0 on success. Returns -1, leaving the array unchanged, when
 * memory for the new node or its word cannot be allocated (previously
 * this case dereferenced a NULL pointer).
 */
int SetElement(pDynamicArray p,unsigned int nRow, unsigned int nCol, double fValue,int nPOS,char *sWord)
{
   PARRAY_CHAIN pCur=p->m_pHead;
   PARRAY_CHAIN pAdd ,pPre=NULL;

   /* Find the first node that is not ordered before (nRow, nCol). */
   if(p->m_bRowFirst)
   {
	   while(pCur!=NULL&&(pCur->row<nRow||(pCur->row==nRow&&pCur->col<nCol)))
	   {
		   pPre=pCur;
		   pCur=pCur->next;
	   }
   }
   else
   {
	   while(pCur!=NULL&&(pCur->col<nCol||(pCur->col==nCol&&pCur->row<nRow)))
	   {
		   pPre=pCur;
		   pCur=pCur->next;
	   }
   }

   if(pCur!=NULL&&pCur->row==nRow&&pCur->col==nCol)
   {
	   /* Position already occupied: update in place. */
	   pCur->value=fValue;
	   pCur->nPOS=nPOS;
   }
   else
   {
	   pAdd=(PARRAY_CHAIN)malloc(sizeof(ARRAY_CHAIN));
	   if(pAdd==NULL)
		   return -1;
	   pAdd->col=nCol;
	   pAdd->row=nRow;
	   pAdd->value=fValue;
	   pAdd->nPOS=nPOS;
	   if(sWord)
	   {
		   pAdd->nWordLen=strlen(sWord);
		   pAdd->sWord=(char *)malloc((pAdd->nWordLen+1)*sizeof(char));
		   if(pAdd->sWord==NULL)
		   {
			   /* Do not link a half-built node. */
			   free(pAdd);
			   return -1;
		   }
		   strcpy(pAdd->sWord,sWord);
	   }
	   else
	   {
		   pAdd->nWordLen=0;
		   pAdd->sWord=NULL;
	   }
	   /* Splice the new node in front of pCur. */
	   pAdd->next=pCur;
	   if(pPre==NULL)
		   p->m_pHead=pAdd;
	   else
		   pPre->next=pAdd;
   }

   /* Grow the recorded extent only after the store has succeeded. */
   if(nRow>p->m_nRow)
	   p->m_nRow=nRow;
   if(nCol>p->m_nCol)
	   p->m_nCol=nCol;
   return 0;
}

/*
 * Empties the sparse array: frees every node and its word copy, then
 * resets the head pointer and the recorded row / column extent to
 * zero. The m_bRowFirst ordering flag is left untouched.
 */
void SetEmpty(pDynamicArray p)
{
	PARRAY_CHAIN pNode,pNext;

	for(pNode=p->m_pHead;pNode!=NULL;pNode=pNext)
	{
		pNext=pNode->next;
		free(pNode->sWord);   /* free(NULL) is a no-op */
		free(pNode);
	}

	p->m_pHead=NULL;
	p->m_nRow=0;
	p->m_nCol=0;
}
/*
 * Fetches the element stored at (nRow, nCol).
 *
 * pRetValue - receives the element value, or INFINITE_VALUE if no
 *             element is found; may be NULL.
 * pRetPOS   - receives the element's nPOS, or 0 if not found; may be
 *             NULL (the earlier version wrote through it
 *             unconditionally before checking it).
 * sRetWord  - if non-NULL and an element is found, receives a copy of
 *             its word; an element stored without a word yields an
 *             empty string instead of copying from a NULL pointer.
 *             The caller must supply a large enough buffer.
 * A row or column of -1 matches any value in the final comparison.
 *
 * Returns FALSE only when the requested position lies beyond the
 * recorded m_nRow / m_nCol; otherwise TRUE, whether or not an element
 * was found - check *pRetValue against INFINITE_VALUE to tell the two
 * apart.
 */
int GetElement(pDynamicArray p,int nRow, int nCol, double *pRetValue, int *pRetPOS,char *sRetWord)
{
    PARRAY_CHAIN pCur=p->m_pHead;
	if(pRetValue)
		*pRetValue=INFINITE_VALUE;
	if(pRetPOS)
		*pRetPOS=0;
	if(nRow>(int)p->m_nRow||nCol>(int)p->m_nCol)
	   return FALSE;
   if(p->m_bRowFirst)
   {
	   while(pCur!=NULL&&((nRow!=-1&&(int)pCur->row<nRow)||(nCol!=-1&&(int)pCur->row==nRow&&(int)pCur->col<nCol)))
	   {
		   pCur=pCur->next;
	   }
   }
   else
   {
	   while(pCur!=NULL&&((nCol!=-1&&(int)pCur->col<nCol)||((int)pCur->col==nCol&&nRow!=-1&&(int)pCur->row<nRow)))
	   {
		   pCur=pCur->next;
	   }
   }
   if(pCur!=NULL&&((int)pCur->row==nRow||nRow==-1)&&((int)pCur->col==nCol||nCol==-1))
   {
		if(pRetValue)
			*pRetValue=pCur->value;
		if(pRetPOS)
			*pRetPOS=pCur->nPOS;
	    if(sRetWord)
		{
		   /* Elements may be stored without a word (sWord == NULL). */
		   if(pCur->sWord)
			   strcpy(sRetWord,pCur->sWord);
		   else
			   sRetWord[0]='\0';
		}
   }
	return TRUE;
}

/*
 * Returns the first node of the array's element chain
 * (NULL when the array holds no elements).
 */
PARRAY_CHAIN GetHead(pDynamicArray p)
{
	PARRAY_CHAIN pFirst=p->m_pHead;
	return pFirst;
}

/*
 * Walks the whole element chain.
 * Stores the last node in *pTailRet (NULL for an empty chain) and
 * returns the number of nodes visited.
 */
unsigned int GetTail(pDynamicArray p,PARRAY_CHAIN *pTailRet)
{
	unsigned int nNodes=0;
	PARRAY_CHAIN pLast=NULL;
	PARRAY_CHAIN pNode;

	for(pNode=p->m_pHead;pNode!=NULL;pNode=pNode->next)
	{
		pLast=pNode;
		++nNodes;
	}

	*pTailRet=pLast;
	return nNodes;
}

/*
 * Sets the ordering flag m_bRowFirst to RowFirst. Existing nodes are
 * not re-sorted by this call. Always returns TRUE.
 */
int SetRowFirst(pDynamicArray p,int RowFirst)
{
	p->m_bRowFirst=RowFirst;   /* only the flag changes */
	return TRUE;
}

/*
 * Tears down the sparse array: frees every node along with its word
 * copy and clears the head pointer. Unlike SetEmpty, the recorded
 * row / column extent is not reset here.
 */
void UDynamicArray(pDynamicArray p)
{
	PARRAY_CHAIN pNode=p->m_pHead;
	PARRAY_CHAIN pNext;

	while(pNode!=NULL)
	{
		pNext=pNode->next;
		free(pNode->sWord);   /* free(NULL) is a no-op */
		free(pNode);
		pNode=pNext;
	}

	p->m_pHead=NULL;
}

/*
 * Initializes an empty sparse array: no nodes, zero row / column
 * extent, and the given ordering flag (bRowFirst is stored as-is in
 * m_bRowFirst). Nothing is freed.
 */
void IDynamicArray(pDynamicArray p,int bRowFirst)
{
	p->m_bRowFirst=bRowFirst;
	p->m_nCol=0;
	p->m_nRow=0;
	p->m_pHead=NULL;
}

/*
 * Releases every element still linked into the queue and resets both
 * the head pointer and the browse cursor to NULL.
 *
 * The earlier version called free() on m_pHead and m_pLastAccess after
 * the loop had already released every node, which freed the same
 * memory twice. Those pointers do not own separate allocations, so
 * they are only cleared here.
 */
void UQueue(pQueue p)
{
   PQUEUE_ELEMENT pTemp,pCur=p->m_pHead;
   while(pCur!=NULL)
   {
	   /* Save the successor before the node is released. */
	   pTemp=pCur->next;
	   free(pCur);
	   pCur=pTemp;
   }
   p->m_pHead=NULL;
   p->m_pLastAccess=NULL;
}
/*
 * Inserts a new element (nValue stored as nParent, nIndex, eWeight)
 * into the queue, keeping elements ordered by ascending eWeight. The
 * new element goes in front of the first existing element whose weight
 * is not smaller than eWeight.
 *
 * Returns 1 on success. Returns 0, leaving the queue unchanged, when
 * the element cannot be allocated (previously this case dereferenced a
 * NULL pointer).
 */
int Push(pQueue p,unsigned int nValue,unsigned int nIndex, double eWeight)
{
   PQUEUE_ELEMENT pAdd,pCur=p->m_pHead;
   PQUEUE_ELEMENT pPre=NULL;

   pAdd=(PQUEUE_ELEMENT)malloc(sizeof(QUEUE_ELEMENT));
   if(pAdd==NULL)
	   return 0;

   /* Skip all elements that are strictly lighter than the new one. */
   while(pCur!=NULL&&pCur->eWeight<eWeight)
   {
	   pPre=pCur;
	   pCur=pCur->next;
   }

   pAdd->nParent=nValue;
   pAdd->nIndex=nIndex;
   pAdd->eWeight=eWeight;
   pAdd->next=pCur;
   if(pPre==NULL)
	   p->m_pHead=pAdd;
   else
	   pPre->next=pAdd;
   return 1;
}

/*
 * Reads one element from the queue.
 *
 * bModify != 0: destructive pop - the head element is reported, removed
 *               from the queue and freed.
 * bModify == 0: browse - the element at the browse cursor
 *               (m_pLastAccess) is reported and the cursor advances.
 *               With bFirstGet set, the cursor is first rewound to the
 *               head.
 *
 * npValue, npIndex and epWeight are optional outputs; pass 0 to skip
 * any of them.
 *
 * Returns 1 when an element was reported, -1 when none was available.
 *
 * The earlier version unlinked the head on a destructive pop without
 * releasing it, leaking one element per pop. The element is now freed,
 * and a browse cursor that pointed at it is moved on so it never
 * refers to freed memory.
 */
int Pop(pQueue p,unsigned int *npValue,unsigned int *npIndex,double *epWeight, int  bModify,int  bFirstGet)
{
	PQUEUE_ELEMENT pTemp;
	if(bModify)
		pTemp=p->m_pHead;
	else
	{
	  if(bFirstGet)
		   p->m_pLastAccess=p->m_pHead;
	  pTemp=p->m_pLastAccess;
	}
	if(pTemp==NULL)
		return -1;
    if(npValue!=0)
	    *npValue=pTemp->nParent;
    if(npIndex!=0)
	    *npIndex=pTemp->nIndex;
    if(epWeight!=0)
		*epWeight=pTemp->eWeight;
    if(bModify)
	{
	   p->m_pHead=pTemp->next;
	   /* Keep the browse cursor off the node about to be released. */
	   if(p->m_pLastAccess==pTemp)
		   p->m_pLastAccess=pTemp->next;
	   free(pTemp);
	}
	else
	{
       p->m_pLastAccess=pTemp->next;
    }
    return 1;
}

/*
 * Reports whether nothing is left to read.
 * With bBrowsed equal to TRUE the browse cursor (m_pLastAccess) is
 * tested; for any other value the queue head is tested.
 * Returns non-zero when the tested pointer is NULL.
 */
int IsEmpty(pQueue p,int bBrowsed)
{
	return ((bBrowsed==TRUE)?p->m_pLastAccess:p->m_pHead)==NULL;
}


/*
 * Returns non-zero exactly when the queue holds one element, i.e. the
 * head exists and has no successor.
 */
int IsSingle(pQueue p)
{
	if(p->m_pHead==NULL)
		return 0;
	return p->m_pHead->next==NULL;
}

/*
 * Initializes an empty queue: no elements and no browse cursor.
 * Nothing is freed.
 */
void IQueue(pQueue p)
{
	p->m_pLastAccess=NULL;
	p->m_pHead=NULL;
}

/*
 * Frees the memory block referenced by the span's m_context member and
 * clears the pointer.
 * NOTE(review): only the block itself is freed here; if m_context is a
 * context state that owns further allocations, confirm that those are
 * released elsewhere before this call.
 */
void USpan(pSpan p)
{
	free(p->m_context);
	p->m_context=NULL;   /* avoid leaving a dangling pointer behind */
}

int Disamb(pSpan p)
{
	int i,j,k,nMinCandidate;
	double dMinFee=0,dTmp;
	for(i=1;i<p->m_nCurLength;i++) 
	{
		for(j=0;p->m_nTags[i][j]>=0;j++) 
		{
			nMinCandidate=MAX_POS_PER_WORD+1;
			for(k=0;p->m_nTags[i-1][k]>=0;k++)
			{
				dTmp=-log(GetContextPossibility(p->m_context,0,p->m_nTags[i-1][k],p->m_nTags[i][j]));
				dTmp+=p->m_dFrequency[i-1][k]; 
				if(nMinCandidate>10||dTmp<dMinFee) 
				{
					nMinCandidate=k;
					dMinFee=dTmp;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -