⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 segment(sen).c

📁 为自然语言处理领域的中文分词程序
💻 C
📖 第 1 页 / 共 5 页
字号:

/*
 * Finds the first index-table entry for which CC_Find(entry, key) returns
 * the entry itself (presumably: the entry starts with the key -- confirm
 * against CC_Find) and returns it with the original leading bytes restored.
 *
 * p            dictionary whose m_IndexTable is scanned
 * sWord        word to look up
 * sWordRet     caller-supplied buffer; set to "" once PreProcessing succeeds
 *              and to prefix+entry on a match
 * npHandleRet  receives the matched entry's nHandle, or -1 when no match
 *
 * Returns TRUE on a match, FALSE otherwise.
 *
 * The prefix is the first strlen(sWord)-strlen(sWordGet) bytes of sWord,
 * where sWordGet is the key produced by PreProcessing. It is stored in a
 * 3-byte buffer, so a prefix longer than 2 bytes is rejected (FALSE) rather
 * than written past the end of that buffer.
 */
int GetMaxMatch(pDictionary p,char *sWord, char *sWordRet,int *npHandleRet)
{
	char sWordGet[WORD_MAXLENGTH-2],sFirstChar[3];
	char *sEntry;
	int nPos,nFoundPos,nTemp;
	size_t nWordLen,nKeyLen,nPrefixLen;

	*npHandleRet=-1;
	if(!PreProcessing(sWord, &nPos,sWordGet,FALSE))
		return FALSE;
	sWordRet[0]=0;

	/* Measure once; refuse anything that would not fit sFirstChar. */
	nWordLen=strlen(sWord);
	nKeyLen=strlen(sWordGet);
	if(nKeyLen>nWordLen||nWordLen-nKeyLen>=sizeof(sFirstChar))
		return FALSE;
	nPrefixLen=nWordLen-nKeyLen;
	memcpy(sFirstChar,sWord,nPrefixLen);
	sFirstChar[nPrefixLen]=0;

	/* Start scanning at the position reported by the lookup, or at 0. */
	FindInOriginalTable(p,nPos,sWordGet,-1,&nFoundPos);
	nTemp=(nFoundPos==-1)?0:nFoundPos;

	for(;nTemp<p->m_IndexTable[nPos].nCount;nTemp++)
	{
		sEntry=p->m_IndexTable[nPos].pWordItemHead[nTemp].sWord;
		if(CC_Find(sEntry,sWordGet)==sEntry)
		{
			strcpy(sWordRet,sFirstChar);
			strcat(sWordRet,sEntry);
			*npHandleRet=p->m_IndexTable[nPos].pWordItemHead[nTemp].nHandle;
			return TRUE;
		}
	}
	/*
	 * The former second scan over a chain pointer was unreachable: the
	 * pointer was set to NULL immediately before being tested for non-NULL.
	 */
	return FALSE;
}

/*
 * Returns the nFrequency stored in the index table for (sWord, nHandle).
 * Yields 0 when PreProcessing rejects the word or when
 * FindInOriginalTable does not locate a matching entry.
 */
int GetFrequency(pDictionary p,char *sWord, int nHandle)
{
	char sKey[WORD_MAXLENGTH-2];
	int nBucket,nSlot;
	int nResult=0;

	if(PreProcessing(sWord,&nBucket,sKey,FALSE)
		&&FindInOriginalTable(p,nBucket,sKey,nHandle,&nSlot))
	{
		nResult=p->m_IndexTable[nBucket].pWordItemHead[nSlot].nFrequency;
	}
	return nResult;
}

/*
 * Releases everything owned by the dictionary's index table: each entry's
 * sWord string and each bucket's pWordItemHead array, for all CC_NUM buckets.
 *
 * Every bucket is left in the same state IDictionary produces
 * (pWordItemHead == NULL, nCount == 0), so calling this function again on
 * the same dictionary is harmless. Previously nCount kept its old value and
 * a second call indexed the NULL array.
 */
void UDictionary(pDictionary p)
{
	int i,j;
	for(i=0;i<CC_NUM;i++)
	{
		if(p->m_IndexTable[i].pWordItemHead!=NULL)
		{
			for(j=0;j<p->m_IndexTable[i].nCount;j++)
			{
				free(p->m_IndexTable[i].pWordItemHead[j].sWord);
			}
			free(p->m_IndexTable[i].pWordItemHead);
			p->m_IndexTable[i].pWordItemHead=NULL;
		}
		p->m_IndexTable[i].nCount=0;
	}
}

/*
 * Puts all CC_NUM buckets of the dictionary's index table into the empty
 * state: no entry array and an entry count of zero.
 */
void IDictionary(pDictionary p)
{
	int nSlot=0;
	while(nSlot<CC_NUM)
	{
		p->m_IndexTable[nSlot].nCount=0;
		p->m_IndexTable[nSlot].pWordItemHead=NULL;
		nSlot++;
	}
}


/*
 * Looks up the context node whose nKey equals nKey.
 *
 * A key of 0 with a non-empty list short-circuits to the head node without
 * comparing keys. Otherwise the list is walked while node keys are smaller
 * than nKey (this presumes the list is ordered by ascending nKey -- confirm
 * with whatever builds the list).
 *
 * On success *pItemRet is the matching node and TRUE is returned. On
 * failure *pItemRet is the last node whose key was smaller than nKey (NULL
 * if there was none) and FALSE is returned.
 */
int GetItem(pContextState p,int nKey,PMYCONTEXT *pItemRet)
{
	PMYCONTEXT pNode,pLast=NULL;

	if(nKey==0&&p->m_pContext)
	{
		*pItemRet=p->m_pContext;
		return TRUE;
	}

	for(pNode=p->m_pContext;pNode!=NULL;pNode=pNode->next)
	{
		if(pNode->nKey>=nKey)
			break;
		pLast=pNode;
	}

	if(pNode==NULL||pNode->nKey!=nKey)
	{
		*pItemRet=pLast;
		return FALSE;
	}
	*pItemRet=pNode;
	return TRUE;
}


int LoadContextState(pContextState p,char *sFilename)
{ 
	int i;
	FILE *fp;
	PMYCONTEXT pCur,pTemp,pPre=NULL;	
	if((fp=fopen(sFilename,"rb"))==NULL)
	{	
		return FALSE;
	}
	if(!p->m_pContext)
	{
		pCur=p->m_pContext;	
	    while(pCur!=NULL)
		{
			pTemp=pCur->next;
			for(i=0;i<p->m_nTableLen;i++)
			{
				free(pCur->aContextArray[i]);
				pCur->aContextArray[i]=NULL;
			}
			free(pCur->aContextArray);
			pCur->aContextArray=NULL;
			free(pCur->aTagFreq);
			pCur->aTagFreq=NULL;
			free(pCur);
			pCur=pTemp;
		}
		free(pCur);
		pCur=NULL;
	}	
	if(!p->m_pSymbolTable)
	{ 
		free(p->m_pSymbolTable);
		p->m_pSymbolTable=NULL;
	}
	fread(&p->m_nTableLen,sizeof(p->m_nTableLen),1,fp); 
	p->m_pSymbolTable=(int*)malloc( sizeof(int)*p->m_nTableLen); 
	fread(p->m_pSymbolTable,sizeof(int),p->m_nTableLen,fp); 
    while(!feof(fp))
	{
		pCur=(PMYCONTEXT)malloc(sizeof(MYCONTEXT));
		pCur->next=NULL;
		pCur->aContextArray=(int**)malloc(sizeof(int *)*p->m_nTableLen);
		pCur->aTagFreq=(int*)malloc(sizeof(int)*p->m_nTableLen);
		for(i=0;i<p->m_nTableLen;i++)
		{
			pCur->aContextArray[i]=(int *)malloc( sizeof(int)*p->m_nTableLen);
		}
		if(fread(&pCur->nKey,sizeof(int),1,fp)<1)
		{ 
			for(i=0;i<p->m_nTableLen;i++)
			{
				free(pCur->aContextArray[i]);
				pCur->aContextArray[i]=NULL;
			}
			free(pCur->aContextArray);
			pCur->aContextArray=NULL;
			free(pCur->aTagFreq);
			pCur->aTagFreq=NULL;
			free(pCur);
			pCur=NULL;
			break;
		}
        fread(&pCur->nTotalFreq,sizeof(int),1,fp); 
		fread(pCur->aTagFreq,sizeof(int),p->m_nTableLen,fp); 
		for(i=0;i<p->m_nTableLen;i++)
		{		    
			fread(pCur->aContextArray[i],sizeof(int),p->m_nTableLen,fp); 
		}
		if(pPre==NULL)
			p->m_pContext=pCur; 
		else
			pPre->next=pCur; 
		pPre=pCur;		
	}
	fclose(fp);
    return TRUE;
}

/*
 * Returns a smoothed estimate for symbol nCur following symbol nPrev in the
 * context identified by nKey:
 *
 *   0.9 * pair / prev  +  0.1 * prev / total
 *
 * where pair = aContextArray[prev][cur], prev = aTagFreq[prev] and
 * total = nTotalFreq of the context node.
 *
 * Falls back to 0.000001 when the context is not found, either symbol is
 * missing from the symbol table (BinarySearch returns -1), or either of the
 * two frequencies is zero.
 */
double GetContextPossibility(pContextState p,int nKey, int nPrev, int nCur)
{
	const double dFloor=0.000001;
	PMYCONTEXT pItem;
	double dResult;
	int nCol,nRow,nPairFreq,nRowFreq;

	nCol=BinarySearch(nCur,p->m_pSymbolTable,p->m_nTableLen);
	nRow=BinarySearch(nPrev,p->m_pSymbolTable,p->m_nTableLen);

	if(!GetItem(p,nKey,&pItem))
		return dFloor;
	if(nCol==-1||nRow==-1)
		return dFloor;

	nRowFreq=pItem->aTagFreq[nRow];
	if(nRowFreq==0)
		return dFloor;
	nPairFreq=pItem->aContextArray[nRow][nCol];
	if(nPairFreq==0)
		return dFloor;

	dResult=0.9*(double)nPairFreq/(double)nRowFreq+0.1*(double)nRowFreq/(double)pItem->nTotalFreq;
	return dResult;
}


/*
 * Returns aTagFreq for nSymbol in the context identified by nKey.
 * Yields 0 when the context is not found or when BinarySearch cannot find
 * nSymbol in the symbol table (returns -1).
 */
int GetContextFrequency(pContextState p,int nKey, int nSymbol)
{
	PMYCONTEXT pItem;
	int nSlot;

	if(GetItem(p,nKey,&pItem))
	{
		nSlot=BinarySearch(nSymbol,p->m_pSymbolTable,p->m_nTableLen);
		if(nSlot!=-1)
			return pItem->aTagFreq[nSlot];
	}
	return 0;
}


/*
 * Releases everything owned by the context state: every node of the
 * m_pContext list (its m_nTableLen row buffers, the row-pointer array, the
 * aTagFreq array and the node itself) and the symbol table.
 *
 * Afterwards m_pContext and m_pSymbolTable are NULL, so a repeated call is
 * harmless. p must have been initialised beforehand (both pointers either
 * NULL or owned by p).
 */
void UContextState(pContextState p)
{
	int i;
	PMYCONTEXT pNext,pCur=p->m_pContext;

	while(pCur!=NULL)
	{
		pNext=pCur->next;
		for(i=0;i<p->m_nTableLen;i++)
		{
			free(pCur->aContextArray[i]);
		}
		free(pCur->aContextArray);
		free(pCur->aTagFreq);
		free(pCur);
		pCur=pNext;
	}
	/*
	 * The head node was released by the loop above; only the dangling
	 * pointer is cleared here. Freeing it again was a double free.
	 */
	p->m_pContext=NULL;

	/* Release the symbol table even when the context list is empty. */
	free(p->m_pSymbolTable);
	p->m_pSymbolTable=NULL;
}

/*
 * Puts a context state into the empty state: no symbol table, no context
 * list and a table length of zero.
 *
 * The length is cleared as well because the lookup functions pass
 * m_nTableLen to BinarySearch even when nothing has been loaded yet.
 */
void IContextState(pContextState p)
{
	p->m_pSymbolTable=NULL;
	p->m_pContext=NULL;
	p->m_nTableLen=0;
}

/*
 * Searches the element chain for the node at (nRow, nCol) and returns its
 * value. A coordinate of -1 matches any value in the final comparison.
 *
 * p       array to search
 * pStart  node to start from; 0 means start from the head of the chain
 * pRet    optional; receives the matching node on success, otherwise the
 *         last node that was stepped over (NULL if none was)
 *
 * Returns INFINITE_VALUE when nRow/nCol exceed the recorded bounds
 * (m_nRow/m_nCol) or when no matching node is found.
 *
 * The scan skips nodes that sort before the target, by (row, col) when
 * m_bRowFirst is set and by (col, row) otherwise.
 */
double GetElementValue(pDynamicArray p,int nRow, int nCol,PARRAY_CHAIN pStart,PARRAY_CHAIN *pRet)
{
	PARRAY_CHAIN pNode;
	int bBefore;

	pNode=(pStart!=0)?pStart:p->m_pHead;
	if(pRet!=0)
		*pRet=NULL;
	if(nRow>(int)p->m_nRow||nCol>(int)p->m_nCol)
		return INFINITE_VALUE;

	for(;pNode!=NULL;pNode=pNode->next)
	{
		if(p->m_bRowFirst)
		{
			bBefore=(nRow!=-1&&(int)pNode->row<nRow)
				||(nCol!=-1&&(int)pNode->row==nRow&&(int)pNode->col<nCol);
		}
		else
		{
			bBefore=(nCol!=-1&&(int)pNode->col<nCol)
				||((int)pNode->col==nCol&&nRow!=-1&&(int)pNode->row<nRow);
		}
		if(!bBefore)
			break;
		/* remember the node being stepped over */
		if(pRet!=0)
			*pRet=pNode;
	}

	if(pNode==NULL)
		return INFINITE_VALUE;
	if(!((int)pNode->row==nRow||nRow==-1))
		return INFINITE_VALUE;
	if(!((int)pNode->col==nCol||nCol==-1))
		return INFINITE_VALUE;

	if(pRet!=0)
		*pRet=pNode;
	return pNode->value;
}

/*
 * Stores fValue/nPOS at (nRow, nCol).
 *
 * If a node already exists at that position only its value and nPOS are
 * overwritten; its stored word is left as it is. Otherwise a new node is
 * inserted so that the chain stays ordered by (row, col) when m_bRowFirst
 * is set and by (col, row) otherwise. sWord, when non-NULL, is copied into
 * the new node; a NULL sWord yields a node with sWord == NULL and
 * nWordLen == 0.
 *
 * m_nRow / m_nCol are raised to cover the position.
 *
 * Returns 0 on success. Returns -1 when memory for the new node or its word
 * cannot be allocated; in that case the array is left unchanged.
 */
int SetElement(pDynamicArray p,unsigned int nRow, unsigned int nCol, double fValue,int nPOS,char *sWord)
{
	PARRAY_CHAIN pCur=p->m_pHead;
	PARRAY_CHAIN pAdd,pPre=NULL;
	size_t nLen;

	/* Find the first node that does not sort before the target position. */
	if(p->m_bRowFirst)
	{
		while(pCur!=NULL&&(pCur->row<nRow||(pCur->row==nRow&&pCur->col<nCol)))
		{
			pPre=pCur;
			pCur=pCur->next;
		}
	}
	else
	{
		while(pCur!=NULL&&(pCur->col<nCol||(pCur->col==nCol&&pCur->row<nRow)))
		{
			pPre=pCur;
			pCur=pCur->next;
		}
	}

	if(pCur!=NULL&&pCur->row==nRow&&pCur->col==nCol)
	{
		pCur->value=fValue;
		pCur->nPOS=nPOS;
	}
	else
	{
		pAdd=(PARRAY_CHAIN)malloc(sizeof(ARRAY_CHAIN));
		if(pAdd==NULL)
			return -1;
		pAdd->col=nCol;
		pAdd->row=nRow;
		pAdd->value=fValue;
		pAdd->nPOS=nPOS;
		if(sWord)
		{
			nLen=strlen(sWord);
			pAdd->sWord=(char *)malloc(nLen+1);
			if(pAdd->sWord==NULL)
			{
				free(pAdd);
				return -1;
			}
			memcpy(pAdd->sWord,sWord,nLen+1); /* includes the terminator */
			pAdd->nWordLen=nLen;
		}
		else
		{
			pAdd->nWordLen=0;
			pAdd->sWord=NULL;
		}
		/* Link in front of pCur. */
		pAdd->next=pCur;
		if(pPre==NULL)
			p->m_pHead=pAdd;
		else
			pPre->next=pAdd;
	}

	/* Grow the bounds only after the element is really in place. */
	if(nRow>p->m_nRow)
		p->m_nRow=nRow;
	if(nCol>p->m_nCol)
		p->m_nCol=nCol;
	return 0;
}

/*
 * Removes every element from the array: frees each node and its word, then
 * resets the head pointer and both recorded bounds to the empty state.
 * m_bRowFirst is not touched.
 */
void SetEmpty(pDynamicArray p)
{
	PARRAY_CHAIN pNext,pNode;

	for(pNode=p->m_pHead;pNode!=NULL;pNode=pNext)
	{
		pNext=pNode->next;
		free(pNode->sWord); /* no-op for nodes stored without a word */
		free(pNode);
	}
	p->m_nRow=0;
	p->m_nCol=0;
	p->m_pHead=NULL;
}
/*
 * Fetches the element at (nRow, nCol). A coordinate of -1 matches any value
 * in the final comparison.
 *
 * pRetValue  required; set to INFINITE_VALUE first, then to the node's
 *            value if a node is found
 * pRetPOS    optional (may be NULL); set to 0 first, then to the node's
 *            nPOS if a node is found
 * sRetWord   optional (may be NULL); on a hit receives a copy of the node's
 *            word, or "" when the node was stored without one. It is not
 *            written when no node is found.
 *
 * Returns FALSE only when nRow/nCol exceed the recorded bounds. Returns TRUE
 * otherwise, including when no node exists at the position; callers tell
 * the two apart through *pRetValue == INFINITE_VALUE.
 */
int GetElement(pDynamicArray p,int nRow, int nCol, double *pRetValue, int *pRetPOS,char *sRetWord)
{
	PARRAY_CHAIN pCur=p->m_pHead;

	*pRetValue=INFINITE_VALUE;
	/* pRetPOS is optional further down, so it must be optional here too. */
	if(pRetPOS)
		*pRetPOS=0;
	if(nRow>(int)p->m_nRow||nCol>(int)p->m_nCol)
		return FALSE;

	if(p->m_bRowFirst)
	{
		while(pCur!=NULL&&((nRow!=-1&&(int)pCur->row<nRow)||(nCol!=-1&&(int)pCur->row==nRow&&(int)pCur->col<nCol)))
		{
			pCur=pCur->next;
		}
	}
	else
	{
		while(pCur!=NULL&&((nCol!=-1&&(int)pCur->col<nCol)||((int)pCur->col==nCol&&nRow!=-1&&(int)pCur->row<nRow)))
		{
			pCur=pCur->next;
		}
	}

	if(pCur!=NULL&&((int)pCur->row==nRow||nRow==-1)&&((int)pCur->col==nCol||nCol==-1))
	{
		*pRetValue=pCur->value;
		if(pRetPOS)
			*pRetPOS=pCur->nPOS;
		if(sRetWord)
		{
			/* Nodes created with a NULL word keep sWord == NULL. */
			if(pCur->sWord)
				strcpy(sRetWord,pCur->sWord);
			else
				sRetWord[0]=0;
		}
	}
	return TRUE;
}

/* Returns the first node of the array's element chain (NULL when empty). */
PARRAY_CHAIN GetHead(pDynamicArray p)
{
	PARRAY_CHAIN pFirst=p->m_pHead;
	return pFirst;
}

/*
 * Walks the whole element chain.
 * Stores the last node in *pTailRet (NULL for an empty chain) and returns
 * the number of nodes visited.
 */
unsigned int GetTail(pDynamicArray p,PARRAY_CHAIN *pTailRet)
{
	PARRAY_CHAIN pNode,pLast=NULL;
	unsigned int nNodes=0;

	for(pNode=p->m_pHead;pNode!=NULL;pNode=pNode->next)
	{
		pLast=pNode;
		++nNodes;
	}
	*pTailRet=pLast;
	return nNodes;
}

/*
 * Records the ordering flag used by the array's search and insert routines.
 * Existing nodes are not re-ordered. Always returns TRUE.
 */
int SetRowFirst(pDynamicArray p,int RowFirst)
{
	int bDone=TRUE;

	p->m_bRowFirst=RowFirst;
	return bDone;
}

/*
 * Releases every node of the array's element chain together with its word
 * and leaves the head pointer NULL.
 *
 * The scratch pointer used during the walk is never freed on its own: for
 * an empty chain it was never assigned, so freeing it released an
 * indeterminate pointer.
 */
void UDynamicArray(pDynamicArray p)
{
	PARRAY_CHAIN pNext,pCur=p->m_pHead;

	while(pCur!=NULL)
	{
		pNext=pCur->next;
		free(pCur->sWord); /* free(NULL) is a no-op */
		free(pCur);
		pCur=pNext;
	}
	p->m_pHead=NULL;
}

/*
 * Puts a dynamic array into the empty state (no nodes, both bounds zero)
 * and records the ordering flag passed in bRowFirst.
 */
void IDynamicArray(pDynamicArray p,int bRowFirst)
{
	p->m_bRowFirst=bRowFirst;
	p->m_nCol=0;
	p->m_nRow=0;
	p->m_pHead=NULL;
}

/*
 * Releases every element of the queue chain and clears the queue's
 * pointers.
 *
 * Each node is freed exactly once, inside the loop. m_pHead is only reset
 * afterwards; freeing it again was a double free of the first node.
 *
 * NOTE(review): m_pLastAccess is treated as a non-owning cursor into the
 * same chain and is therefore only reset, not freed. Confirm against the
 * code that assigns it; if it can ever point to separately allocated
 * memory, that memory must be released here.
 */
void UQueue(pQueue p)
{
	PQUEUE_ELEMENT pNext,pCur=p->m_pHead;

	while(pCur!=NULL)
	{
		pNext=pCur->next;
		free(pCur);
		pCur=pNext;
	}
	p->m_pHead=NULL;
	p->m_pLastAccess=NULL;
}
int Push(pQueue p,unsigned int nValue,unsigned int nIndex, double eWeight)
{
   PQUEUE_ELEMENT pAdd,pCur=p->m_pHead;
   PQUEUE_ELEMENT pPre=NULL;
   while(pCur!=NULL&&pCur->eWeight<eWeight)
   {
	   pPre=pCur;
	   pCur=pCur->next;
   }
   pAdd=(PQUEUE_ELEMENT)malloc(sizeof(QUEUE_ELEMENT));
   pAdd->nParent=nValue;
   pAdd->nIndex=nIndex;
   pAdd->eWeight=eWeight;
   pAdd->next=pCur;
   if(pPre==0)
	   p->m_pHead=pAdd;
   else
	   pPre->next=pAdd;
   pAdd=NULL;
   return 1;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -