📄 dictionary.cpp
字号:
*
* Returns : success or fail
* Author : Kevin Zhang
* History :
* 1.create 2002-1-9
*********************************************************************/
bool CDictionary::FindInOriginalTable(int nInnerCode,char *sWord,int nHandle,int *nPosRet)
{
PWORD_ITEM pItems=m_IndexTable[nInnerCode].pWordItemHead;
int nStart=0,nEnd=m_IndexTable[nInnerCode].nCount-1,nMid=(nStart+nEnd)/2,nCount=0,nCmpValue;
while(nStart<=nEnd)//Binary search
{
nCmpValue=strcmp(pItems[nMid].sWord,sWord);
if(nCmpValue==0&&(pItems[nMid].nHandle==nHandle||nHandle==-1))
{
if(nPosRet)
{
if(nHandle==-1)//Not very strict match
{//Add in 2002-1-28
nMid-=1;
while(nMid>=0&&strcmp(pItems[nMid].sWord,sWord)==0)
//Get the first item which match the current word
nMid--;
if(nMid<0||strcmp(pItems[nMid].sWord,sWord)!=0)
nMid++;
}
*nPosRet=nMid;
return true;
}
if(nPosRet)
*nPosRet=nMid;
return true;//find it
}
else if(nCmpValue<0||(nCmpValue==0&&pItems[nMid].nHandle<nHandle&&nHandle!=-1))
{
nStart=nMid+1;
}
else if(nCmpValue>0||(nCmpValue==0&&pItems[nMid].nHandle>nHandle&&nHandle!=-1))
{
nEnd=nMid-1;
}
nMid=(nStart+nEnd)/2;
}
if(nPosRet)
{
//Get the previous position
*nPosRet=nMid-1;
}
return false;
}
/*********************************************************************
*
* Func Name : FindInModifyTable
*
* Description: judge the word and handle exist in the modified table and its items
*
*
* Parameters : nInnerCode: the inner code of the first CHines char
* sWord: the word
* nHandle:the handle number
* *pFindRet: the node found
*
* Returns : success or fail
* Author : Kevin Zhang
* History :
* 1.create 2002-1-9
*********************************************************************/
bool CDictionary::FindInModifyTable(int nInnerCode,char *sWord,int nHandle,PWORD_CHAIN *pFindRet)
{
PWORD_CHAIN pCur,pPre;
if(m_pModifyTable==NULL)//empty
return false;
pCur=m_pModifyTable[nInnerCode].pWordItemHead;
pPre=NULL;
while(pCur!=NULL&&(_stricmp(pCur->data.sWord,sWord)<0||(_stricmp(pCur->data.sWord,sWord)==0&&pCur->data.nHandle<nHandle)))
//sort the link chain as alphabet
{
pPre=pCur;
pCur=pCur->next;
}
if(pFindRet)
*pFindRet=pPre;
if(pCur!=NULL && _stricmp(pCur->data.sWord,sWord)==0&&(pCur->data.nHandle==nHandle||nHandle<0))
{//The node exists, delete the node and return
return true;
}
return false;
}
/*********************************************************************
 *
 *  Func Name  : GetWordType
 *
 *  Description: Classify a word from the character type of its first
 *               character (as reported by charType).
 *
 *  Parameters : sWord: the word
 *  Returns    : WT_CHINESE   for a non-empty word whose first character is
 *                            CT_CHINESE and for which IsAllChinese holds;
 *               WT_DELIMITER for a non-empty word whose first character is
 *                            CT_DELIMITER;
 *               WT_OTHER     in every other case
 *  Author     : Kevin Zhang
 *  History    :
 *              1.create 2002-1-9
 *********************************************************************/
int CDictionary::GetWordType(char *sWord)
{
    unsigned char *pBytes=(unsigned char *)sWord;
    int nKind=charType(pBytes);
    int nLen=strlen(sWord);

    if(nLen<=0)
        return WT_OTHER;//empty word is invalid
    if(nKind==CT_CHINESE&&IsAllChinese(pBytes))
        return WT_CHINESE;//Chinese word
    if(nKind==CT_DELIMITER)
        return WT_DELIMITER;//Delimiter
    return WT_OTHER;//other invalid
}
/*********************************************************************
 *
 *  Func Name  : PreProcessing
 *
 *  Description: Normalise a word before a dictionary lookup: strip
 *               leading/trailing spaces IN PLACE, then split it into a
 *               table id and the text stored under that id.
 *
 *  Parameters : sWord   : the word; modified in place when it carries
 *                         leading or trailing spaces
 *               nId     : receives the table id: CC_ID of the first two
 *                         bytes for a Chinese word, 3755 for a delimiter
 *               sWordRet: receives the stored text: the word without its
 *                         first two bytes for a Chinese word, the whole
 *                         word for a delimiter. The caller must supply a
 *                         buffer large enough; no size is checked here.
 *               bAdd    : not used by the active code
 *
 *  Returns    : true when the word was classified as Chinese or
 *               delimiter; false for empty, all-space or other words
 *               (nId and sWordRet are then left untouched)
 *  Author     : Kevin Zhang
 *  History    :
 *              1.create 2002-1-9
 *********************************************************************/
bool CDictionary::PreProcessing(char *sWord, int *nId, char *sWordRet,bool bAdd)
{
    (void)bAdd;//only referenced by handling that is disabled in this version

    int nLen=strlen(sWord);
    int nEnd=nLen-1,nBegin=0;
    if(nLen==0)
        return false;

    //Locate the first and last non-space characters
    while(nEnd>=0&&sWord[nEnd]==' ')
        nEnd-=1;
    while(nBegin<=nEnd&&sWord[nBegin]==' ')
        nBegin+=1;
    if(nBegin>nEnd)
        return false;//nothing but spaces

    if(nEnd!=nLen-1||nBegin!=0)
    {
        //Source and destination overlap, so memmove is required here;
        //strncpy on overlapping buffers is undefined behaviour.
        memmove(sWord,sWord+nBegin,nEnd-nBegin+1);
        sWord[nEnd-nBegin+1]=0;
    }

    //Classify AFTER trimming so that a leading space does not decide the type
    int nType=charType((unsigned char *)sWord);

    if(nType==CT_CHINESE)
    {//Chinese word
        if(sWord[1]==0)
            return false;//truncated two-byte character: &sWord[2] would lie past the terminator
        *nId=CC_ID(sWord[0],sWord[1]);
        //Get the inner code of the first Chinese Char
        strcpy(sWordRet,&sWord[2]);//store the word without its first Chinese Char
        return true;
    }
    if(nType==CT_DELIMITER)
    {//Delimiter
        *nId=3755;
        strcpy(sWordRet,sWord);//store the whole word
        return true;
    }
    //NOTE: the original source also carried commented-out handling for
    //Chinese-number conversion and for CT_NUM (id 3756), CT_LETTER (3757),
    //CT_SINGLE (3758) and CT_INDEX (3759) words; it remains disabled.
    return false;//other invalid
}
/*********************************************************************
 *
 *  Func Name  : MergePOS
 *
 *  Description: Merge all the POS of every word into nHandle: each
 *               distinct word keeps exactly one entry, whose handle is
 *               set to nHandle; the other entries of the same word are
 *               removed.
 *               - index table : extra entries get nFrequency=-1 (delete
 *                 flag) and the slot's nDelete counter is increased
 *               - modify table: extra nodes are unlinked and freed
 *               The modify table is allocated (zero-filled) if it does
 *               not exist yet.
 *
 *  Parameters : nHandle: the only handle which will be attached to the word
 *  Returns    : always true
 *  Author     : Kevin Zhang
 *  History    :
 *              1.create 2002-1-21
 *********************************************************************/
bool CDictionary::MergePOS(int nHandle)
{
    int i,j,nCompare;
    bool bHavePrev;//true once a word has been kept in the current slot
    char sWordPrev[WORD_MAXLENGTH];
    PWORD_CHAIN pPre,pCur,pTemp;
    if(!m_pModifyTable)//Not prepare the buffer
    {
        m_pModifyTable=new MODIFY_TABLE[CC_NUM];
        memset(m_pModifyTable,0,CC_NUM*sizeof(MODIFY_TABLE));
    }
    for(i=0;i<CC_NUM;i++)//Operation in the index table
    {
        sWordPrev[0]=0;//Set empty
        bHavePrev=false;
        for(j=0;j<m_IndexTable[i].nCount;j++)
        {
            if(m_IndexTable[i].pWordItemHead[j].nFrequency==-1)
                continue;//already flagged as deleted
            nCompare=_stricmp(sWordPrev,m_IndexTable[i].pWordItemHead[j].sWord);
            //An explicit flag is used instead of "j==0": the stored word may
            //be empty, which is indistinguishable from the empty sWordPrev
            //sentinel when item 0 itself is flagged as deleted.
            if(!bHavePrev||nCompare<0)
            {//First live entry of a new word: change its handle
                m_IndexTable[i].pWordItemHead[j].nHandle=nHandle;
                strcpy(sWordPrev,m_IndexTable[i].pWordItemHead[j].sWord);//Refresh previous Word
                bHavePrev=true;
            }
            else if(nCompare==0)
            {//Same as the previous word: flag it as deleted
                m_IndexTable[i].pWordItemHead[j].nFrequency=-1;//Set delete flag
                m_pModifyTable[i].nDelete+=1;//Add the number of being deleted
            }
        }
    }
    for(i=0;i<CC_NUM;i++)//Operation in the modify table
    {
        pPre=NULL;//last node kept so far; NULL until the head has been kept
        pCur=m_pModifyTable[i].pWordItemHead;
        sWordPrev[0]=0;//Set empty
        while(pCur!=NULL)
        {
            //The head is always a new word. Testing pPre avoids deleting a
            //head whose stored word is empty (it compares equal to the
            //empty sWordPrev sentinel).
            if(pPre==NULL||_stricmp(pCur->data.sWord,sWordPrev)>0)
            {//The new word
                pCur->data.nHandle=nHandle;//Change its handle
                strcpy(sWordPrev,pCur->data.sWord);//Set new previous word
                pPre=pCur;//New previous pointer
                pCur=pCur->next;
            }
            else
            {//Not greater than the previous word: unlink and free it.
             //pPre is never NULL here because the head is always kept.
                pTemp=pCur;
                pCur=pCur->next;
                pPre->next=pCur;
                //NOTE(review): sWord is a string buffer, presumably allocated
                //with new char[] elsewhere in this file - confirm.
                delete [] pTemp->data.sWord;//Delete the word
                delete pTemp;//Delete the item
            }
        }
    }
    return true;
}
/*********************************************************************
 *
 *  Func Name  : GetMaxMatch
 *
 *  Description: Find a dictionary entry that begins with the given word.
 *               The index table is searched first, then the modify table.
 *
 *  Parameters : sWord      : the word to match; it is passed through
 *                            PreProcessing and may be trimmed in place
 *               sWordRet   : receives the full matched word (the leading
 *                            part of sWord that PreProcessing split off,
 *                            followed by the stored entry text); set to an
 *                            empty string when PreProcessing succeeds but
 *                            nothing matches
 *               npHandleRet: receives the handle of the matched entry,
 *                            or -1 when nothing matches
 *
 *  Returns    : success or fail
 *  Author     : Kevin Zhang
 *  History    :
 *              1.create 2002-1-21
 *********************************************************************/
bool CDictionary::GetMaxMatch(char *sWord, char *sWordRet,int *npHandleRet)
{
    char sWordGet[WORD_MAXLENGTH-2],sFirstChar[3];
    int nPos,nFoundPos,nTemp;
    PWORD_CHAIN pCur;
    *npHandleRet=-1;
    if(!PreProcessing(sWord, &nPos,sWordGet))
        return false;
    sWordRet[0]=0;

    //Keep the leading part that PreProcessing removed (two bytes for a
    //Chinese word, nothing for a delimiter)
    strncpy(sFirstChar,sWord,strlen(sWord)-strlen(sWordGet));//Get the first char
    sFirstChar[strlen(sWord)-strlen(sWordGet)]=0;//Set the end flag

    //Start from the position reported by the binary search (first exact
    //match, or the slot before the final probe), clamped to 0
    FindInOriginalTable(nPos,sWordGet,-1,&nFoundPos);
    nTemp=nFoundPos;//Check its previous position
    if(nFoundPos==-1)
        nTemp=0;

    //Scan forward for an item whose text starts with sWordGet.
    //NOTE(review): CC_Find is defined elsewhere; this code relies on it
    //returning a pointer into its first argument, so a result equal to the
    //start of the item text means "sWordGet is a prefix" - confirm.
    while(nTemp<m_IndexTable[nPos].nCount&&CC_Find(m_IndexTable[nPos].pWordItemHead[nTemp].sWord,sWordGet)!=m_IndexTable[nPos].pWordItemHead[nTemp].sWord)
    {//Get the next
        nTemp+=1;
    }
    if(nTemp<m_IndexTable[nPos].nCount&&CC_Find(m_IndexTable[nPos].pWordItemHead[nTemp].sWord,sWordGet)==m_IndexTable[nPos].pWordItemHead[nTemp].sWord)
    {
        strcpy(sWordRet,sFirstChar);
        strcat(sWordRet,m_IndexTable[nPos].pWordItemHead[nTemp].sWord);
        *npHandleRet=m_IndexTable[nPos].pWordItemHead[nTemp].nHandle;
        return true;
    }

    //Not in the index table: try the modify table chain if it exists
    if(m_pModifyTable&&m_pModifyTable[nPos].pWordItemHead)//Exists
        pCur=m_pModifyTable[nPos].pWordItemHead;
    else
        pCur=NULL;

    //Skip nodes that sort at or before sWordGet and do not start with it
    while(pCur!=NULL&&strcmp(pCur->data.sWord,sWordGet)<=0&&CC_Find(pCur->data.sWord,sWordGet)!=pCur->data.sWord)
    {
        pCur=pCur->next;
    }

    //Accept the node only if it really starts with sWordGet. The test must be
    //"==" (as in the index-table branch above); with "!=" a node that does
    //NOT start with the word would be returned.
    if(pCur!=NULL&&CC_Find(pCur->data.sWord,sWordGet)==pCur->data.sWord)
    {//Get it
        strcpy(sWordRet,sFirstChar);
        strcat(sWordRet,pCur->data.sWord);
        *npHandleRet=pCur->data.nHandle;
        return true;
    }
    return false;
}
/*********************************************************************
 *
 *  Func Name  : GetPOSValue
 *
 *  Description: Get the POS value according to the POS string.
 *               - strings shorter than 3 characters ("n", "nr"):
 *                     value = sPOS[0]*256 + sPOS[1]
 *               - longer strings of the form "<tag>+<number>":
 *                     value = 100*(tag[0]*256 + tag[1]) + atoi(number)
 *               Only the first two characters of the tag contribute. A
 *               longer string without '+' is encoded from its first two
 *               characters.
 *
 *  Parameters : sPOS: the POS string
 *  Returns    : the value; 0 for a NULL or empty string
 *  Author     : Kevin Zhang
 *  History    :
 *              1.create 2002-1-29
 *********************************************************************/
int CDictionary::GetPOSValue(char *sPOS)
{
    if(sPOS==NULL||sPOS[0]==0)
        return 0;//nothing to encode; avoids reading past the terminator

    size_t nLen=strlen(sPOS);
    //A '+' is only treated as a separator in strings of 3 or more characters
    const char *sPlusPos=(nLen<3)?NULL:strchr(sPOS,'+');
    size_t nTagLen=(sPlusPos!=NULL)?(size_t)(sPlusPos-sPOS):nLen;

    //Encode at most the first two tag characters; no temporary buffer is
    //needed, so a long tag or a long number cannot overflow anything
    int nPOS=0;
    if(nTagLen>=1)
        nPOS=sPOS[0]*256;
    if(nTagLen>=2)
        nPOS+=sPOS[1];

    if(sPlusPos!=NULL)
        nPOS=100*nPOS+atoi(sPlusPos+1);//the suffix is NUL-terminated by sPOS itself
    return nPOS;
}
/*********************************************************************
 *
 *  Func Name  : GetPOSString
 *
 *  Description: Get the POS string according to the POS value.
 *               Values above 'a'*25600 are written as "<tag>+<number>",
 *               where the number is nPOS%100 and the tag has one or two
 *               characters; other values are written as one or two
 *               characters.
 *
 *  Parameters : nPOS   : the POS value
 *               sPOSRet: receives the string; the caller supplies the
 *                        buffer, no size is checked here
 *  Returns    : always true
 *  Author     : Kevin Zhang
 *  History    :
 *              1.create 2002-1-29
 *********************************************************************/
bool CDictionary::GetPOSString(int nPOS, char *sPOSRet)
{
    if(nPOS<='a'*25600)
    {//Plain tag without a numeric suffix
        if(nPOS>256)
            sprintf(sPOSRet,"%c%c",nPOS/256,nPOS%256);
        else
            sprintf(sPOSRet,"%c",nPOS%256);
        return true;
    }

    //Tag followed by "+number"
    const int nSecondChar=(nPOS/100)%256;
    const int nSuffix=nPOS%100;
    if(nSecondChar!=0)
        sprintf(sPOSRet,"%c%c+%d",nPOS/25600,nSecondChar,nSuffix);
    else
        sprintf(sPOSRet,"%c+%d",nPOS/25600,nSuffix);
    return true;
}
int CDictionary::GetFrequency(char *sWord, int nHandle)
{
char sWordFind[WORD_MAXLENGTH-2];
int nPos,nIndex;
PWORD_CHAIN pFound;
if(!PreProcessing(sWord, &nPos,sWordFind))
return 0;
if(FindInOriginalTable(nPos,sWordFind,nHandle,&nIndex))
{
return m_IndexTable[nPos].pWordItemHead[nIndex].nFrequency;
}
if(FindInModifyTable(nPos,sWordFind,nHandle,&pFound))
{
return pFound->data.nFrequency;
}
return 0;
}
// Write the index table to sFilename as text.
// Each distinct word starts a new line ("\n<word> <POS>"); further POS
// strings of the same word are appended to that line (" <POS>").
// Returns false, without touching the file, when a modify table exists;
// returns false when the file cannot be opened; true otherwise.
bool CDictionary::Output(char *sFilename)
{
    FILE *fp;
    int i,j;
    char sPrevWord[WORD_MAXLENGTH]="", sCurWord[WORD_MAXLENGTH],sPOS[10];
    // Check this BEFORE opening: opening with "wb" truncates the file, and
    // returning after a successful fopen would leak the handle.
    if(m_pModifyTable)
    {//Modification made, not to output when modify table exists.
        return false;
    }
    if((fp=fopen(sFilename,"wb"))==NULL)
        return false;//fail while opening the file
    for(i=0;i<CC_NUM;i++)
    {
        for(j=0;j<m_IndexTable[i].nCount;j++)
        {
            //Get the POS string
            GetPOSString(m_IndexTable[i].pWordItemHead[j].nHandle,sPOS);
            //Rebuild the full word: first character from the slot index,
            //then the stored remainder
            sprintf(sCurWord,"%c%c%s",CC_CHAR1(i),CC_CHAR2(i),m_IndexTable[i].pWordItemHead[j].sWord);
            if(strcmp(sPrevWord,sCurWord)!=0)
                fprintf(fp,"\n%s %s",sCurWord,sPOS);
            else
                fprintf(fp," %s",sPOS);
            strcpy(sPrevWord,sCurWord);
        }
    }
    fclose(fp);
    return true;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -