📄 dictionary.cpp
字号:
sWordPrev[0]=0;//Set empty
for(j=0;j<m_IndexTable[i].nCount;j++)
{
nCompare=_stricmp(sWordPrev,m_IndexTable[i].pWordItemHead[j].sWord);
if((j==0||nCompare<0)&&m_IndexTable[i].pWordItemHead[j].nFrequency!=-1)
{//Need to modify its handle
m_IndexTable[i].pWordItemHead[j].nHandle=nHandle;//Change its handle
strcpy(sWordPrev,m_IndexTable[i].pWordItemHead[j].sWord);//Refresh previous Word
}
else if(nCompare==0&&m_IndexTable[i].pWordItemHead[j].nFrequency!=-1)
{//Need to delete when not delete and same as previous word
m_IndexTable[i].pWordItemHead[j].nFrequency=-1;//Set delete flag
m_pModifyTable[i].nDelete+=1;//Add the number of being deleted
}
}
}
for( i=0;i<CC_NUM;i++)//Operation in the modify table
{
pPre=NULL;
pCur=m_pModifyTable[i].pWordItemHead;
sWordPrev[0]=0;//Set empty
while(pCur!=NULL)
{
if(_stricmp(pCur->data.sWord,sWordPrev)>0)
{//The new word
pCur->data.nHandle=nHandle;//Chang its handle
strcpy(sWordPrev,pCur->data.sWord);//Set new previous word
pPre=pCur;//New previous pointer
pCur=pCur->next;
}
else
{//The same word as previous,delete it.
pTemp=pCur;
if(pPre!=NULL)//pCur is the first item
pPre->next=pCur->next;
else
m_pModifyTable[i].pWordItemHead=pCur->next;
pCur=pCur->next;
delete pTemp->data.sWord;//Delete the word
delete pTemp;//Delete the item
}
}
}
return true;
}
/*********************************************************************
 *
 *  Func Name  : GetMaxMatch
 *
 *  Description: Find the first dictionary entry that begins with the
 *               given word. The original (index) table is searched
 *               first; if nothing matches there, the modify table is
 *               searched.
 *
 *  Parameters : sWord:       the word to look up
 *               sWordRet:    receives the matched entry: the leading part
 *                            of sWord that PreProcessing left out of the
 *                            search key, followed by the stored word.
 *                            Set to "" when PreProcessing succeeds but no
 *                            entry matches; untouched if PreProcessing fails.
 *               npHandleRet: receives the handle of the matched entry,
 *                            or -1 when nothing matches
 *  Returns    : true if a matching entry was found, false otherwise
 *  Author     : Kevin Zhang
 *  History    :
 *              1.create 2002-1-21
 *********************************************************************/
bool CDictionary::GetMaxMatch(char *sWord, char *sWordRet,int *npHandleRet)
{
	char sWordGet[WORD_MAXLENGTH-2],sFirstChar[3];
	int nPos,nFoundPos,nTemp;
	size_t nFirstLen;
	PWORD_CHAIN pCur;
	*npHandleRet=-1;
	if(!PreProcessing(sWord, &nPos,sWordGet))
		return false;
	sWordRet[0]=0;
	//Keep the leading part of sWord that is not contained in sWordGet.
	//The length is clamped so an unexpectedly long prefix cannot overrun sFirstChar.
	nFirstLen=strlen(sWord)-strlen(sWordGet);
	if(nFirstLen>sizeof(sFirstChar)-1)
		nFirstLen=sizeof(sFirstChar)-1;
	strncpy(sFirstChar,sWord,nFirstLen);
	sFirstChar[nFirstLen]=0;//Set the end flag
	FindInOriginalTable(nPos,sWordGet,-1,&nFoundPos);
	nTemp=nFoundPos;//Start scanning at the position reported by the lookup
	if(nFoundPos==-1)
		nTemp=0;
	//CC_Find(entry,key)==entry is used as the "entry begins with key" test
	//(presumably CC_Find returns the position of key inside entry -- confirm).
	while(nTemp<m_IndexTable[nPos].nCount&&CC_Find(m_IndexTable[nPos].pWordItemHead[nTemp].sWord,sWordGet)!=m_IndexTable[nPos].pWordItemHead[nTemp].sWord)
	{//Get the next
		nTemp+=1;
	}
	if(nTemp<m_IndexTable[nPos].nCount&&CC_Find(m_IndexTable[nPos].pWordItemHead[nTemp].sWord,sWordGet)==m_IndexTable[nPos].pWordItemHead[nTemp].sWord)
	{//Found in the original table
		strcpy(sWordRet,sFirstChar);
		strcat(sWordRet,m_IndexTable[nPos].pWordItemHead[nTemp].sWord);
		*npHandleRet=m_IndexTable[nPos].pWordItemHead[nTemp].nHandle;
		return true;
	}
	//Not found in the original table: try the modify table if it exists
	if(m_pModifyTable&&m_pModifyTable[nPos].pWordItemHead)//Exists
		pCur=m_pModifyTable[nPos].pWordItemHead;
	else
		pCur=NULL;
	//Skip entries that compare <= the key and do not begin with it
	while(pCur!=NULL&&strcmp(pCur->data.sWord,sWordGet)<=0&&CC_Find(pCur->data.sWord,sWordGet)!=pCur->data.sWord)
	{
		pCur=pCur->next;
	}
	//Accept the entry only when it begins with the key. The previous "!=" test
	//was inverted and returned an entry that did NOT begin with the key.
	if(pCur!=NULL&&CC_Find(pCur->data.sWord,sWordGet)==pCur->data.sWord)
	{//Get it
		strcpy(sWordRet,sFirstChar);
		strcat(sWordRet,pCur->data.sWord);
		*npHandleRet=pCur->data.nHandle;
		return true;
	}
	return false;
}
/*********************************************************************
 *
 *  Func Name  : GetPOSValue
 *
 *  Description: Get the POS value according to the POS string
 *
 *               Strings shorter than 3 characters ("n", "nr") are packed
 *               as sPOS[0]*256+sPOS[1].
 *               Longer strings are expected in the form "<tag>+<number>"
 *               ("nr+5") and are encoded as
 *               100*(tag[0]*256+tag[1]) + number.
 *
 *  Parameters : sPOS: the POS string; NULL yields 0
 *  Returns    : the value
 *  Author     : Kevin Zhang
 *  History    :
 *              1.create 2002-1-29
 *********************************************************************/
int CDictionary::GetPOSValue(char *sPOS)
{
	int nTag;
	size_t nTagLen;
	char *sPlusPos;
	if(sPOS==NULL)
		return 0;
	if(strlen(sPOS)<3)
	{//One or two characters; sPOS[1] is the terminator (0) for a one-character tag
		return sPOS[0]*256+sPOS[1];
	}
	sPlusPos=strchr(sPOS,'+');
	if(sPlusPos==NULL)
	{//Three or more characters without '+': previously a NULL dereference.
	 //Fall back to the two-character packing of the leading characters.
		return sPOS[0]*256+sPOS[1];
	}
	//Pack at most the first two characters in front of the '+'. Computing this
	//in place avoids the fixed 4-byte scratch buffer, which overflowed on long
	//tags and was read uninitialized for an empty tag.
	nTagLen=(size_t)(sPlusPos-sPOS);
	nTag=0;
	if(nTagLen>=1)
		nTag=sPOS[0]*256;
	if(nTagLen>=2)
		nTag+=sPOS[1];
	//Parse the number directly from the input, which is always terminated;
	//the former strncpy(...,4) could leave its buffer without a terminator.
	return 100*nTag+atoi(sPlusPos+1);
}
/*********************************************************************
 *
 *  Func Name  : GetPOSString
 *
 *  Description: Get the POS string according to the POS value
 *
 *               Values of at least 'a'*25600 are decoded as the
 *               "<tag>+<number>" form: first character nPOS/25600,
 *               second character (nPOS/100)%256 (omitted when 0) and
 *               number nPOS%100.
 *               Smaller values are decoded as a packed tag: two
 *               characters when nPOS>256, otherwise one.
 *
 *  Parameters : nPOS:    the POS value
 *               sPOSRet: receives the POS string; the caller must supply
 *                        a buffer large enough for the formatted text
 *  Returns    : always true
 *  Author     : Kevin Zhang
 *  History    :
 *              1.create 2002-1-29
 *********************************************************************/
bool CDictionary::GetPOSString(int nPOS, char *sPOSRet)
{
	//">=" so that the smallest plus-form value, 100*('a'*256)+0 ("a+0"),
	//is decoded as a plus form too; with ">" it fell into the packed-tag branch.
	if(nPOS>='a'*25600)
	{
		if((nPOS/100)%256!=0)
			sprintf(sPOSRet,"%c%c+%d",nPOS/25600,(nPOS/100)%256,nPOS%100);
		else//One-character tag in front of the '+'
			sprintf(sPOSRet,"%c+%d",nPOS/25600,nPOS%100);
	}
	else
	{
		if(nPOS>256)//High byte and low byte are both written
			sprintf(sPOSRet,"%c%c",nPOS/256,nPOS%256);
		else
			sprintf(sPOSRet,"%c",nPOS%256);
	}
	return true;
}
int CDictionary::GetFrequency(char *sWord, int nHandle)
{
char sWordFind[WORD_MAXLENGTH-2];
int nPos,nIndex;
PWORD_CHAIN pFound;
if(!PreProcessing(sWord, &nPos,sWordFind))
return 0;
if(FindInOriginalTable(nPos,sWordFind,nHandle,&nIndex))
{
return m_IndexTable[nPos].pWordItemHead[nIndex].nFrequency;
}
if(FindInModifyTable(nPos,sWordFind,nHandle,&pFound))
{
return pFound->data.nFrequency;
}
return 0;
}
//Write every entry of the original table to sFilename as text.
//Each distinct word starts a new line "<word> <POS>"; consecutive entries
//with the same word append " <POS>" to that line.
//Returns false, without touching the file, when a modify table exists,
//and false when the file cannot be opened; true otherwise.
bool CDictionary::Output(char *sFilename)
{
	FILE *fp;
	int i,j;
	char sPrevWord[WORD_MAXLENGTH]="", sCurWord[WORD_MAXLENGTH],sPOS[10];
	//Check this before opening the file: the former order truncated the
	//target file and leaked the FILE handle on this early return.
	if(m_pModifyTable)
	{//Modification made, not to output when modify table exists.
		return false;
	}
	if((fp=fopen(sFilename,"wb"))==NULL)
		return false;//fail while opening the file
	for(i=0;i<CC_NUM;i++)
	{
		for(j=0;j<m_IndexTable[i].nCount;j++)
		{
			//Get the POS string
			GetPOSString(m_IndexTable[i].pWordItemHead[j].nHandle,sPOS);
			//Full word = the two bytes CC_CHAR1(i),CC_CHAR2(i) + the stored remainder
			sprintf(sCurWord,"%c%c%s",CC_CHAR1(i),CC_CHAR2(i),m_IndexTable[i].pWordItemHead[j].sWord);
			if(strcmp(sPrevWord,sCurWord)!=0)
				fprintf(fp,"\n%s %s",sCurWord,sPOS);//New word: start a new line
			else
				fprintf(fp," %s",sPOS);//Same word: append another POS
			strcpy(sPrevWord,sCurWord);
		}
	}
	fclose(fp);
	return true;
}
//Write to sFilename, back to back with no separator, every word of the
//original table whose entry has nFrequency>50 and whose text differs from
//the entry visited just before it.
//Returns false, without touching the file, when a modify table exists,
//and false when the file cannot be opened; true otherwise.
bool CDictionary::OutputChars(char *sFilename)
{
	FILE *fp;
	int i,j;
	char sPrevWord[WORD_MAXLENGTH]="", sCurWord[WORD_MAXLENGTH];
	//Check this before opening the file: the former order truncated the
	//target file and leaked the FILE handle on this early return.
	if(m_pModifyTable)
	{//Modification made, not to output when modify table exists.
		return false;
	}
	if((fp=fopen(sFilename,"wb"))==NULL)
		return false;//fail while opening the file
	for(i=0;i<CC_NUM;i++)
	{
		for(j=0;j<m_IndexTable[i].nCount;j++)
		{
			//Full word = the two bytes CC_CHAR1(i),CC_CHAR2(i) + the stored remainder
			sprintf(sCurWord,"%c%c%s",CC_CHAR1(i),CC_CHAR2(i),m_IndexTable[i].pWordItemHead[j].sWord);
			if(strcmp(sPrevWord,sCurWord)!=0&&m_IndexTable[i].pWordItemHead[j].nFrequency>50)
				fprintf(fp,"%s",sCurWord);
			strcpy(sPrevWord,sCurWord);
		}
	}
	fclose(fp);
	return true;
}
//Merge dict2 into the current dictionary; nRatio is the weight of the current
//dictionary's frequencies relative to dict2's.
//  entry in both dictionaries  : freq = (nRatio*freq + freq2)/(nRatio+1)
//  entry only in this one      : freq = (nRatio*freq)/(nRatio+1)
//  entry only in dict2         : added through AddItem with freq2/(nRatio+1),
//                                but only when freq2 > (nRatio+1)/10
//Both tables are walked in parallel and compared by word, then by handle
//(assumes each bucket is sorted in that order -- confirm).
//Returns false when either dictionary has a modify table or when nRatio is -1
//(which would divide by zero); true otherwise.
//NOTE(review): dict2 is taken by value; if CDictionary has no deep-copy
//constructor the temporary's destructor may release tables the caller still
//owns -- confirm against the class declaration.
bool CDictionary::Merge(CDictionary dict2, int nRatio)
{
	int i,j,k,nCmpValue;
	char sWord[WORD_MAXLENGTH];
	const int nDivisor=nRatio+1;//Total weight of both dictionaries
	if(m_pModifyTable||dict2.m_pModifyTable)
	{//Modification made, not to merge when a modify table exists.
		return false;
	}
	if(nDivisor==0)
	{//nRatio==-1: every frequency update below would divide by zero
		return false;
	}
	for(i=0;i<CC_NUM;i++)
	{
		j=0;
		k=0;
		while(j<m_IndexTable[i].nCount&&k<dict2.m_IndexTable[i].nCount)
		{
			nCmpValue=strcmp(m_IndexTable[i].pWordItemHead[j].sWord,dict2.m_IndexTable[i].pWordItemHead[k].sWord);
			if(nCmpValue==0)//Same words: order them by handle
			{
				if(m_IndexTable[i].pWordItemHead[j].nHandle<dict2.m_IndexTable[i].pWordItemHead[k].nHandle)
					nCmpValue=-1;
				else if(m_IndexTable[i].pWordItemHead[j].nHandle>dict2.m_IndexTable[i].pWordItemHead[k].nHandle)
					nCmpValue=1;
			}
			if(nCmpValue==0)
			{//Same word and handle: weighted average of both frequencies
				m_IndexTable[i].pWordItemHead[j].nFrequency=(nRatio*m_IndexTable[i].pWordItemHead[j].nFrequency+dict2.m_IndexTable[i].pWordItemHead[k].nFrequency)/nDivisor;
				j+=1;
				k+=1;
			}
			else if(nCmpValue<0)//Entry only in the current dictionary
			{
				m_IndexTable[i].pWordItemHead[j].nFrequency=(nRatio*m_IndexTable[i].pWordItemHead[j].nFrequency)/nDivisor;
				j+=1;
			}
			else//Entry only in the second dictionary
			{
				if(dict2.m_IndexTable[i].pWordItemHead[k].nFrequency>nDivisor/10)
				{
					sprintf(sWord,"%c%c%s",CC_CHAR1(i),CC_CHAR2(i),dict2.m_IndexTable[i].pWordItemHead[k].sWord);
					AddItem(sWord,dict2.m_IndexTable[i].pWordItemHead[k].nHandle,dict2.m_IndexTable[i].pWordItemHead[k].nFrequency/nDivisor);
				}
				k+=1;
			}
		}
		while(j<m_IndexTable[i].nCount)//words in current dictionary are left
		{
			m_IndexTable[i].pWordItemHead[j].nFrequency=(nRatio*m_IndexTable[i].pWordItemHead[j].nFrequency)/nDivisor;
			j+=1;
		}
		while(k<dict2.m_IndexTable[i].nCount)//words in Dict2 are left
		{
			if(dict2.m_IndexTable[i].pWordItemHead[k].nFrequency>nDivisor/10)
			{
				sprintf(sWord,"%c%c%s",CC_CHAR1(i),CC_CHAR2(i),dict2.m_IndexTable[i].pWordItemHead[k].sWord);
				AddItem(sWord,dict2.m_IndexTable[i].pWordItemHead[k].nHandle,dict2.m_IndexTable[i].pWordItemHead[k].nFrequency/nDivisor);
			}
			k+=1;
		}
	}
	return true;
}
//Delete word items from the original table when
//(1)the POS value is 'x' (30720), 'g' (26368) or 'qg' (29031), or
//(2)the frequency is 0 and the next item is the same word whose POS, with its
//   low byte cleared, equals this item's POS (i.e. this POS is the parent set
//   of the following one)
//for example "<word>/n/0" is deleted when it is followed by "<word>/nr/...",
//because the "nr" item is more detailed
//Always returns true.
bool CDictionary::Optimum()
{
	int nPrevPOS,i,j,nPrevFreq;
	char sPrevWord[WORD_MAXLENGTH],sCurWord[WORD_MAXLENGTH];
	for(i=0;i<CC_NUM;i++)
	{
		sPrevWord[0]=0;
		nPrevPOS=0;
		nPrevFreq=-1;
		for(j=0;j<m_IndexTable[i].nCount;j++)
		{
			sprintf(sCurWord,"%c%c%s",CC_CHAR1(i),CC_CHAR2(i),m_IndexTable[i].pWordItemHead[j].sWord);
			//An item is judged when its successor is visited, because rule (2)
			//needs the following item.
			if(nPrevPOS==30720||nPrevPOS==26368||nPrevPOS==29031||(strcmp(sPrevWord,sCurWord)==0&&nPrevFreq==0&&m_IndexTable[i].pWordItemHead[j].nHandle/256*256==nPrevPOS))
			{//Delete Previous word item
				DelItem(sPrevWord,nPrevPOS);
			}
			strcpy(sPrevWord,sCurWord);
			nPrevPOS=m_IndexTable[i].pWordItemHead[j].nHandle;
			nPrevFreq=m_IndexTable[i].pWordItemHead[j].nFrequency;
		}
		//The last item of the bucket has no successor, so the loop never judged
		//it. Rule (2) cannot apply to it, but rule (1) still must.
		if(nPrevPOS==30720||nPrevPOS==26368||nPrevPOS==29031)
		{
			DelItem(sPrevWord,nPrevPOS);
		}
	}
	return true;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -