⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dictionary.cpp

📁 计算所汉语词法分析系统ICTCLAS介绍 词是最小的能够独立活动的有意义的语言成分。 但汉语是以字为基本的书写单位
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/****************************************************************************
 *
 * Copyright (c) 2000, 2001 
 *     Machine Group
 *     Software Research Lab.
 *     Institute of Computing Tech.
 *     Chinese Academy of Sciences
 *     All rights reserved.
 *
 * This file is the confidential and proprietary property of 
 * Institute of Computing Tech. and the posession or use of this file requires 
 * a written license from the author.
 * Filename: Dictionary.cpp
 * Abstract:
 *           implementation of the CDictionary class.
 * Author:   Kevin Zhang 
 *          (zhanghp@software.ict.ac.cn)
 * Date:     2002-1-8
 *
 * Notes:
 *                
 ****************************************************************************/
#include "stdafx.h"
#include "Dictionary.h"
#include "Utility.h"
#include <string.h>
#include <stdlib.h>
#include <malloc.h>
#include <stdio.h>
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

CDictionary::CDictionary()
{
  //initilization
  memset(m_IndexTable,0,sizeof(m_IndexTable));
  m_pModifyTable=NULL;
}

CDictionary::~CDictionary()
{
	for(int i=0;i<CC_NUM;i++)
	{//delete the memory of word item array in the dictionary
		for(int j=0;j<m_IndexTable[i].nCount;j++)
			delete m_IndexTable[i].pWordItemHead[j].sWord;
		delete [] m_IndexTable[i].pWordItemHead;
	}
    DelModified();
}
/*********************************************************************
 *
 *  Func Name  : Load
 *
 *  Description: Load the dictionary from the file .dct
 *              
 *
 *  Parameters : sFilename: the file name
 *
 *  Returns    : success or fail
 *  Author     : Kevin Zhang  
 *  History    : 
 *              1.create 2002-1-9
 *********************************************************************/
bool CDictionary::Load(char *sFilename,bool bReset)
{
   FILE *fp;
   int i,j,nBuffer[3];
   if((fp=fopen(sFilename,"rb"))==NULL)
	   return false;//fail while opening the file
   	
   //Release the memory for new files
   for( i=0;i<CC_NUM;i++)
	{//delete the memory of word item array in the dictionary
		for( j=0;j<m_IndexTable[i].nCount;j++)
			delete m_IndexTable[i].pWordItemHead[j].sWord;
		delete [] m_IndexTable[i].pWordItemHead;
	}
    DelModified();
   for(i=0;i<CC_NUM;i++)
   {
	   fread(&(m_IndexTable[i].nCount),sizeof(int),1,fp);
       if(m_IndexTable[i].nCount>0)
	     m_IndexTable[i].pWordItemHead=new WORD_ITEM[m_IndexTable[i].nCount];
	   else 
	   {
		   m_IndexTable[i].pWordItemHead=0;
		   continue;
	   }
       j=0;
	   while(j<m_IndexTable[i].nCount)
	   {
         fread(nBuffer,sizeof(int),3,fp);
         m_IndexTable[i].pWordItemHead[j].sWord=new char[nBuffer[1]+1];
  		 if(nBuffer[1])//String length is more than 0
		 {
			 fread(m_IndexTable[i].pWordItemHead[j].sWord,sizeof(char),nBuffer[1],fp);
		 }
		 m_IndexTable[i].pWordItemHead[j].sWord[nBuffer[1]]=0;
  	     if(bReset)//Reset the frequency
	          m_IndexTable[i].pWordItemHead[j].nFrequency=0;
		 else
              m_IndexTable[i].pWordItemHead[j].nFrequency=nBuffer[0];
		 m_IndexTable[i].pWordItemHead[j].nWordLen=nBuffer[1];
		 m_IndexTable[i].pWordItemHead[j].nHandle=nBuffer[2];
 		 j+=1;//Get next item in the original table.
	   }
   }
   fclose(fp);
   return true;
}
/*********************************************************************
 *
 *  Func Name  : Save
 *
 *  Description: Save the dictionary as the file .dct
 *              
 *
 *  Parameters : sFilename: the file name
 *
 *  Returns    : success or fail
 *  Author     : Kevin Zhang  
 *  History    : 
 *              1.create 2002-1-9
 *********************************************************************/

bool CDictionary::Save(char *sFilename)
{
   FILE *fp;
   int i,j,nCount,nBuffer[3];
   PWORD_CHAIN pCur;
   if((fp=fopen(sFilename,"wb"))==NULL)
	   return false;//fail while opening the file
   for(i=0;i<CC_NUM;i++)
   {
	   pCur=NULL;
	   if(m_pModifyTable)
	   {//Modification made
		   nCount=m_IndexTable[i].nCount+m_pModifyTable[i].nCount-m_pModifyTable[i].nDelete;
		   fwrite(&nCount,sizeof(int),1,fp);
		   pCur=m_pModifyTable[i].pWordItemHead;
		   j=0;
		   while(pCur!=NULL&&j<m_IndexTable[i].nCount)
		   {//Output to the file after comparision
			   if(strcmp(pCur->data.sWord,m_IndexTable[i].pWordItemHead[j].sWord)<0||(strcmp(pCur->data.sWord,m_IndexTable[i].pWordItemHead[j].sWord)==0&&pCur->data.nHandle<m_IndexTable[i].pWordItemHead[j].nHandle))
			   {//Output the modified data to the file
				 nBuffer[0]=pCur->data.nFrequency;
		         nBuffer[1]=pCur->data.nWordLen;
			     nBuffer[2]=pCur->data.nHandle;
                 fwrite(nBuffer,sizeof(int),3,fp);
				 if(nBuffer[1])//String length is more than 0
  			       fwrite(pCur->data.sWord,sizeof(char),nBuffer[1],fp);
                 pCur=pCur->next;//Get next item in the modify table.
			   }
			   else if(m_IndexTable[i].pWordItemHead[j].nFrequency==-1)
			   {//The item has been removed,so skip it
				   j+=1;
			   }
			   else if(strcmp(pCur->data.sWord,m_IndexTable[i].pWordItemHead[j].sWord)>0||(strcmp(pCur->data.sWord,m_IndexTable[i].pWordItemHead[j].sWord)==0&&pCur->data.nHandle>m_IndexTable[i].pWordItemHead[j].nHandle))
			   {//Output the index table data to the file
				 nBuffer[0]=m_IndexTable[i].pWordItemHead[j].nFrequency;
				 nBuffer[1]=m_IndexTable[i].pWordItemHead[j].nWordLen;
				 nBuffer[2]=m_IndexTable[i].pWordItemHead[j].nHandle;
				 fwrite(nBuffer,sizeof(int),3,fp);
				 if(nBuffer[1])//String length is more than 0
  					fwrite(m_IndexTable[i].pWordItemHead[j].sWord,sizeof(char),nBuffer[1],fp);
				 j+=1;//Get next item in the original table.
			   }
		   }
		   if(j<m_IndexTable[i].nCount)
		   {
			   while(j<m_IndexTable[i].nCount)
			   {
				   if(m_IndexTable[i].pWordItemHead[j].nFrequency!=-1)
				   {//Has been deleted
					 nBuffer[0]=m_IndexTable[i].pWordItemHead[j].nFrequency;
					 nBuffer[1]=m_IndexTable[i].pWordItemHead[j].nWordLen;
					 nBuffer[2]=m_IndexTable[i].pWordItemHead[j].nHandle;
					 fwrite(nBuffer,sizeof(int),3,fp);
					 if(nBuffer[1])//String length is more than 0
  						fwrite(m_IndexTable[i].pWordItemHead[j].sWord,sizeof(char),nBuffer[1],fp);
				   }
 				 j+=1;//Get next item in the original table.
			   }
		   }
		   else////No Modification
			   while(pCur!=NULL)//Add the rest data to the file.
			   {
				 nBuffer[0]=pCur->data.nFrequency;
		         nBuffer[1]=pCur->data.nWordLen;
			     nBuffer[2]=pCur->data.nHandle;
                 fwrite(nBuffer,sizeof(int),3,fp);
				 if(nBuffer[1])//String length is more than 0
  					fwrite(pCur->data.sWord,sizeof(char),nBuffer[1],fp);
                 pCur=pCur->next;//Get next item in the modify table.
			   }
	   }
	   else
	   {
		   fwrite(&m_IndexTable[i].nCount,sizeof(int),1,fp);
		   //write to the file
           j=0;  
		   while(j<m_IndexTable[i].nCount)
		   {
			 nBuffer[0]=m_IndexTable[i].pWordItemHead[j].nFrequency;
		     nBuffer[1]=m_IndexTable[i].pWordItemHead[j].nWordLen;
			 nBuffer[2]=m_IndexTable[i].pWordItemHead[j].nHandle;
             fwrite(nBuffer,sizeof(int),3,fp);
			 if(nBuffer[1])//String length is more than 0
  					fwrite(m_IndexTable[i].pWordItemHead[j].sWord,sizeof(char),nBuffer[1],fp);
 			 j+=1;//Get next item in the original table.
		   }
	   }
   }
   fclose(fp);
   return true;
}
/*********************************************************************
 *
 *  Func Name  : AddItem
 *
 *  Description: Add a word item to the dictionary
 *              
 *
 *  Parameters : sWord: the word
 *               nHandle:the handle number
 *               nFrequency: the frequency
 *  Returns    : success or fail
 *  Author     : Kevin Zhang  
 *  History    : 
 *              1.create 2002-1-9
 *********************************************************************/

bool CDictionary::AddItem(char *sWord, int nHandle,int nFrequency)
{

   char sWordAdd[WORD_MAXLENGTH-2];
   int nPos,nFoundPos;
   PWORD_CHAIN pRet,pTemp,pNext;
   int i=0;
   if(!PreProcessing(sWord, &nPos,sWordAdd,true))
	   return false;
   if(FindInOriginalTable(nPos,sWordAdd,nHandle,&nFoundPos))
   {//The word exists in the original table, so add the frequency
    //Operation in the index table and its items 
       if(m_IndexTable[nPos].pWordItemHead[nFoundPos].nFrequency==-1)
	   {//The word item has been removed
	      m_IndexTable[nPos].pWordItemHead[nFoundPos].nFrequency=nFrequency;
 	      if(!m_pModifyTable)//Not prepare the buffer
		  {
			   m_pModifyTable=new MODIFY_TABLE[CC_NUM];
			   memset(m_pModifyTable,0,CC_NUM*sizeof(MODIFY_TABLE));
		  }
		  m_pModifyTable[nPos].nDelete-=1;
	   }
	   else
		  m_IndexTable[nPos].pWordItemHead[nFoundPos].nFrequency+=nFrequency;
	   return true;
   }

   //The items not exists in the index table.
   //As following, we have to find the item whether exists in the modify data region
   //If exists, change the frequency .or else add a item
   if(!m_pModifyTable)//Not prepare the buffer
   {
	   m_pModifyTable=new MODIFY_TABLE[CC_NUM];
	   memset(m_pModifyTable,0,CC_NUM*sizeof(MODIFY_TABLE));
   }
   if(FindInModifyTable(nPos,sWordAdd,nHandle,&pRet))
   {
	   if(pRet!=NULL)
		   pRet=pRet->next;
	   else
		   pRet=m_pModifyTable[nPos].pWordItemHead;
	   pRet->data.nFrequency+=nFrequency;
	   return true;
   }
   //find the proper position to add the word to the modify data table and link
   pTemp=new WORD_CHAIN;//Allocate the word chain node
   if(pTemp==NULL)//Allocate memory failure
	   return false;
   memset(pTemp,0,sizeof(WORD_CHAIN));//init it with 0
   pTemp->data.nHandle=nHandle;//store the handle 
   pTemp->data.nWordLen=strlen(sWordAdd);
   pTemp->data.sWord=new char[1+pTemp->data.nWordLen];
   strcpy(pTemp->data.sWord,sWordAdd);
   pTemp->data.nFrequency=nFrequency;
   pTemp->next=NULL;   
   if(pRet!=NULL)
   {
	   pNext=pRet->next;//Get the next item before the current item
	   pRet->next=pTemp;//link the node to the chain
   }
   else
   {
	   pNext=m_pModifyTable[nPos].pWordItemHead;
	   m_pModifyTable[nPos].pWordItemHead=pTemp;//Set the pAdd as the head node
   }
   pTemp->next=pNext;//Very important!!!! or else it will lose some node
   //Modify in 2001-10-29
   m_pModifyTable[nPos].nCount++;//the number increase by one
   return true;
}
bool CDictionary::DelItem(char *sWord,int nHandle)
{
   char sWordDel[WORD_MAXLENGTH-2];
   int nPos,nFoundPos,nTemp;
   PWORD_CHAIN pPre,pTemp,pCur;
   if(!PreProcessing(sWord, &nPos,sWordDel))
	   return false;
   if(FindInOriginalTable(nPos,sWordDel,nHandle,&nFoundPos))
   {
       if(!m_pModifyTable)//Not prepare the buffer
	   {
		   m_pModifyTable=new MODIFY_TABLE[CC_NUM];
		   memset(m_pModifyTable,0,CC_NUM*sizeof(MODIFY_TABLE));
	   }
       m_IndexTable[nPos].pWordItemHead[nFoundPos].nFrequency=-1;
	   m_pModifyTable[nPos].nDelete+=1;
	   if(nHandle==-1)//Remove all items which word is sWordDel,ignoring the handle
	   {
/*		   nTemp=nFoundPos-1;//Check its previous position
		   while(nTemp>0&&strcmp(m_IndexTable[nPos].pWordItemHead[nFoundPos].sWord,sWordDel)==0)
		   {
				m_IndexTable[nPos].pWordItemHead[nTemp].nFrequency=-1;
				m_pModifyTable[nPos].nDelete+=1;
				nTemp-=1;
		   }
*/		   nTemp=nFoundPos+1;//Check its previous position
		   while(nTemp<m_IndexTable[nPos].nCount&&strcmp(m_IndexTable[nPos].pWordItemHead[nFoundPos].sWord,sWordDel)==0)
		   {
				m_IndexTable[nPos].pWordItemHead[nTemp].nFrequency=-1;
				m_pModifyTable[nPos].nDelete+=1;
				nTemp+=1;
		   }
	   }
	   return true;
   }
   //Operation in the modify table and its items 
   if(FindInModifyTable(nPos,sWordDel,nHandle,&pPre))
   {
	     pCur=m_pModifyTable[nPos].pWordItemHead;
	     if(pPre!=NULL)
			 pCur=pPre->next;
         while(pCur!=NULL && _stricmp(pCur->data.sWord,sWordDel)==0&&(pCur->data.nHandle==nHandle||nHandle<0))
		 {
			 pTemp=pCur;
    		 if(pPre!=NULL)//pCur is the first item
				 pPre->next=pCur->next;
			 else
				 m_pModifyTable[nPos].pWordItemHead=pCur->next;
			 pCur=pCur->next;
			 delete pTemp->data.sWord;//Delete the word
			 delete pTemp;
		 }
	   return true;
   }
   return false;
}
bool CDictionary::DelModified()
{
  PWORD_CHAIN pTemp,pCur;
  if(!m_pModifyTable)
	  return true;
  for(int i=0;i<CC_NUM;i++)
  {
      pCur=m_pModifyTable[i].pWordItemHead;
	  while(pCur!=NULL)
	  {
		  pTemp=pCur;
		  pCur=pCur->next;
		  delete pTemp->data.sWord;
		  delete pTemp;
	  }
  }
  delete [] m_pModifyTable;
  m_pModifyTable=NULL;
  return true;
}
bool CDictionary::IsExist(char *sWord,  int nHandle)
{
   char sWordFind[WORD_MAXLENGTH-2];
   int nPos;
   if(!PreProcessing(sWord, &nPos,sWordFind))
	   return false;
   return(FindInOriginalTable(nPos,sWordFind,nHandle)||FindInModifyTable(nPos,sWordFind,nHandle));
}
bool CDictionary::GetHandle(char *sWord,int *pnCount,int *pnHandle,int *pnFrequency)
{
   char sWordGet[WORD_MAXLENGTH-2];
   int nPos,nFoundPos,nTemp;
   PWORD_CHAIN pPre,pCur;
   *pnCount=0;
   if(!PreProcessing(sWord, &nPos,sWordGet))
	   return false;
   if(FindInOriginalTable(nPos,sWordGet,-1,&nFoundPos))
   {
       pnHandle[*pnCount]=m_IndexTable[nPos].pWordItemHead[nFoundPos].nHandle;
	   pnFrequency[*pnCount]=m_IndexTable[nPos].pWordItemHead[nFoundPos].nFrequency;
	   *pnCount+=1;
/*	   nTemp=nFoundPos-1;//Check its previous position
	   while(nTemp>0&&strcmp(m_IndexTable[nPos].pWordItemHead[nTemp].sWord,sWordGet)==0)
	   {
		   pnHandle[*pnCount]=m_IndexTable[nPos].pWordItemHead[nTemp].nHandle;
		   pnFrequency[*pnCount]=m_IndexTable[nPos].pWordItemHead[nTemp].nFrequency;
		   *pnCount+=1;
	   	   nTemp-=1;
	   }
*/	   nTemp=nFoundPos+1;//Check its previous position
	   while(nTemp<m_IndexTable[nPos].nCount&&strcmp(m_IndexTable[nPos].pWordItemHead[nTemp].sWord,sWordGet)==0)
	   {
		   pnHandle[*pnCount]=m_IndexTable[nPos].pWordItemHead[nTemp].nHandle;
		   pnFrequency[*pnCount]=m_IndexTable[nPos].pWordItemHead[nTemp].nFrequency;
		   *pnCount+=1;
		   nTemp+=1;
	   }
	   return true;
   }
   //Operation in the index table and its items 
   if(FindInModifyTable(nPos,sWordGet,-1,&pPre))
   {
	     pCur=m_pModifyTable[nPos].pWordItemHead;
	     if(pPre!=NULL)
			 pCur=pPre->next;
         while(pCur!=NULL && _stricmp(pCur->data.sWord,sWordGet)==0)
		 {
			 pnHandle[*pnCount]=pCur->data.nHandle;
			 pnFrequency[*pnCount]=pCur->data.nFrequency;
			 *pnCount+=1;
 			 pCur=pCur->next;
		 }
	   return true;
   }
   return false;
}
/*********************************************************************
 *
 *  Func Name  : FindInOriginalTable
 *
 *  Description: judge the word and handle exist in the inner table and its items
 *              
 *
 *  Parameters : nInnerCode: the inner code of the first CHines char
 *               sWord: the word
 *               nHandle:the handle number
 *               *nPosRet:the position which node is matched

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -