⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dictionary.cpp

📁 最短路径法分词程序。将中文句子经过原子切分后生成一个有向无环图
💻 CPP
字号:
#include "stdafx.h"
#include "Dictionary.h"
#include <string.h>
#include <stdlib.h>
#include <malloc.h>
#include <stdio.h>
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

// Builds an empty dictionary.
// Every byte of the index table is set to zero, so each slot starts out with
// a zero word count and a zeroed word-item pointer.
CDictionary::CDictionary()
{
	const size_t nTableBytes=sizeof(m_IndexTable);
	memset(&m_IndexTable[0],0,nTableBytes);
}

// Releases everything the index table owns: the text of every word item and
// then the word-item array of each of the CC_NUM slots.
CDictionary::~CDictionary()
{
	for(int i=0;i<CC_NUM;i++)
	{
		PWORD_ITEM pItems=m_IndexTable[i].pWordItemHead;
		if(pItems==NULL)
			continue;//no item array was allocated for this slot

		for(int j=0;j<m_IndexTable[i].nCount;j++)
			delete [] pItems[j].sWord;//sWord comes from new char[], so it needs the array form of delete
		delete [] pItems;
	}
}
bool CDictionary::Load(char *sFilename)
{
   FILE *fp;
   int i,j,nBuffer[3];
   if((fp=fopen(sFilename,"rb"))==NULL)
	   return false;//fail while opening the file
   	
   //Release the memory for new files
   for( i=0;i<CC_NUM;i++)
	{//delete the memory of word item array in the dictionary
		for( j=0;j<m_IndexTable[i].nCount;j++)
			delete m_IndexTable[i].pWordItemHead[j].sWord;
		delete [] m_IndexTable[i].pWordItemHead;
	}
   for(i=0;i<CC_NUM;i++)
   {
	   fread(&(m_IndexTable[i].nCount),sizeof(int),1,fp);
       if(m_IndexTable[i].nCount>0)
	     m_IndexTable[i].pWordItemHead=new WORD_ITEM[m_IndexTable[i].nCount];
	   else 
	   {
		   m_IndexTable[i].pWordItemHead=0;
		   continue;
	   }
       j=0;
	   while(j<m_IndexTable[i].nCount)
	   {
         fread(nBuffer,sizeof(int),3,fp);
         m_IndexTable[i].pWordItemHead[j].sWord=new char[nBuffer[1]+1];
  		 if(nBuffer[1])//String length is more than 0
		 {
			 fread(m_IndexTable[i].pWordItemHead[j].sWord,sizeof(char),nBuffer[1],fp);
		 }
		 m_IndexTable[i].pWordItemHead[j].sWord[nBuffer[1]]=0;
  	     m_IndexTable[i].pWordItemHead[j].nFrequency=nBuffer[0];
		 m_IndexTable[i].pWordItemHead[j].nWordLen=nBuffer[1];
		 m_IndexTable[i].pWordItemHead[j].nHandle=nBuffer[2];
 		 j+=1;//Get next item in the original table.
	   }
   }
   fclose(fp);
   return true;
}
bool CDictionary::Find(char *sWord)
{
	int nInnerCode=CC_ID(sWord[0],sWord[1]);
	PWORD_ITEM pItems=m_IndexTable[nInnerCode].pWordItemHead;
	int nStart=0,nEnd=m_IndexTable[nInnerCode].nCount-1,nMid=(nStart+nEnd)/2,nCount=0,nCmpValue;
	
	
	if (strstr("。,、;:?!…—·‘’“”~〔〕〈〉《》「」『』〖〗【】()[]{}",sWord))
		return true;
	while(nStart<=nEnd)//Binary search
	{
		nCmpValue=strcmp(pItems[nMid].sWord,sWord+2);
		if(nCmpValue==0)
			return true;   
		else 
			if(nCmpValue<0)
				nStart=nMid+1;
			else 
				if(nCmpValue>0)
					nEnd=nMid-1;
	   nMid=(nStart+nEnd)/2;
	}
	return false;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -