⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dictionary.cpp

📁 这是一个能把中文信息处理中以二进制形式存储的词典转换为文本文件的程序
💻 CPP
字号:

//#include "stdafx.h"
#include "Dictionary.h"
#include "Utility.h"
#include <string.h>
#include <stdlib.h>
#include <malloc.h>
#include <stdio.h>
#define CC_NUM  6768
//The number of Chinese characters, including 5 empty positions between 3756 and 3761
#define WORD_MAXLENGTH 100
 

void main()
{


 struct tagWordResult{
	char sWord[WORD_MAXLENGTH];
	//The word 
	int nHandle;
	//the POS of the word
	double  dValue;
	//The -log(frequency/MAX)
};
typedef struct tagWordResult WORD_RESULT,*PWORD_RESULT;

/*data structure for word item*/
struct tagWordItem{
	int nWordLen;
	char *sWord;
	//The word 
	int nHandle;
	//the process or information handle of the word
	int  nFrequency;
	//The count which it appear
};
typedef struct tagWordItem WORD_ITEM,*PWORD_ITEM;
/*data structure for dictionary index table item*/
struct tagIndexTable{
    int nCount;
	//The count number of words which initial letter is sInit
    PWORD_ITEM pWordItemHead;
	//The  head of word items
};
typedef struct tagIndexTable INDEX_TABLE;

/*data structure for word item chain*/
struct tagWordChain{
       WORD_ITEM data;
       struct tagWordChain *next;
};
typedef struct tagWordChain WORD_CHAIN,*PWORD_CHAIN;
/*data structure for dictionary index table item*/
struct tagModifyTable{
    int nCount;
	//The count number of words which initial letter is sInit
	int nDelete;
    //The number of deleted items in the index table
	PWORD_CHAIN pWordItemHead;
	//The  head of word items
};
typedef struct tagModifyTable MODIFY_TABLE,*PMODIFY_TABLE;

	INDEX_TABLE   m_IndexTable[CC_NUM];
//    PMODIFY_TABLE m_pModifyTable;	
	
	FILE *fp;
   int i,j,nBuffer[3];
   if((fp=fopen("coreDict.dct","rb"))==NULL)
	   printf("kkkkkkkkk"); //fail while opening the file
 memset(m_IndexTable,0,sizeof(m_IndexTable));
 
  printf("装入内存"); 
   for(i=0;i<CC_NUM;i++)
   {
	   fread(&(m_IndexTable[i].nCount),sizeof(int),1,fp);
       if(m_IndexTable[i].nCount>0)
	     m_IndexTable[i].pWordItemHead=new WORD_ITEM[m_IndexTable[i].nCount];
	   else 
	   {
		   m_IndexTable[i].pWordItemHead=0;
		   continue;
	   }
       j=0;
	   while(j<m_IndexTable[i].nCount)
	   {
         fread(nBuffer,sizeof(int),3,fp);
         m_IndexTable[i].pWordItemHead[j].sWord=new char[nBuffer[1]+1];
  		 if(nBuffer[1])//String length is more than 0
		 {
			 fread(m_IndexTable[i].pWordItemHead[j].sWord,sizeof(char),nBuffer[1],fp);
		 }
		 m_IndexTable[i].pWordItemHead[j].sWord[nBuffer[1]]=0;
  	   
              m_IndexTable[i].pWordItemHead[j].nFrequency=nBuffer[0];
		 m_IndexTable[i].pWordItemHead[j].nWordLen=nBuffer[1];
		 m_IndexTable[i].pWordItemHead[j].nHandle=nBuffer[2];
 		 j+=1;//Get next item in the original table.
	   }
   }
   fclose(fp);
  printf("装入完毕"); 
   FILE *fp1;
   int a,b,bBuffer[3];
//   PWORD_CHAIN pCur;
  //strcat(sFilename,".sav");
   if((fp1=fopen("coreDict.txt","wt"))==NULL)
	   printf("bbbb"); //fail while opening the file
   
   
   printf("准备输出...."); 
   
   for(a=0;a<CC_NUM;a++)
   {char c1,c2,c3,c4;
   c1=a/94+176;
   c2=a%94+161;
	  
		 //  fwrite(&m_IndexTable[a].nCount,sizeof(int),1,fp1);
		  fprintf(fp1,"%d\n",m_IndexTable[a].nCount);
		   //write to the file
           b=0;  
		   while(b<m_IndexTable[a].nCount)
		   {
			 bBuffer[0]=m_IndexTable[a].pWordItemHead[b].nFrequency;
		     bBuffer[1]=m_IndexTable[a].pWordItemHead[b].nWordLen;
			 bBuffer[2]=m_IndexTable[a].pWordItemHead[b].nHandle;
             c3=bBuffer[2]/256;
			 c4=bBuffer[2]%256;
//            c3=bBuffer[2];
			// c4=bBuffer[2]%256; 
            // fwrite(bBuffer,sizeof(int),3,fp1);
			 fprintf(fp1,"%d\t%d\t%d\t",bBuffer[0],bBuffer[1],bBuffer[2]/*,c3,c4*/);
 //fprintf(fp1,"词频为:%d\t词长为:%d\t词标注为:%c%c\t",bBuffer[0],bBuffer[1],c3,c4);
		//	 if(bBuffer[1])//String length is more than 0
  				//	fwrite(m_IndexTable[a].pWordItemHead[b].sWord,sizeof(char),bBuffer[1],fp1);
			 fprintf(fp1,"\t%c%c%s\n",c1,c2,m_IndexTable[a].pWordItemHead[b].sWord);
 			b+=1;//Get next item in the original table.
		   }
	    
   }
   fclose(fp1);
  
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -