// dictionary.cpp
//#include "stdafx.h"
#include "Dictionary.h"
#include "Utility.h"
#include <string.h>
#include <stdlib.h>
#include <malloc.h>
#include <stdio.h>
#define CC_NUM 6768
// Number of Chinese characters, including 5 empty positions between 3756 and 3761
#define WORD_MAXLENGTH 100
/*
 * Converts the binary dictionary "coreDict.dct" into the text file
 * "coreDict.txt" (both relative to the current working directory).
 *
 * Input layout, as read below: for each of the CC_NUM index slots, one int
 * holding the item count, followed by that many items. Each item is three
 * ints (frequency, word length in bytes, handle) followed by `length` bytes
 * of word text with no terminator. Ints are read with the host's size and
 * byte order.
 *
 * Output layout: per slot, one line with the count, then one line per item:
 *   frequency TAB length TAB handle TAB TAB <2 prefix bytes><word>
 *
 * Returns EXIT_SUCCESS when the whole dictionary was converted, and
 * EXIT_FAILURE if a file could not be opened, the input was truncated or
 * malformed, memory ran out, or the output could not be written.
 */
int main(void)
{
    /* NOTE(review): the types are kept local to main; presumably
       Dictionary.h declares the same names at file scope - confirm before
       moving them out. */
    struct tagWordItem {
        int nWordLen;    /* length of sWord in bytes, terminator excluded */
        char *sWord;     /* heap-allocated, NUL-terminated word text */
        int nHandle;     /* handle value stored with the word */
        int nFrequency;  /* frequency value stored with the word */
    };
    typedef struct tagWordItem WORD_ITEM, *PWORD_ITEM;

    struct tagIndexTable {
        int nCount;                /* item count exactly as stored in the file */
        PWORD_ITEM pWordItemHead;  /* array of nCount items, or NULL */
    };
    typedef struct tagIndexTable INDEX_TABLE;

    /* Every function-scope variable is declared here so that the
       `goto cleanup` jumps never cross an initialization (an error in C++). */
    INDEX_TABLE m_IndexTable[CC_NUM];
    FILE *fpIn = NULL;
    FILE *fpOut = NULL;
    int status = EXIT_FAILURE;
    int i, j;
    int nBuffer[3];

    memset(m_IndexTable, 0, sizeof(m_IndexTable));

    /* ---- Load the binary dictionary into memory ---- */
    fpIn = fopen("coreDict.dct", "rb");
    if (fpIn == NULL) {
        perror("coreDict.dct");
        goto cleanup;
    }

    printf("装入内存");
    for (i = 0; i < CC_NUM; i++) {
        INDEX_TABLE *pSlot = &m_IndexTable[i];

        if (fread(&pSlot->nCount, sizeof(int), 1, fpIn) != 1) {
            fprintf(stderr, "coreDict.dct: truncated at index entry %d\n", i);
            goto cleanup;
        }
        if (pSlot->nCount <= 0) {
            continue;  /* empty slot: pWordItemHead stays NULL */
        }

        /* calloc zero-fills the array, so sWord pointers of items that are
           never reached stay null and the cleanup loop can free them
           unconditionally. The cast is required because this file is C++. */
        pSlot->pWordItemHead =
            (PWORD_ITEM)calloc((size_t)pSlot->nCount, sizeof(WORD_ITEM));
        if (pSlot->pWordItemHead == NULL) {
            fprintf(stderr, "out of memory allocating %d items for entry %d\n",
                    pSlot->nCount, i);
            goto cleanup;
        }

        for (j = 0; j < pSlot->nCount; j++) {
            PWORD_ITEM pItem = &pSlot->pWordItemHead[j];
            size_t nLen;

            /* nBuffer = { frequency, word length, handle } */
            if (fread(nBuffer, sizeof(int), 3, fpIn) != 3) {
                fprintf(stderr, "coreDict.dct: truncated in entry %d, item %d\n",
                        i, j);
                goto cleanup;
            }
            if (nBuffer[1] < 0) {
                fprintf(stderr,
                        "coreDict.dct: negative word length in entry %d, item %d\n",
                        i, j);
                goto cleanup;
            }
            nLen = (size_t)nBuffer[1];

            pItem->sWord = (char *)malloc(nLen + 1);
            if (pItem->sWord == NULL) {
                fprintf(stderr, "out of memory allocating a %d-byte word\n",
                        nBuffer[1]);
                goto cleanup;
            }
            if (nLen > 0 && fread(pItem->sWord, sizeof(char), nLen, fpIn) != nLen) {
                fprintf(stderr, "coreDict.dct: truncated word in entry %d, item %d\n",
                        i, j);
                goto cleanup;
            }
            pItem->sWord[nLen] = '\0';
            pItem->nFrequency = nBuffer[0];
            pItem->nWordLen = nBuffer[1];
            pItem->nHandle = nBuffer[2];
        }
    }
    fclose(fpIn);
    fpIn = NULL;
    printf("装入完毕");

    /* ---- Dump the table as text ---- */
    fpOut = fopen("coreDict.txt", "wt");
    if (fpOut == NULL) {
        perror("coreDict.txt");
        goto cleanup;
    }

    printf("准备输出....");
    for (i = 0; i < CC_NUM; i++) {
        const INDEX_TABLE *pSlot = &m_IndexTable[i];
        /* Two bytes derived from the slot index and written in front of every
           word of the slot. unsigned char is used because the values
           (176..247 and 161..254) do not fit a signed char.
           NOTE(review): presumably the GB2312 lead/trail bytes of the word's
           first character - confirm. */
        unsigned char c1 = (unsigned char)(i / 94 + 176);
        unsigned char c2 = (unsigned char)(i % 94 + 161);

        fprintf(fpOut, "%d\n", pSlot->nCount);
        for (j = 0; j < pSlot->nCount; j++) {
            const WORD_ITEM *pItem = &pSlot->pWordItemHead[j];

            fprintf(fpOut, "%d\t%d\t%d\t\t%c%c%s\n",
                    pItem->nFrequency, pItem->nWordLen, pItem->nHandle,
                    c1, c2, pItem->sWord);
        }
    }

    if (ferror(fpOut)) {
        fprintf(stderr, "coreDict.txt: write error\n");
        goto cleanup;
    }
    /* fclose flushes buffered data, so its result decides success too. */
    if (fclose(fpOut) != 0) {
        fpOut = NULL;
        perror("coreDict.txt");
        goto cleanup;
    }
    fpOut = NULL;
    status = EXIT_SUCCESS;

cleanup:
    if (fpOut != NULL) {
        fclose(fpOut);
    }
    if (fpIn != NULL) {
        fclose(fpIn);
    }
    for (i = 0; i < CC_NUM; i++) {
        if (m_IndexTable[i].pWordItemHead != NULL) {
            for (j = 0; j < m_IndexTable[i].nCount; j++) {
                free(m_IndexTable[i].pWordItemHead[j].sWord);
            }
            free(m_IndexTable[i].pWordItemHead);
        }
    }
    return status;
}
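/*
 * Keyboard shortcuts (text from the code-viewer page, not part of the program):
 *   Copy code:        Ctrl + C
 *   Search code:      Ctrl + F
 *   Full-screen mode: F11
 *   Toggle theme:     Ctrl + Shift + D
 *   Show shortcuts:   ?
 *   Increase font:    Ctrl + =
 *   Decrease font:    Ctrl + -
 */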