⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dictionary.h

📁 最短路径法分词程序。将中文句子经过原子切分后生成一个有向无环图
💻 H
字号:
#if !defined(AFX_DICTIONARY_H__80E88BC1_784E_4C96_868B_D7CD66DD6725__INCLUDED_)
#define AFX_DICTIONARY_H__80E88BC1_784E_4C96_868B_D7CD66DD6725__INCLUDED_

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000

/* Number of Chinese characters, including 5 empty positions between
 * 3756 and 3761. */
#define CC_NUM  6768
/* NOTE(review): presumably the maximum word length and maximum sentence
 * length handled by the segmenter; units are not visible here -- confirm. */
#define WORD_MAXLENGTH 100
#define MAX_SENTENCE_LEN 2000
/* NOTE(review): WT_* look like word-type codes -- confirm against users. */
#define WT_DELIMITER 0
#define WT_CHINESE   1
#define WT_OTHER     2
/*
 * ID of a Chinese character computed from its two bytes:
 *   (c1 - 176) * 94 + (c2 - 161)
 * Both bytes are converted to unsigned char first so that a plain (possibly
 * signed) char argument does not produce a negative value.
 * The whole expansion is parenthesized so that the macro can be used safely
 * inside a larger expression, e.g. CC_ID(a, b) * 2.
 * NOTE(review): the 176/161 offsets with 94 codes per row look like the
 * GB2312 row/column layout -- confirm.
 */
#define CC_ID(c1,c2) ((((unsigned char)(c1))-176)*94+(((unsigned char)(c2))-161))
/* First byte of the Chinese character with the given ID (inverse of CC_ID). */
#define CC_CHAR1(id) ((id)/94+176)
/* Second byte of the Chinese character with the given ID (inverse of CC_ID). */
#define CC_CHAR2(id) ((id)%94+161)

/*data structure for word segmentation and tag result*/
//Add in 2002-6-20
/* One word item of the dictionary. */
typedef struct tagWordItem {
	int   nWordLen;    /* not described in the original; presumably the length of sWord -- confirm */
	char *sWord;       /* the word */
	int   nHandle;     /* the process or information handle of the word */
	int   nFrequency;  /* the number of times the word appears */
} WORD_ITEM, *PWORD_ITEM;
/* One item of the dictionary index table. */
typedef struct tagIndexTable {
	int        nCount;         /* number of words whose initial letter is sInit (wording kept from the original; sInit is not defined in this header) */
	PWORD_ITEM pWordItemHead;  /* head of the word items */
} INDEX_TABLE;


/*
 * Dictionary object exposing an index table of CC_NUM entries.
 * Only declarations are visible in this header; the notes on Load/Find
 * below are inferred from their names and should be confirmed against the
 * implementation file.
 */
class CDictionary
{
public:
	// Construction / destruction. The destructor is virtual, so the class
	// may be deleted through a base-class pointer.
	CDictionary();
	virtual ~CDictionary();

	// NOTE(review): presumably loads the dictionary from the file named
	// sFilename and reports success as the bool result -- confirm.
	bool Load(char *sFilename);

	// NOTE(review): presumably reports whether sWord is present in the
	// dictionary -- confirm.
	bool Find(char *sWord);

	// Index table with CC_NUM entries.
	// NOTE(review): likely indexed by CC_ID of a word's first character -- confirm.
	INDEX_TABLE m_IndexTable[CC_NUM];
};

#endif // !defined(AFX_DICTIONARY_H__80E88BC1_784E_4C96_868B_D7CD66DD6725__INCLUDED_)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -