⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 annotator.h

📁 分词词典软件
💻 H
字号:
// Annotator.h: interface for the CAnnotator class.
//
//////////////////////////////////////////////////////////////////////

#if !defined(AFX_ANNOTATOR_H__4705F5E4_69B7_40E6_80F0_97138655CA1B__INCLUDED_)
#define AFX_ANNOTATOR_H__4705F5E4_69B7_40E6_80F0_97138655CA1B__INCLUDED_

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000

// Integer constants shared by the annotator declarations in this header.
// NOTE(review): the meanings given below are inferred from the names and
// from the array extents declared further down (extent 3 and extent 2);
// confirm them against the implementation file.
// NOTE(review): these macros are defined before the #include lines that
// follow, so they are also visible while those headers are preprocessed.

// Value 2 -- presumably the index of a "sentence begin" state.
#define S_BEGIN		2
// Value 0 -- presumably the "organization" named-entity label.
#define ORG			0
// Value 1 -- presumably the "not a named entity" label.
#define NONNE		1
// Large negative value -- presumably a floor for (log-)probabilities.
#define MINPROB		-100000

#include "afxtempl.h"
#include "PreProcess.h"
//extern enum CODE;

// Plain record describing one word. All members are public and the class
// declares no constructor, so the int and CODE members hold indeterminate
// values until the caller assigns them.
class CWord
{
public:
	// Text of the word.
	CString m_word;
	// Tag of type CODE (declared in PreProcess.h).
	// NOTE(review): presumably the part-of-speech tag -- confirm.
	CODE m_pos;
	// NOTE(review): m_start / m_end presumably give the word's extent in the
	// source text; units and inclusiveness are not visible here -- confirm.
	int m_start;
	int m_end;
	// NOTE(review): presumably a named-entity label (cf. ORG / NONNE) -- confirm.
	int m_ne;
};

// One dictionary record: a word, its CODE-typed tag and two integer counters.
// Derives from CObject. Copy construction and copy assignment are declared
// here and defined outside this header.
class CDict : public CObject
{
public:
	// Text of the word.
	CString m_word;
	// Tag of the word; the default constructor leaves it unassigned.
	CODE m_pos;
	// Two counters, both set to zero by the default constructor.
	// NOTE(review): presumably indexed by ORG / NONNE -- confirm.
	int m_count[2];

	// Creates a record with both counters at zero.
	CDict()
	{
		m_count[0] = 0;
		m_count[1] = 0;
	}
	CDict(const CDict&);
	~CDict() {}

	CDict& operator=(const CDict&);
};


// A sentence, represented as an MFC CList of CWord elements.
// The default constructor and destructor are empty inline functions; copy
// construction and copy assignment are declared here and defined elsewhere.
class CSentence : public CList<CWord, CWord&>
{
public:
	CSentence() {}
	CSentence(const CSentence&);
	~CSentence() {}

	CSentence& operator=(const CSentence& s);
};

// A list of sentences, represented as an MFC CList of CSentence elements.
// The default constructor and destructor are empty inline functions; copy
// construction, copy assignment and print() are defined elsewhere.
class CSentenceList : public CList<CSentence, CSentence&>
{
public:
	CSentenceList() {}
	CSentenceList(const CSentenceList&);
	~CSentenceList() {}

	CSentenceList& operator=(const CSentenceList& sl);

	// NOTE(review): presumably writes the list to the file named by
	// `filename`; the output format is not visible here -- confirm.
	void print(CString filename);
};

// Node of a binary tree: holds one CDict record plus pointers to a left and
// a right child node. All member functions are defined outside this header.
// NOTE(review): a destructor is declared but no copy constructor or copy
// assignment operator is; if the destructor frees m_left / m_right, copying
// a CTree would be unsafe -- confirm against the implementation.
class CTree
{
public:
	// Dictionary record stored in this node.
	CDict m_dict;
	// Child nodes.
	CTree* m_left;
	CTree* m_right;

	CTree();
	~CTree();
	// NOTE(review): presumably adds `word` to the tree (or updates the
	// matching node's counters); ordering rule not visible here -- confirm.
	void Insert(CWord word);
	// NOTE(review): presumably writes the tree contents to `filename` -- confirm.
	void ToFile(CString filename);

protected:
	// Helpers for ToFile(); NOTE(review): exact roles are not visible in
	// this header -- confirm against the implementation.
	void TraverseToFile(CStdioFile* file);
	void MergeFile(CString file1, CString file2);
};


// A word together with two 3x3x3 tables: one of doubles (m_prob) and one of
// ints (m_prev). Derives from CObject. Both constructors are defined outside
// this header.
// NOTE(review): the tables presumably hold, per triple of states, the best
// path score and the back-pointer used while decoding -- confirm.
class CStateWord : public CObject
{
public:
	// Text of the word.
	CString m_word;
	// Tag of the word, of type CODE (declared in PreProcess.h).
	CODE m_pos;
	double m_prob[3][3][3];
	int m_prev[3][3][3];
	
	// Builds a state record from a CWord (taken by value). The constructor is
	// not declared explicit, so a CWord converts implicitly to a CStateWord.
	CStateWord(CWord w);
	CStateWord();
};


// Annotator interface: Train() consumes a corpus and Annotate() maps a
// corpus to a corpus. All member functions are defined outside this header.
// NOTE(review): the descriptions of the protected members below are inferred
// from names and array extents only -- confirm against the implementation.
class CAnnotator  
{
public:
	CAnnotator();
	// Variant taking a progress-control pointer (see m_progress).
	CAnnotator(CProgressCtrl* progress);

	virtual ~CAnnotator();

	// Both functions take the corpus by value, so the CSentenceList is
	// copied on every call; Annotate() also returns a CSentenceList by value.
	void Train(CSentenceList corpus);
	CSentenceList Annotate(CSentenceList corpus);

protected:
	// Progress control pointer; NOTE(review): ownership and whether it may
	// be null are not visible here -- confirm.
	CProgressCtrl* m_progress;
	// Two totals; NOTE(review): presumably indexed by ORG / NONNE -- confirm.
	long m_count[2];
	// Counts with extents 3x3x3x2; NOTE(review): presumably state-triple
	// counts per next state or label -- confirm.
	long m_statep[3][3][3][2];
	// Counts with extents 45x2; NOTE(review): 45 is presumably the number of
	// CODE values -- confirm.
	long m_poscount[45][2];
	// Pointer to a CTree; NOTE(review): ownership not visible here -- confirm.
	CTree *m_tree;
	// NOTE(review): presumably set once ReadData() has loaded m_data -- confirm.
	bool m_dataIn;
	// MFC array of CObject pointers; NOTE(review): element type (CDict or
	// CStateWord) is not visible here -- confirm.
	CObArray m_data;
	
	void Init();
	bool ReadData();
	double GetStateProb(int x, int y, int z, int next);
	double GetWordProb(CString word, CODE pos, int ne, int totalWordCount);
	int GetWordCount(CString word, CODE pos, int ne, int start, int end);
	// Takes the sentence by non-const reference, so it may modify it in place.
	void Eliminate(CSentence& s);
	// NOTE(review): presumably tests whether `word` occurs among the first
	// `number` entries of `wordlist` -- confirm.
	bool WordIn(CString word, CString wordlist[], int number);



};


// Free helper functions, defined outside this header.

// Builds a CDict from a string.
// NOTE(review): presumably parses one line of the dictionary file; the
// expected format is not visible here -- confirm.
CDict Extract(CString str);
// Compares two dictionary records and returns an int.
// NOTE(review): presumably negative / zero / positive like strcmp -- confirm.
int WordCompare(CDict& d1, CDict& d2);
// Overload comparing a tree node against a word; returns an int.
// NOTE(review): same presumed convention as the overload above -- confirm.
int WordCompare(CTree* node, CWord word);



#endif // !defined(AFX_ANNOTATOR_H__4705F5E4_69B7_40E6_80F0_97138655CA1B__INCLUDED_)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -