// File: annotator.h
// [code-viewer page residue: "Font size:" label - not part of the header]
// Annotator.h: interface for the CAnnotator class.
//
//////////////////////////////////////////////////////////////////////
#if !defined(AFX_ANNOTATOR_H__4705F5E4_69B7_40E6_80F0_97138655CA1B__INCLUDED_)
#define AFX_ANNOTATOR_H__4705F5E4_69B7_40E6_80F0_97138655CA1B__INCLUDED_
#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000
// Small integer label/state codes.
// NOTE(review): ORG and NONNE look like the two values of the `ne` label (they
// fit the [2]-sized count arrays declared in this header) and S_BEGIN like a
// third, sentence-start state (fits the [3]-sized state tables) - confirm
// against Annotator.cpp.
#define S_BEGIN 2
#define ORG 0
#define NONNE 1
// Large negative sentinel. NOTE(review): presumably a floor for
// (log-)probabilities - confirm where it is used.
// The replacement list is parenthesized so the expansion is always a single
// operand, whatever tokens surround the macro at the point of use.
#define MINPROB (-100000)
#include "afxtempl.h"
#include "PreProcess.h"
//extern enum CODE;
// One word of a sentence together with its annotations (plain data holder).
// NOTE(review): the class declares no constructor, so `CWord w;` leaves
// m_pos, m_start, m_end and m_ne uninitialized; callers must set them
// before reading.
class CWord
{
public:
// Text of the word.
CString m_word;
// Tag of type CODE (CODE is not declared in this header).
// NOTE(review): presumably the part-of-speech code - confirm.
CODE m_pos;
// NOTE(review): presumably the start and end positions of the word in its
// source text - confirm the unit and whether m_end is inclusive.
int m_start;
int m_end;
// NOTE(review): presumably the named-entity label (ORG / NONNE) - confirm.
int m_ne;
};
class CDict : public CObject
{
public:
CString m_word;
CODE m_pos;
int m_count[2];
CDict() {m_count[0] = m_count[1] = 0;};
~CDict() {};
CDict(const CDict&);
CDict& operator = (const CDict&);
};
// A sentence: a CList whose elements are CWord objects.
class CSentence : public CList<CWord, CWord&>
{
public:
    // Default construction and destruction add nothing to the base class.
    CSentence() {}
    ~CSentence() {}

    // Copy construction and assignment are only declared here; their
    // definitions are not in this header.
    CSentence(const CSentence&);
    CSentence& operator = (const CSentence& s);
};
// A list of sentences: a CList whose elements are CSentence objects.
class CSentenceList : public CList<CSentence, CSentence&>
{
public:
    // Default construction and destruction add nothing to the base class.
    CSentenceList() {}
    ~CSentenceList() {}

    // Copy construction and assignment are only declared here; their
    // definitions are not in this header.
    CSentenceList(const CSentenceList&);
    CSentenceList& operator = (const CSentenceList& sl);

    // NOTE(review): presumably writes the list to the file named `filename`
    // - the output format is defined in the implementation file.
    void print(CString filename);
};
// Node of a binary tree: one CDict entry plus left and right child links.
// NOTE(review): the class declares a destructor but no copy constructor or
// assignment operator. If ~CTree() deletes the children (not visible from this
// header), copying a CTree would leave two nodes owning the same subtrees -
// confirm in the implementation file.
class CTree
{
public:
// Entry stored at this node.
CDict m_dict;
// Child links; ownership is not visible from this header.
CTree* m_left;
CTree* m_right;
CTree();
~CTree();
// Takes `word` by value.
// NOTE(review): presumably inserts it into the tree, ordered with
// WordCompare(CTree*, CWord) - confirm.
void Insert(CWord word);
// NOTE(review): presumably writes the tree to the file named `filename`
// - confirm the format in the implementation file.
void ToFile(CString filename);
protected:
// NOTE(review): presumably the traversal helper behind ToFile(), writing
// to an already-open CStdioFile - confirm.
void TraverseToFile(CStdioFile* file);
// NOTE(review): presumably merges the contents of two files - confirm which
// file receives the result.
void MergeFile(CString file1, CString file2);
};
// Per-word record: a word, its CODE tag and two 3x3x3 tables.
// Derives from CObject.
// NOTE(review): the tables look like dynamic-programming scores (m_prob) and
// back-pointers (m_prev) indexed by three label states in the range 0..2
// (compare ORG / NONNE / S_BEGIN) - confirm in the implementation file.
class CStateWord : public CObject
{
public:
// Text of the word.
CString m_word;
// Tag of type CODE (CODE is not declared in this header).
CODE m_pos;
// 3x3x3 table of doubles; see the class note above.
double m_prob[3][3][3];
// 3x3x3 table of ints; see the class note above.
int m_prev[3][3][3];
// Constructs from a CWord taken by value. The constructor is not `explicit`,
// so a CWord converts implicitly to a CStateWord.
CStateWord(CWord w);
CStateWord();
};
// Annotator interface: Train() consumes a sentence list and Annotate() maps a
// sentence list to a sentence list. The statistics tables and helper routines
// are protected.
class CAnnotator
{
public:
CAnnotator();
// Takes a pointer to a progress control.
// NOTE(review): presumably stored in m_progress and not owned - confirm.
CAnnotator(CProgressCtrl* progress);
virtual ~CAnnotator();
// Both entry points take the corpus by value, so the whole list is copied
// through CSentenceList's copy constructor on every call.
void Train(CSentenceList corpus);
CSentenceList Annotate(CSentenceList corpus);
protected:
// Progress control pointer; ownership is not visible from this header.
CProgressCtrl* m_progress;
// Counters. NOTE(review): presumably the last dimension of size 2 is indexed
// by ORG / NONNE, the size-3 dimensions by the three label states, and the
// size-45 dimension by the CODE value - confirm in the implementation file.
long m_count[2];
long m_statep[3][3][3][2];
long m_poscount[45][2];
// Pointer to a CTree; ownership is not visible from this header.
CTree *m_tree;
// NOTE(review): presumably records whether ReadData() has loaded the data.
bool m_dataIn;
// MFC object array. NOTE(review): element type is not visible here.
CObArray m_data;
void Init();
// Returns a bool. NOTE(review): presumably true on success - confirm.
bool ReadData();
// NOTE(review): presumably a transition probability from the state triple
// (x, y, z) to `next`, derived from m_statep - confirm.
double GetStateProb(int x, int y, int z, int next);
// NOTE(review): presumably the probability of `word` with tag `pos` under
// label `ne`, normalized by `totalWordCount` - confirm.
double GetWordProb(CString word, CODE pos, int ne, int totalWordCount);
// NOTE(review): presumably looks up the count for (`word`, `pos`, `ne`) within
// the index range [start, end] of the stored data - confirm the bounds.
int GetWordCount(CString word, CODE pos, int ne, int start, int end);
// Takes the sentence by non-const reference, so it may modify it in place.
void Eliminate(CSentence& s);
// NOTE(review): presumably reports whether `word` occurs among the first
// `number` entries of `wordlist` - confirm.
bool WordIn(CString word, CString wordlist[], int number);
};
// Free helper functions; their definitions are not in this header.
// NOTE(review): presumably parses one text record into a CDict - confirm the
// expected format of `str`.
CDict Extract(CString str);
// Two comparison overloads returning int.
// NOTE(review): presumably strcmp-style (negative / zero / positive) ordering
// on the word and its tag - confirm.
int WordCompare(CDict& d1, CDict& d2);
int WordCompare(CTree* node, CWord word);
#endif // !defined(AFX_ANNOTATOR_H__4705F5E4_69B7_40E6_80F0_97138655CA1B__INCLUDED_)
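// ---- Code-viewer page residue (not part of annotator.h) ----
// Keyboard shortcuts:
//   Copy code            Ctrl + C
//   Search code          Ctrl + F
//   Full-screen mode     F11
//   Toggle theme         Ctrl + Shift + D
//   Show shortcuts       ?
//   Increase font size   Ctrl + =
//   Decrease font size   Ctrl + -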