📄 classifierparam.cpp
字号:
// ClassifierParam.cpp: implementation of the CClassifierParam class.
//
//////////////////////////////////////////////////////////////////////
//分类器参数
#include "stdafx.h"
#include "ClassifierParam.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
// Definitions of the integer option codes declared as static members of
// CClassifierParam. Each group below enumerates the values of one option.

// Probability estimation mode (compared against m_nOpMode).
const int CClassifierParam::nOpDocMode = 0; // based on document counts
const int CClassifierParam::nOpWordMode = 1; // based on word counts
// Feature selection method (compared against m_nFSMode).
const int CClassifierParam::nFS_IGMode = 0; // Information gain feature selection
const int CClassifierParam::nFS_MIMode = 1; // Mutual information feature selection
const int CClassifierParam::nFS_CEMode = 2; // Cross entropy for text feature selection
const int CClassifierParam::nFS_X2Mode = 3; // X^2 statistics feature selection
const int CClassifierParam::nFS_WEMode = 4; // Weight of evidence for text feature selection
const int CClassifierParam::nFS_XXMode = 5; // Right half of information gain
// Feature selection scope (compared against m_nSelMode).
const int CClassifierParam::nFSM_GolbalMode=0; // Global selection
const int CClassifierParam::nFSM_IndividualModel=1; // Individual (per-category) selection
// Classifier type (compared against m_nClassifierType).
const int CClassifierParam::nCT_Unknown=-1; // Unknown
const int CClassifierParam::nCT_KNN=0; // KNN
const int CClassifierParam::nCT_SVM=1; // SVM
// Document language (compared against m_nLanguageType).
const int CClassifierParam::nLT_Chinese=0; // Chinese
const int CClassifierParam::nLT_English=1; // English
// Document format (m_nDocFormat defaults to nDF_Directory).
const int CClassifierParam::nDF_Directory=0; // Directory
const int CClassifierParam::nDF_Smart=1; // Smart
// Single vs. multiple classification.
const int CClassifierParam::nFT_Single=0; // Single Classification
const int CClassifierParam::nFT_Multi=1; // Multiple Classification
// Feature weighting scheme (compared against m_nWeightMode).
const int CClassifierParam::nWM_TF_IDF=0; // TF*IDF
const int CClassifierParam::nWM_TF_DIFF=1; // TF*DIFF
const int CClassifierParam::nWM_TF_IDF_DIFF=2; // TF*IDF*DIFF
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
/**
 * Default constructor: fills every training and classification parameter
 * with its initial value.
 *
 * Option members are initialised through the class's named constants rather
 * than raw integers so the defaults read the same way as the comparisons made
 * elsewhere in this class (e.g. in GetParamString). The numeric values are
 * unchanged.
 */
CClassifierParam::CClassifierParam()
{
    // Parameters used during training
    m_txtTrainDir = _T("");
    m_txtResultDir = _T("");
    m_nFSMode = CClassifierParam::nFS_IGMode;
    m_nWordSize = 1000;
    m_nSelMode = CClassifierParam::nFSM_GolbalMode;
    m_nOpMode = CClassifierParam::nOpDocMode;
    m_nLanguageType = CClassifierParam::nLT_Chinese;
    m_bStem = FALSE;
    m_nWeightMode = CClassifierParam::nWM_TF_IDF;   // was the literal 0

    // Parameters used during classification
    // NOTE(review): 0 may correspond to nFT_Single - confirm against the
    // header before replacing the literal.
    m_nClassifyType = 0;
    m_bEvaluation = TRUE;
    m_bCopyFiles = FALSE;
    m_strTestDir = _T("");
    m_strResultDir = _T("");
    m_strModelFile = _T("");
    m_nDocFormat = CClassifierParam::nDF_Directory;
    m_nKNN = 35;
    m_dThreshold = 60;
    m_nClassifierType = CClassifierParam::nCT_Unknown;   // was the literal -1
}
// Destructor: performs no explicit work.
CClassifierParam::~CClassifierParam() {}
/**
 * Stores all parameters to, or loads them from, the given archive.
 *
 * The direction is chosen by ar.IsStoring(). Both directions transfer the
 * members in exactly the same order: the training parameters first, then the
 * classification parameters.
 *
 * @param ar  Archive to write to (storing) or read from (loading).
 */
void CClassifierParam::Serialize(CArchive &ar)
{
    if (ar.IsStoring())
    {
        // Parameters used during training
        ar << m_txtTrainDir << m_txtResultDir
           << m_nFSMode << m_nWordSize << m_nSelMode << m_nOpMode
           << m_nLanguageType << m_bStem << m_nWeightMode;

        // Parameters used during classification
        ar << m_nClassifyType << m_bEvaluation << m_bCopyFiles
           << m_strTestDir << m_strResultDir << m_strModelFile
           << m_nDocFormat << m_nKNN << m_dThreshold
           << m_nClassifierType;
        return;
    }

    // Loading: mirror of the store sequence above.
    // Parameters used during training
    ar >> m_txtTrainDir >> m_txtResultDir
       >> m_nFSMode >> m_nWordSize >> m_nSelMode >> m_nOpMode
       >> m_nLanguageType >> m_bStem >> m_nWeightMode;

    // Parameters used during classification
    ar >> m_nClassifyType >> m_bEvaluation >> m_bCopyFiles
       >> m_strTestDir >> m_strResultDir >> m_strModelFile
       >> m_nDocFormat >> m_nKNN >> m_dThreshold
       >> m_nClassifierType;
}
/**
 * Writes all parameters to a binary file using Serialize().
 *
 * If the file cannot be created, a message box is shown and the function
 * returns without writing anything.
 *
 * If serialization or closing raises an exception, the archive and the file
 * are aborted before the exception is passed on unchanged to the caller.
 * Aborting discards buffered data without flushing, so destroying the archive
 * while the exception propagates cannot raise a second one.
 *
 * @param strFileName  Path of the file to create (an existing file is
 *                     replaced, per CFile::modeCreate).
 */
void CClassifierParam::DumpToFile(CString strFileName)
{
    CFile fBinOut;
    if(!fBinOut.Open(strFileName,CFile::modeWrite | CFile::modeCreate) )
    {
        AfxMessageBox( "无法创建文件"+strFileName+"!") ;
        return;
    }
    CArchive ar(&fBinOut,CArchive::store);
    try
    {
        Serialize(ar);
        ar.Close();
        fBinOut.Close();
    }
    catch (...)
    {
        // Shut both objects down without flushing, then let the original
        // exception continue to the caller exactly as before.
        ar.Abort();
        fBinOut.Abort();
        throw;
    }
}
/**
 * Loads all parameters from a binary file using Serialize().
 *
 * Failure is reported through the return value in both failure cases:
 *  - the file cannot be opened: a message box is shown;
 *  - reading raises an MFC exception (for example the file is truncated or
 *    not in the expected format): the exception's own error message is
 *    shown, the exception is deleted, and the archive and file are aborted.
 *
 * When reading fails part-way, members that were already read keep the
 * loaded values; the object is not rolled back.
 *
 * @param strFileName  Path of the file to read.
 * @return true when every parameter was read, false otherwise.
 */
bool CClassifierParam::GetFromFile(CString strFileName)
{
    CFile fBinIn;
    if(!fBinIn.Open(strFileName,CFile::modeRead) )
    {
        AfxMessageBox( "无法打开文件"+strFileName+"!");
        return false;
    }
    CArchive ar(&fBinIn,CArchive::load);
    try
    {
        Serialize(ar);
        ar.Close();
        fBinIn.Close();
    }
    catch (CException* e)
    {
        // Abort rather than Close so that tearing down the archive and file
        // cannot raise a further exception.
        ar.Abort();
        fBinIn.Abort();
        e->ReportError();
        e->Delete();
        return false;
    }
    return true;
}
/**
 * Produces a human-readable, multi-line summary of the training parameters.
 *
 * When the classifier type is neither KNN nor SVM, the summary is replaced
 * entirely by a prompt asking the user to open a model file first.
 *
 * @param strParam  Receives the resulting text; any previous content is
 *                  overwritten.
 */
void CClassifierParam::GetParamString(CString &strParam)
{
    const bool bIsKNN = (m_nClassifierType == CClassifierParam::nCT_KNN);
    const bool bIsSVM = (m_nClassifierType == CClassifierParam::nCT_SVM);

    // Unknown classifier type: the prompt is the whole result.
    if (!bIsKNN && !bIsSVM)
    {
        strParam = "请先打开一个分类模型文件!";
        return;
    }

    // Training and result directories
    strParam = "训练文档目录:\t\t" + m_txtTrainDir + "\r\n";
    strParam += "训练结果目录:\t\t" + m_txtResultDir + "\r\n";

    // Probability estimation method
    const char *pszOpMode = "未知\r\n";
    if (m_nOpMode == CClassifierParam::nOpDocMode)
        pszOpMode = "基于文档统计\r\n";
    else if (m_nOpMode == CClassifierParam::nOpWordMode)
        pszOpMode = "基于词频统计\r\n";
    strParam += "概率估算方法:\t\t";
    strParam += pszOpMode;

    // Feature selection method
    const char *pszFSMode = "未知\r\n";
    if (m_nFSMode == CClassifierParam::nFS_IGMode)
        pszFSMode = "信息增益\r\n";
    else if (m_nFSMode == CClassifierParam::nFS_MIMode)
        pszFSMode = "互信息\r\n";
    else if (m_nFSMode == CClassifierParam::nFS_CEMode)
        pszFSMode = "期望交叉熵\r\n";
    else if (m_nFSMode == CClassifierParam::nFS_X2Mode)
        pszFSMode = "X^2统计\r\n";
    else if (m_nFSMode == CClassifierParam::nFS_WEMode)
        pszFSMode = "文本证据权重\r\n";
    else if (m_nFSMode == CClassifierParam::nFS_XXMode)
        pszFSMode = "右半信息增益\r\n";
    strParam += "特征选择方法:\t\t";
    strParam += pszFSMode;

    // Feature selection scope
    const char *pszSelMode = "未知\r\n";
    if (m_nSelMode == CClassifierParam::nFSM_GolbalMode)
        pszSelMode = "全局选取\r\n";
    else if (m_nSelMode == CClassifierParam::nFSM_IndividualModel)
        pszSelMode = "按类别单独选取\r\n";
    strParam += "特征选择方式:\t\t";
    strParam += pszSelMode;

    // Document language; the stemming line is only emitted for English
    strParam += "文档语言种类:\t\t";
    if (m_nLanguageType == CClassifierParam::nLT_English)
    {
        strParam += "英文\r\n";
        strParam += m_bStem ? "是否词干抽取:\t\t是\r\n"
                            : "是否词干抽取:\t\t否\r\n";
    }
    else if (m_nLanguageType == CClassifierParam::nLT_Chinese)
    {
        strParam += "中文\r\n";
    }
    else
    {
        strParam += "未知\r\n";
    }

    // Feature weighting scheme
    const char *pszWeightMode = "未知\r\n";
    if (m_nWeightMode == CClassifierParam::nWM_TF_IDF)
        pszWeightMode = "TF*IDF\r\n";
    else if (m_nWeightMode == CClassifierParam::nWM_TF_DIFF)
        pszWeightMode = "TF*特征评估函数值\r\n";
    else if (m_nWeightMode == CClassifierParam::nWM_TF_IDF_DIFF)
        pszWeightMode = "TF*IDF*特征评估函数值\r\n";
    strParam += "特征加权算法:\t\t";
    strParam += pszWeightMode;

    // Feature space dimension
    CString strDimension;
    strDimension.Format("特征空间维数:\t\t%d\r\n", m_nWordSize);
    strParam += strDimension;

    // Classifier type (guaranteed to be KNN or SVM by the guard above)
    if (bIsKNN)
        strParam += "分类器类型: \t\tKNN\r\n";
    else
        strParam += "分类器类型: \t\tSVM\r\n";
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -