⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 clsfydlg.cpp

📁 贝叶斯分类器设计
💻 CPP
字号:
// ClsfyDlg.cpp : implementation file
//

#include "stdafx.h"
#include "sim_tc.h"
#include "ClsfyDlg.h"
#include "FileItem.h"

#include "BayesDlg.h"
#include "SvmDlg.h"

#include <stdlib.h>
#include <stdio.h>
#include <math.h>

#include <time.h>

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

/////////////////////////////////////////////////////////////////////////////
// CClsfyDlg

// Emits the MFC run-time class support for CClsfyDlg with CFormView as its
// base, allowing the framework to create the view dynamically.
// NOTE(review): the matching DECLARE_DYNCREATE is presumably in ClsfyDlg.h - confirm.
IMPLEMENT_DYNCREATE(CClsfyDlg, CFormView)

// Constructs the classification form view bound to the CClsfyDlg::IDD
// dialog template. The list of files to classify starts out empty and the
// option flags are set to their initial values.
CClsfyDlg::CClsfyDlg()
	: CFormView(CClsfyDlg::IDD)
{
	//{{AFX_DATA_INIT(CClsfyDlg)
	//}}AFX_DATA_INIT

	// Heap-allocated container for the files queued for classification,
	// together with the running count of queued files.
	m_ClsFileArray = new CFileArray;
	m_ClsFileNum = 0;

	// Initial option values; OnClsPara() may overwrite them later.
	m_ClsModel = 0;
	m_isMaxProb = TRUE;
	m_isSet = FALSE;
}

// Releases every CFileItem still queued in m_ClsFileArray and then the
// array object itself (it is allocated with `new` in the constructor).
//
// The previous loop used the condition `i<0`, which is never true when
// counting down from n-1, so no item was ever deleted and the array object
// was leaked as well.
CClsfyDlg::~CClsfyDlg()
{
	if (m_ClsFileArray == NULL)
		return;

	// Delete the owned items first; RemoveAll() only drops the pointers.
	for (int i = (int)m_ClsFileArray->GetSize() - 1; i >= 0; i--){
		delete m_ClsFileArray->GetAt(i);
	}
	m_ClsFileArray->RemoveAll();

	delete m_ClsFileArray;
	m_ClsFileArray = NULL;
}

// Dialog data exchange: forwards to CFormView and then binds each control
// ID of the form to the corresponding control member of this view.
// The lines between the AFX_DATA_MAP markers are maintained by ClassWizard.
//
// pDX - data-exchange context supplied by the framework.
void CClsfyDlg::DoDataExchange(CDataExchange* pDX)
{
	CFormView::DoDataExchange(pDX);
	//{{AFX_DATA_MAP(CClsfyDlg)
	DDX_Control(pDX, IDC_CLS_TRAINSET, m_TrainSet);
	DDX_Control(pDX, IDC_CLS_START, m_Start);
	DDX_Control(pDX, IDC_CLS_PARA, m_SetPara);
	DDX_Control(pDX, IDC_CLS_FILELIST, m_ClsFileList);
	DDX_Control(pDX, IDC_CLS_CHOOSEFILE, m_ChooseFile);
	DDX_Control(pDX, IDC_CLS_ALG, m_ClsAlgrithm);
	DDX_Control(pDX, IDC_CLS_RESULTGRID, m_ResultGrid);
	//}}AFX_DATA_MAP
}


// Message map: routes control notifications of the form to the handlers
// defined below.
//   IDC_CLS_CHOOSEFILE click        -> OnClsChoosefile
//   IDC_CLS_PARA click              -> OnClsPara
//   IDC_CLS_START click             -> OnClsStart
//   IDC_CLS_FILELIST double-click   -> OnDblclkClsFilelist
//   IDC_CLS_TRAINSET receives focus -> OnSetfocusClsTrainset
// The entries between the AFX_MSG_MAP markers are maintained by ClassWizard.
BEGIN_MESSAGE_MAP(CClsfyDlg, CFormView)
	//{{AFX_MSG_MAP(CClsfyDlg)
	ON_BN_CLICKED(IDC_CLS_CHOOSEFILE, OnClsChoosefile)
	ON_BN_CLICKED(IDC_CLS_PARA, OnClsPara)
	ON_BN_CLICKED(IDC_CLS_START, OnClsStart)
	ON_LBN_DBLCLK(IDC_CLS_FILELIST, OnDblclkClsFilelist)
	ON_CBN_SETFOCUS(IDC_CLS_TRAINSET, OnSetfocusClsTrainset)
	//}}AFX_MSG_MAP
END_MESSAGE_MAP()

/////////////////////////////////////////////////////////////////////////////
// CClsfyDlg diagnostics

#ifdef _DEBUG
// Debug-only validity check; simply forwards to the CFormView implementation.
void CClsfyDlg::AssertValid() const
{
	CFormView::AssertValid();
}

// Debug-only diagnostic dump; simply forwards to the CFormView implementation.
//
// dc - dump context that receives the diagnostic output.
void CClsfyDlg::Dump(CDumpContext& dc) const
{
	CFormView::Dump(dc);
}
#endif //_DEBUG

/////////////////////////////////////////////////////////////////////////////
// CClsfyDlg message handlers

void CClsfyDlg::OnClsChoosefile() 
{
	CFileDialog AddFileDlg(TRUE,"txt","*.txt",NULL,NULL,this);
	if(AddFileDlg.DoModal() == IDOK){
		CFileItem* pFileItem = new CFileItem;
		//pFileItem->m_FileNumber = m_TrainDocNum;
		//AfxMessageBox(m_TrainDocNum);
		pFileItem->m_FileName = AddFileDlg.GetFileName();
		pFileItem->m_FilePath = AddFileDlg.GetPathName();
		m_ClsFileList.InsertString(m_ClsFileNum, pFileItem->m_FileName);
		m_ClsFileNum = m_ClsFileNum + 1;
		m_ClsFileArray->Add(pFileItem);
	}
}

// Handler for the "parameters" button: opens the parameter dialog that
// belongs to the algorithm currently selected in m_ClsAlgrithm and copies
// the confirmed settings into this view.
//
// Only algorithm index 0 (the Bayes dialog) has settings; indices 1-3
// report that the feature is not implemented.
//
// Fix: m_isMaxProb used to be copied only when the dialog value was 0, so
// once it had been cleared it could never be set again. It is now always
// mirrored from the dialog. m_ProbLimit is still copied under the same
// condition as before (dialog m_isMaxProb non-zero).
void CClsfyDlg::OnClsPara() 
{
	// Remember that the parameter command was invoked; OnClsStart() uses
	// this flag to decide whether to warn about default settings.
	m_isSet = TRUE;

	int nIndex = m_ClsAlgrithm.GetCurSel();
	if (nIndex == CB_ERR)
		return;	// no algorithm selected

	switch (nIndex){
		case 0:
		{
			// Show the Bayes parameter dialog and take over its settings.
			CBayesDlg BayesDlg;
			if (BayesDlg.DoModal() == IDOK){
				m_isMaxProb = BayesDlg.m_isMaxProb;
				if (BayesDlg.m_isMaxProb != 0){
					m_ProbLimit = BayesDlg.m_ProbLimit;
				}
				m_ClsModel = BayesDlg.m_ClsModel;
			}
			break;
		}
		case 1:
			AfxMessageBox("功能还没有实现!");
			break;
		case 2:
			AfxMessageBox("功能还没有实现");
			break;
		case 3:
			AfxMessageBox("功能还没有实现");
			break;
		default:
			break;
	}
}

// Handler for the "start" button: validates the user's selections and runs
// the chosen classification algorithm on the queued files.
//
// Requires a selected algorithm, a selected training set and at least one
// entry in the file list; otherwise a prompt is shown and nothing runs.
// Only algorithm index 0 with m_ClsModel == 1 (multinomial Bayes) is
// implemented.
//
// Fix: any other m_ClsModel value (including the constructor default 0)
// used to fall into an empty branch and silently do nothing; it now reports
// that the feature is not implemented, like the other unimplemented paths.
void CClsfyDlg::OnClsStart() 
{
	const int nIndexAlgorithm = m_ClsAlgrithm.GetCurSel();
	const int nIndexTrainSet = m_TrainSet.GetCurSel();
	const int nClsFileCount = m_ClsFileList.GetCount();

	if (nIndexAlgorithm == CB_ERR){
		AfxMessageBox("请选择算法!");
		return;
	}
	if (nIndexTrainSet == CB_ERR){
		AfxMessageBox("请选择训练集!");
		return;
	}
	if (nClsFileCount == 0){
		AfxMessageBox("请添加待分类的文档!");
		return;
	}

	// Tell the user that defaults are in effect if the parameter command
	// was never used.
	if (m_isSet == FALSE){
		AfxMessageBox("若您没有进行参数设置,将采用系统默认设置进行分类!");
	}

	switch (nIndexAlgorithm){
		case 0:
		{
			if (m_ClsModel == 1){
				// Train on the selected table, then classify the queued files.
				CString CurTrainSet;
				m_TrainSet.GetLBText(nIndexTrainSet,CurTrainSet);
				MultinomalBayesTrain(CurTrainSet);
				MultinomalBayesClassification(m_ClsFileArray, CurTrainSet, m_ClsFileNum);
			}
			else{
				// No classifier exists for this model yet.
				AfxMessageBox("功能还没有实现!");
			}
			break;
		}
		case 1:
			AfxMessageBox("功能还没有实现!");
			break;
		case 2:
			AfxMessageBox("功能还没有实现");
			break;
		case 3:
			AfxMessageBox("功能还没有实现");
			break;
		default:
			break;
	}
}

// Initial-update hook of the view: runs the CFormView implementation and
// then fills the training-set combo box via DisplayTrainSet().
void CClsfyDlg::OnInitialUpdate() 
{
	CFormView::OnInitialUpdate();
	DisplayTrainSet();
	
}

// Refills the training-set combo box (IDC_CLS_TRAINSET) with the names
// returned by a query on sysobjects (xtype='u', excluding 'dtproperties').
//
// The combo box is always cleared first. If the shared ADO connection is
// not marked as connected, a message is shown and the box stays empty.
//
// Fix: the connection flag used to be checked only after the query had
// already been executed through the connection; it is now checked first.
// The combo pointer is also checked at run time, not only by ASSERT.
void CClsfyDlg::DisplayTrainSet()
{
	CComboBox* pComboBox = (CComboBox*) GetDlgItem(IDC_CLS_TRAINSET);
	ASSERT(pComboBox != NULL);
	if (pComboBox == NULL)
		return;

	pComboBox->ResetContent();

	// Do not touch the connection object unless it is known to be open.
	if (!CSim_tcApp::AdoDBObject.m_bIsConnected){
		AfxMessageBox("数据库还没有连上,请关闭程序重试!");
		return;
	}

	_variant_t RecordsAffected;
	CString strSQL = "select name from sysobjects where xtype='u' and name<>'dtproperties'";
	CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)strSQL,&RecordsAffected,adCmdText);

	_variant_t vDatasetName;
	CString strDatasetName;
	while(!CSim_tcApp::AdoDBObject.m_pRecordset->adoEOF){
		// Fetch the name of a user-defined table and add it to the combo box.
		vDatasetName = CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect("name");
		strDatasetName = vDatasetName.bstrVal;
		pComboBox->AddString( strDatasetName );
		CSim_tcApp::AdoDBObject.m_pRecordset->MoveNext();	// advance to the next record
	}

	CSim_tcApp::AdoDBObject.m_pRecordset->Close();
	Invalidate(FALSE);
} 

void CClsfyDlg::OnDblclkClsFilelist() 
{
	// TODO: Add your control notification handler code here
	int nIndex = m_ClsFileList.GetCurSel();
	CFileItem* pa;
	pa = m_ClsFileArray->GetAt(nIndex);
	if (nIndex != LB_ERR){
		m_ClsFileList.DeleteString(nIndex);
		m_ClsFileArray->RemoveAt(nIndex);
		delete pa;
		m_ClsFileNum = m_ClsFileNum - 1;
	}
}

// Placeholder with an empty body; it currently does nothing.
// NOTE(review): the name suggests it was meant to populate the algorithm
// combo box - confirm before implementing or removing.
void CClsfyDlg::DisplayClsAlgrithm()
{
}

// Handler for the training-set combo box receiving focus: reloads its
// contents through DisplayTrainSet(), which clears the box before refilling it.
void CClsfyDlg::OnSetfocusClsTrainset() 
{
	// Refresh the list of training sets each time the combo box gets focus.
	DisplayTrainSet();
}

// Classifies every document vector found in the queued files against the
// model table "<CurTrainSet>_MultinomalResult" and writes the normalised
// per-class scores plus the winning class label into m_ResultGrid.
//
// pFileHead   - array of CFileItem*; entries [0, ClsFileNum) are opened
//               through their m_FilePath.
// CurTrainSet - name of the training table. It is concatenated directly into
//               SQL text. NOTE(review): the visible caller passes a table name
//               taken from the training-set combo box; do not pass free-form
//               user input here.
// ClsFileNum  - number of entries of pFileHead to process.
//
// Input format: every line of a file is one document; values are separated
// by ' ' or ','. The dimension is taken from the first line of each file via
// GetVectorDim().
//
// Scoring, per class column "class_<label>" of the model table: the first
// row is scaled by MagnifyFactor and used as a plain factor (presumably the
// class prior - confirm against MultinomalBayesTrain); every following row
// is raised to the power of the corresponding term count and multiplied in.
//
// Fixes compared with the previous version:
//  * `total` and `MaxIndex` are reset for every document; previously the sum
//    accumulated across documents, so every document after the first was
//    normalised by the wrong value and could inherit a stale winner.
//  * Normalisation is skipped when the sum is 0 (all scores underflowed)
//    instead of dividing 0 by 0.
//  * Writes into the term-count array and the label array are bounds-checked
//    (a line with more values than the first line, or more label rows than
//    the counted classes, used to write past the end).
//  * Scores are accumulated in double instead of float; the model values and
//    the result array are double already.
//  * The last value of a line is parsed with atoi like all the others
//    (it used atof).
//  * Returns early when the training table has no classes rather than
//    indexing an empty label array.
void CClsfyDlg::MultinomalBayesClassification(CFileArray* pFileHead, CString CurTrainSet, int ClsFileNum)
{
	_variant_t RecordsAffected;
	_variant_t vAttrNum;
	_variant_t vClassNum;
	_variant_t vClassLabel;
	_variant_t vIndex = (long)0;
	StringArray ClassLabelArray;	// class labels, in query order
	CString SqlGetClassLabel = "";
	CString ClassLabel="";

	int DocNum = 0;	// running number of the classified document (grid row)

	// Query the number of columns of the training table.
	CString SqlCheckTrainSetDim = "";
	int TrainFileDim = 0;
	SqlCheckTrainSetDim = "select count(name) from syscolumns where syscolumns.id in (select id from sysobjects where name = '" + CurTrainSet + "')";
	CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlCheckTrainSetDim,&RecordsAffected,adCmdText);
	vAttrNum = CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
	TrainFileDim = vAttrNum.lVal;
	// Two columns are not counted as attributes.
	// NOTE(review): presumably an id column and classlabel - confirm against the table schema.
	TrainFileDim = TrainFileDim - 2;
	CSim_tcApp::AdoDBObject.m_pRecordset->Close();

	// Scale factor that keeps the products from becoming too small to compare.
	double MagnifyFactor = 1.0E30;

	// Query the number of distinct classes.
	SqlGetClassLabel = "select count(distinct classlabel) from   " + CurTrainSet; 
	CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlGetClassLabel,&RecordsAffected,adCmdText);
	vClassNum = CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
	int ClassNum = vClassNum.lVal;
	CSim_tcApp::AdoDBObject.m_pRecordset->Close();
	ClassLabelArray.SetSize(ClassNum,5);
	DoubleArray ClassificationResult;	// one score per class for the current document
	ClassificationResult.SetSize(ClassNum,5);

	// Query the class names.
	SqlGetClassLabel = "select distinct classlabel from   " + CurTrainSet; 
	CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlGetClassLabel,&RecordsAffected,adCmdText);
	int i=0;
	// Stop at ClassNum rows so the array sized above is never overrun.
	while((!CSim_tcApp::AdoDBObject.m_pRecordset->adoEOF) && (i<ClassNum)){
		vClassLabel = (_bstr_t)CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
		ClassLabel = vClassLabel.bstrVal;
		ClassLabel.TrimLeft();
		ClassLabel.TrimRight();
		ClassLabelArray.SetAt(i,ClassLabel);
		i++;
		CSim_tcApp::AdoDBObject.m_pRecordset->MoveNext();
	}
	CSim_tcApp::AdoDBObject.m_pRecordset->Close();
	ClassLabelArray.FreeExtra();

	// Fill the header row: one column per class, then a "Result" column.
	CString temp;	// holds a class label or a derived column name
	for(i=1;i<=ClassNum;i++){
		temp = ClassLabelArray.GetAt(i-1);
		m_ResultGrid.SetTextMatrix(0,i,temp);
	}
	m_ResultGrid.SetTextMatrix(0,i,"Result");
	UpdateData(FALSE);

	if (ClassNum <= 0)
		return;	// no classes to score against

	// Read the files one by one; each line is one term-count vector.
	CString ATextLine = "";
	CFileItem* pFileItem;
	BOOL NotAtEnd;
	double total = 0.0;
	double FirstClassProb = 0.0;	// best normalised score seen so far
	double LatterClassProb = 0.0;	// normalised score of the class being examined
	int MaxIndex = 0;
	for (int indexFile=0; indexFile<ClsFileNum; indexFile++){
		pFileItem = pFileHead->GetAt(indexFile);
		CStdioFile ClsDocItem(pFileItem->m_FilePath,CFile::modeRead);

		NotAtEnd = ClsDocItem.ReadString(ATextLine);	// fetch the first line

		// The first line determines the vector dimension for this file.
		int DocVectorDim;
		DocVectorDim = GetVectorDim(ATextLine);
		if (TrainFileDim != DocVectorDim){
			AfxMessageBox("分类文档与训练集的维数不相等!");
		}
		int TextLineLength;	// length of the line just read
		while (NotAtEnd){
			// Term counts of the current document.
			IntArray DocVectorArray;
			DocVectorArray.SetSize(DocVectorDim,50);
			ATextLine.TrimLeft();
			ATextLine.TrimRight();
			int j = 0;	// character position in ATextLine, later the term index
			TextLineLength = ATextLine.GetLength();
			if (TextLineLength != 0){
				// Split the line on ' ', ',' (or an embedded '\0') and convert
				// each token to an integer count.
				CString strVectorAttr = "";
				char GetChar;
				int iVectorAttr = 0;
				int IndexVector = 0;
				for (j=0; j<TextLineLength; j++){
					GetChar = ATextLine.GetAt(j);
					if ((GetChar == ' ') || (GetChar =='\0') || (GetChar ==',')){
						iVectorAttr = atoi(strVectorAttr);
						// Ignore values beyond the dimension fixed by the first line.
						if (IndexVector < DocVectorDim){
							DocVectorArray.SetAt(IndexVector,iVectorAttr);
						}
						IndexVector++;
						strVectorAttr = "";
					}
					else {
						strVectorAttr = strVectorAttr + GetChar;
					}
				}
				// The last token has no trailing separator.
				iVectorAttr = atoi(strVectorAttr);
				if (IndexVector < DocVectorDim){
					DocVectorArray.SetAt(IndexVector,iVectorAttr);
				}
				DocVectorArray.FreeExtra();
			}

			// Classify the document; DocNum doubles as the grid row.
			DocNum++;
			char buffer[20];
			_itoa( DocNum, buffer, 10 );
			m_ResultGrid.SetTextMatrix(DocNum,0,buffer);

			CString SqlGetAColumn = "";
			_variant_t vColumnProb;
			double dFirstRowFactor;	// first row of the class column, scaled
			double dProduct = 1.0;	// product of the per-term factors
			int Exponent = 0;
			double Base;
			for (i=0; i<ClassNum; i++){    
				temp = "class_" + ClassLabelArray.GetAt(i);
				SqlGetAColumn = "Select " + temp + " from ";
				SqlGetAColumn = SqlGetAColumn + CurTrainSet +"_MultinomalResult";
				CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnClsDB->Execute((_bstr_t)SqlGetAColumn,&RecordsAffected,adCmdText);
				vColumnProb = CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(_bstr_t(temp));
				dFirstRowFactor = vColumnProb.dblVal;
				dFirstRowFactor = dFirstRowFactor * MagnifyFactor;

				// Remaining rows: one value per term, raised to the term count.
				j=0;
				CSim_tcApp::AdoDBObject.m_pRecordset->MoveNext();
				dProduct = 1.0;
				while ((!CSim_tcApp::AdoDBObject.m_pRecordset->adoEOF) && (j<DocVectorDim)){
					vColumnProb = CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(_bstr_t(temp));
					Base = vColumnProb.dblVal;
					Exponent = DocVectorArray.GetAt(j);
					dProduct = dProduct * pow(Base,Exponent);
					CSim_tcApp::AdoDBObject.m_pRecordset->MoveNext();
					j++;
				}
				dProduct = dProduct * dFirstRowFactor;
				ClassificationResult.SetAt(i,dProduct);

				CSim_tcApp::AdoDBObject.m_pRecordset->Close();
			}

			// Normalise the scores of THIS document and pick the largest one.
			total = 0.0;
			for (i=0; i<ClassNum; i++){
				total = total + ClassificationResult.GetAt(i);
			}
			FirstClassProb = 1.0E-10;
			MaxIndex = 0;
			for (i=0; i<ClassNum; i++){
				LatterClassProb = ClassificationResult.GetAt(i);
				if (total != 0.0){
					LatterClassProb = LatterClassProb/total;
				}
				if (LatterClassProb>FirstClassProb){
					FirstClassProb = LatterClassProb;
					MaxIndex = i;
				}
				ClassificationResult.SetAt(i, LatterClassProb);
				_gcvt(LatterClassProb, 7, buffer);
				m_ResultGrid.SetTextMatrix(DocNum,i+1,buffer);
			}
			// Winning label goes into the "Result" column (ClassNum+1).
			temp = ClassLabelArray.GetAt(MaxIndex);
			m_ResultGrid.SetTextMatrix(DocNum,ClassNum+1,temp);

			// Done with this document; read the next line.
			NotAtEnd = ClsDocItem.ReadString(ATextLine);
		}//	while (NotAtEnd)
		ClsDocItem.Close();
	}
	// Notes carried over from the original author (not implemented here):
	// write the classification results to a temporary file temp.txt in the
	// current directory, keep each file's per-class probabilities in an array
	// (TextPostProbArray), and record each file's class in FileClassArray.
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -