⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 stdafx.cpp

📁 贝叶斯分类器设计
💻 CPP
📖 第 1 页 / 共 2 页
字号:
// stdafx.cpp : source file that includes just the standard includes
//	sim_tc.pch will be the pre-compiled header
//	stdafx.obj will contain the pre-compiled type information

#include "stdafx.h"
#include "sim_tc.h"
#include <afx.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>

// Finds the best-scoring entry of a classification result array.
//
// Parameters:
//   ClsResult - array of per-class scores, read through GetSize()/GetAt().
//               NOTE(review): presumably a CArray of double - confirm against
//               the DoubleArray typedef.
// Returns:
//   Zero-based index of the first element holding the maximum value, or -1
//   when ClsResult is NULL or contains no elements.
int SortClsResult(DoubleArray* ClsResult)
{
	if (ClsResult == NULL)
		return -1;

	int n = ClsResult->GetSize();
	if (n <= 0)
		return -1;

	// Compare as double: storing the elements in int locals would truncate
	// fractional scores (e.g. probabilities in [0,1]) and make them all tie.
	double BestValue = ClsResult->GetAt(0);
	int Index = 0;
	for (int i=1; i<n; i++){
		double Candidate = ClsResult->GetAt(i);
		// Strict '>' keeps the earliest index when several scores are equal.
		if (Candidate > BestValue){
			BestValue = Candidate;
			Index = i;
		}
	}
	return Index;
}

// Computes the dimension of a vector given in text form.
//
// The dimension is the number of ',' and ';' separator characters found in
// VectorText plus one, so a string without any separator (including an empty
// string) yields 1.
int GetVectorDim(CString VectorText)
{
	const int len = VectorText.GetLength();
	int separators = 0;
	int pos = 0;
	while (pos < len){
		char ch = VectorText.GetAt(pos);
		switch (ch){
		case ',':
		case ';':
			separators++;
			break;
		default:
			break;
		}
		pos++;
	}
	// N separators delimit N+1 components.
	return separators + 1;
}


// Counts the commas on the first line of a training document.
//
// Parameters:
//   pFileItem - file descriptor whose m_FilePath names the document to open.
// Returns:
//   Number of ',' characters on the first line of the file; 0 when the file
//   is empty or the first line contains no comma.
// Notes:
//   The CStdioFile constructor is used in its throwing form, so a file that
//   cannot be opened still raises the MFC file exception to the caller.
int ReadTrainDocDim(CFileItem* pFileItem)
{
	CStdioFile TrainDocItem(pFileItem->m_FilePath,CFile::modeRead);
	CString FirstLine;
	int TrainDocDim = 0;
	// Read into a CString rather than a fixed char buffer: the line length is
	// then known, so the scan cannot run past the data when the line has no
	// trailing newline, is longer than a fixed buffer, or the file is empty.
	if (TrainDocItem.ReadString(FirstLine)){
		int Length = FirstLine.GetLength();
		for (int i=0; i<Length; i++){
			if (FirstLine.GetAt(i) == ',')
				TrainDocDim ++;
		}
	}
	TrainDocItem.Close();
	return TrainDocDim;
}

// Builds the SQL statement that creates the table for a new data set.
//
// Parameters:
//   colNumber - number of word columns; they are named wd1 .. wd<colNumber>
//               and declared as smallint.
//   tableName - name of the table to create (inserted into the statement
//               verbatim).
// Returns:
//   "create table <tableName>(wd1 smallint,...,classlabel char(20),
//   rowid int identity(0,1) not null primary key)".
CString SqlCreateTable(int colNumber, CString tableName)
{
	CString sql = "create table ";
	sql += tableName;
	sql += "(";

	CString column;
	for (int col = 1; col <= colNumber; ++col){
		// Decimal column index prefixed with "wd".
		column.Format("wd%d", col);
		sql += column;
		sql += " smallint,";
	}

	sql += "classlabel char(20), rowid int identity(0,1) not null primary key)";
	return sql;
}

// ChangeSQL: uses the file's dimension (comma count) to turn one record read
// from a file into the value list of an insert statement.
//
// Parameters:
//   str     - one comma-separated record. AddFileToDB passes the line with an
//             extra '\0' character appended; the scan loop below stops on that
//             character and the last statement overwrites it.
//   comaNum - number of commas that precede the part to be quoted.
// Returns:
//   The record with a single quote inserted right after the comaNum-th comma
//   and with its final character replaced by a closing single quote.
//   NOTE(review): the quoted tail is presumably the class label column -
//   confirm against the table layout.
CString ChangeSQL(CString str,int comaNum)
{
	int i=0;
	// j counts down the commas still to be passed before the quote goes in.
	int j= comaNum;
	//bool stop = false;
	CString FileRecord = str;
	int Length = str.GetLength();
	CString NewSQL;
	// Walk the record until the '\0' character is reached.
	while (FileRecord.GetAt(i)!='\0'){
		if (FileRecord.GetAt(i) == ',') j--;
		if (j==0){
			// Keep everything up to and including the current character and
			// open the quoted value.
			NewSQL = FileRecord.Left(i+1) + "'";
			//stop = true;
			break;
		}
		i++;
	}
	// Append what follows position i. If fewer than comaNum commas were found,
	// NewSQL is still empty at this point.
	NewSQL = NewSQL + FileRecord.Right(Length-i-1);
	// Drop the last character and close the quoted value.
	NewSQL = NewSQL.Left(NewSQL.GetLength()-1) + "'";
	return NewSQL;
}

// Imports the records of a list of files into a database table.
//
// Every non-empty line of every file is converted with ChangeSQL and executed
// as "insert into <DbName> values(<converted line>)" through
// CSim_tcApp::AdoDBObject.ExecuteSQL.
//
// Parameters:
//   pFileHead      - list of files; each element's m_FilePath is opened for
//                    reading.
//   DbName         - name of the target table (inserted into the statement
//                    verbatim).
//   m_TrainDocDim  - comma count forwarded to ChangeSQL.
// Returns:
//   Always TRUE; the result of ExecuteSQL is not inspected.
// Notes:
//   The statement is built by string concatenation, so file content reaches
//   the database unescaped. NOTE(review): confirm the input files are trusted
//   or switch to a parameterized command.
BOOL AddFileToDB(CFileArray* pFileHead, CString DbName, int m_TrainDocDim)
{
	CString headSQL = "insert into " + DbName +" values(";
	CString Line;
	CString SQL;

	int n = pFileHead->GetSize();
	for (int i=0;i<n;i++){
		CString name= pFileHead->ElementAt(i)->m_FilePath;
		CStdioFile TrainFile(name,CFile::modeRead);
		// Let ReadString's return value signal end of file. Using an empty
		// line as the stop condition would silently drop every record that
		// follows a blank line.
		while (TrainFile.ReadString(Line)){
			if (Line.IsEmpty())
				continue;	// nothing to insert for a blank line
			// ChangeSQL expects the record to end with a '\0' character.
			SQL = ChangeSQL(Line + '\0',m_TrainDocDim);
			SQL = headSQL + SQL + ")";
			CSim_tcApp::AdoDBObject.ExecuteSQL(SQL);
		}
		TrainFile.Close();
	}
	return TRUE;
}


//Bayes的Multinomal模型训练方法
void MultinomalBayesTrain(CString CurTrainSet)
{
	_variant_t RecordsAffected;
	_variant_t vAttrNum;
	_variant_t vClassNum;
	_variant_t vClassLabel;
	_variant_t vIndex = (long)0;

	CString SqlCheckTrainSetDim="";
	int m_TrainFileDim;
	
	//在classification数据库中建立表存放中间求出的概率结果
	//1.在classification数据库中创建新表,用于存放中间结果
	CString SqlCreateProbTable = "";
	CString SqlCheckTrained = "";
	CString ProbTableName = "",temp="";
	ProbTableName = CurTrainSet + "_MultinomalResult";
	
	_variant_t vCheckResult;
	int CheckResult;
	//检查是否已经训练过
	SqlCheckTrained = "select count(name) from sysobjects where xtype='u' and name<>'dtproperties' and name='" +\
		ProbTableName + "'";
	CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnClsDB->Execute((_bstr_t)SqlCheckTrained,&RecordsAffected,adCmdText);
	vCheckResult = CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
	CheckResult = vCheckResult.lVal;
	CSim_tcApp::AdoDBObject.m_pRecordset->Close();

	if (CheckResult == 0){
	
	//2.维数(单词的个数)
		SqlCheckTrainSetDim = "select count(name) from syscolumns where syscolumns.id in (select id from sysobjects where name = '" + CurTrainSet + "')"; //查询属性个数
		CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlCheckTrainSetDim,&RecordsAffected,adCmdText);
		vAttrNum = CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
		m_TrainFileDim = vAttrNum.lVal;
		m_TrainFileDim = m_TrainFileDim - 2;
		CSim_tcApp::AdoDBObject.m_pRecordset->Close();
		
		//2.类别的名称
		StringArray ClassLabelArray;
		CString SqlGetClassLabel = "";
		CString ClassLabel="";
		
		// 查询类别个数
		SqlGetClassLabel = "select count(distinct classlabel) from   " + CurTrainSet; 
		CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlGetClassLabel,&RecordsAffected,adCmdText);
		vClassNum = CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
		int ClassNum = vClassNum.lVal;
		CSim_tcApp::AdoDBObject.m_pRecordset->Close();
		ClassLabelArray.SetSize(ClassNum,5);
		//查询类别名称
		SqlGetClassLabel = "select distinct classlabel from   " + CurTrainSet; 
		CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlGetClassLabel,&RecordsAffected,adCmdText);
		int i=0;
		while(!CSim_tcApp::AdoDBObject.m_pRecordset->adoEOF){
			vClassLabel = (_bstr_t)CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
			ClassLabel = vClassLabel.bstrVal;
			ClassLabel.TrimLeft();
			ClassLabel.TrimRight();
			ClassLabelArray.SetAt(i,ClassLabel);
			i++;
			CSim_tcApp::AdoDBObject.m_pRecordset->MoveNext();
		}
		CSim_tcApp::AdoDBObject.m_pRecordset->Close();
		ClassLabelArray.FreeExtra();

	
		SqlCreateProbTable = "Create table " + ProbTableName + "(rowid int identity(0,1) not null primary key, ";
		for (i=0; i<ClassNum; i++){
			temp = "class_" + ClassLabelArray.GetAt(i); 
			SqlCreateProbTable = SqlCreateProbTable + temp + " float,";
		}
		int tempLength = SqlCreateProbTable.GetLength();
		tempLength = tempLength -1;
		SqlCreateProbTable = SqlCreateProbTable.Left(tempLength);
		SqlCreateProbTable = SqlCreateProbTable + ")";
		//AfxMessageBox(SqlCreateProbTable);
		CSim_tcApp::AdoDBObject.m_pConnClsDB->Execute((_bstr_t)SqlCreateProbTable,&RecordsAffected,adCmdText);
	
		//计算每一个类别出现的先验概率
		_variant_t vAllDocNum;
		_variant_t vAClassDocNum;
		float AllDocNum,AClassDocNum=0.0;
		CString StrClassDocNum,strAClassDocNum;
		CString SqlGetAllDocNum = "", SqlGetAClassDocNum="";
		SqlGetAllDocNum = "select count(*) from " + CurTrainSet;
		//AfxMessageBox(SqlGetAllDocNum);
		CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlGetAllDocNum,&RecordsAffected,adCmdText);
		vAllDocNum = (_bstr_t)CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
		StrClassDocNum = vAllDocNum.bstrVal;
		AllDocNum = atof(StrClassDocNum);
		CSim_tcApp::AdoDBObject.m_pRecordset->Close();
	
		StringArray ClassPreProbArray;
		ClassPreProbArray.SetSize(ClassNum,5);

		float tempProb=0.0;
		char buffer[50];
		for (i=0; i<ClassNum; i++){
			temp = ClassLabelArray.ElementAt(i);
			SqlGetAClassDocNum = "select count(*) from " + CurTrainSet + " where classlabel='" + temp + "'";
			CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlGetAClassDocNum,&RecordsAffected,adCmdText);
			vAClassDocNum = (_bstr_t)CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
			strAClassDocNum = vAClassDocNum.bstrVal;
			AClassDocNum = atof(strAClassDocNum);
			tempProb = AClassDocNum / AllDocNum;
			_gcvt(tempProb, 10, buffer);
			ClassPreProbArray.SetAt(i,buffer);
			CSim_tcApp::AdoDBObject.m_pRecordset->Close();
		}
		CString SqlInsertProb = "insert into " + ProbTableName + " values (";
	

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -