📄 stdafx.cpp
字号:
for (i=0; i<ClassNum; i++){
temp = ClassPreProbArray.ElementAt(i);
SqlInsertProb = SqlInsertProb + temp + ",";
}
tempLength = SqlInsertProb.GetLength();
tempLength = tempLength -1;
SqlInsertProb = SqlInsertProb.Left(tempLength);
SqlInsertProb = SqlInsertProb + ")";
//AfxMessageBox(SqlInsertProb);
CSim_tcApp::AdoDBObject.m_pConnClsDB->Execute((_bstr_t)SqlInsertProb,&RecordsAffected,adCmdText);
//计算在每个类别条件下,每一个单词出现的后验概率
//1.计算 分母(单词总数+整个矩阵的词频和)
//单词总数为m_TrainFileDim
float valFenMu=0.0;
CString SqlGetWordFreq = "select sum(wd";
_variant_t TotalFreq;
for (i=1; i<=m_TrainFileDim; i++){
_itoa( i, buffer, 10 );
SqlGetWordFreq = SqlGetWordFreq + buffer +") from " + CurTrainSet;
//AfxMessageBox(SqlGetWordFreq);
CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlGetWordFreq,&RecordsAffected,adCmdText);
TotalFreq = CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
valFenMu = valFenMu + TotalFreq.bVal;
SqlGetWordFreq = "select sum(wd";
}
valFenMu = valFenMu + m_TrainFileDim;
//2.计算分子,分别计算每个单词在每个类别下的后验概率
SqlGetWordFreq = "select sum(wd";
int j=0;
float valFenZi=0.0;
float AWordProb;
_variant_t vAWordFreq;
CString SqlInsertWordFreq = "";
for (i=1; i<=m_TrainFileDim; i++){
for (j=0; j<ClassNum; j++){//单词i对应的各个类别的概率
temp = ClassLabelArray.GetAt(j);
_itoa(i, buffer, 10 );
SqlGetWordFreq = SqlGetWordFreq + buffer +") from " + CurTrainSet + " where classlabel='" + temp + "'" ;
//AfxMessageBox(SqlGetWordFreq);
CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlGetWordFreq,&RecordsAffected,adCmdText);
vAWordFreq = CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
valFenZi = valFenZi + vAWordFreq.bVal;
valFenZi = valFenZi + 1;
AWordProb = valFenZi / valFenMu;
_gcvt(AWordProb, 10, buffer);
ClassPreProbArray.SetAt(j,buffer);
CSim_tcApp::AdoDBObject.m_pRecordset->Close();
SqlGetWordFreq = "select sum(wd";
valFenZi = 0.0;
}
SqlInsertWordFreq = "insert into " + ProbTableName + " values(";
for (j=0; j<ClassNum; j++){
SqlInsertWordFreq = SqlInsertWordFreq + ClassPreProbArray.GetAt(j) + ",";
}
SqlInsertWordFreq = SqlInsertWordFreq.Left(SqlInsertWordFreq.GetLength()-1);
SqlInsertWordFreq = SqlInsertWordFreq + ")";
//AfxMessageBox(SqlInsertWordFreq);
CSim_tcApp::AdoDBObject.m_pConnClsDB->Execute((_bstr_t)SqlInsertWordFreq,&RecordsAffected,adCmdText);
SqlGetWordFreq = "select sum(wd";
SqlInsertWordFreq = "";
}
AfxMessageBox("成功了也!");
}
else {
return;
}
}
//Bayes的贝努利模型训练方法
void BernulliBayesTrain(CString CurTrainSet)
{
_variant_t RecordsAffected;
_variant_t vAttrNum;
_variant_t vClassNum;
_variant_t vClassLabel;
_variant_t vIndex = (long)0;
CString SqlCheckTrainSetDim="";
int m_TrainFileDim;
//在classification数据库中建立表存放中间求出的概率结果
//1.在classification数据库中创建新表,用于存放中间结果
CString SqlCreateProbTable = "";
CString SqlCheckTrained = "";
CString ProbTableName = "",temp="";
ProbTableName = CurTrainSet + "_ProbResult";
_variant_t vCheckResult;
int CheckResult;
//检查是否已经训练过
SqlCheckTrained = "select count(name) from sysobjects where xtype='u' and name<>'dtproperties' and name='" +\
ProbTableName + "'";
//AfxMessageBox(SqlCheckTrained);
CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnClsDB->Execute((_bstr_t)SqlCheckTrained,&RecordsAffected,adCmdText);
vCheckResult = CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
CheckResult = vCheckResult.lVal;
CSim_tcApp::AdoDBObject.m_pRecordset->Close();
if (CheckResult == 0){
//2.维数(单词的个数)
SqlCheckTrainSetDim = "select count(name) from syscolumns where syscolumns.id in (select id from sysobjects where name = '" + CurTrainSet + "')"; //查询属性个数
CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlCheckTrainSetDim,&RecordsAffected,adCmdText);
vAttrNum = CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
m_TrainFileDim = vAttrNum.lVal;
m_TrainFileDim = m_TrainFileDim - 2;
CSim_tcApp::AdoDBObject.m_pRecordset->Close();
//2.类别的名称
StringArray ClassLabelArray;
CString SqlGetClassLabel = "";
CString ClassLabel="";
// 查询类别个数
SqlGetClassLabel = "select count(distinct classlabel) from " + CurTrainSet;
CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlGetClassLabel,&RecordsAffected,adCmdText);
vClassNum = CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
int ClassNum = vClassNum.lVal;
CSim_tcApp::AdoDBObject.m_pRecordset->Close();
ClassLabelArray.SetSize(ClassNum,5);
//查询类别名称
SqlGetClassLabel = "select distinct classlabel from " + CurTrainSet;
CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlGetClassLabel,&RecordsAffected,adCmdText);
int i=0;
while(!CSim_tcApp::AdoDBObject.m_pRecordset->adoEOF){
vClassLabel = (_bstr_t)CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
ClassLabel = vClassLabel.bstrVal;
ClassLabel.TrimLeft();
ClassLabel.TrimRight();
ClassLabelArray.SetAt(i,ClassLabel);
i++;
CSim_tcApp::AdoDBObject.m_pRecordset->MoveNext();
}
CSim_tcApp::AdoDBObject.m_pRecordset->Close();
ClassLabelArray.FreeExtra();
SqlCreateProbTable = "Create table " + ProbTableName + "(rowid int identity(0,1) not null primary key, ";
for (i=0; i<ClassNum; i++){
temp = ClassLabelArray.GetAt(i);
SqlCreateProbTable = SqlCreateProbTable + temp + " float,";
}
int tempLength = SqlCreateProbTable.GetLength();
tempLength = tempLength -1;
SqlCreateProbTable = SqlCreateProbTable.Left(tempLength);
SqlCreateProbTable = SqlCreateProbTable + ")";
//AfxMessageBox(SqlCreateProbTable);
CSim_tcApp::AdoDBObject.m_pConnClsDB->Execute((_bstr_t)SqlCreateProbTable,&RecordsAffected,adCmdText);
//计算每一个类别出现的先验概率
_variant_t vAllDocNum;
_variant_t vAClassDocNum;
float AllDocNum,AClassDocNum=0.0;
CString StrClassDocNum,strAClassDocNum;
CString SqlGetAllDocNum = "", SqlGetAClassDocNum="";
SqlGetAllDocNum = "select count(*) from " + CurTrainSet;
//AfxMessageBox(SqlGetAllDocNum);
CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlGetAllDocNum,&RecordsAffected,adCmdText);
vAllDocNum = (_bstr_t)CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
StrClassDocNum = vAllDocNum.bstrVal;
AllDocNum = atof(StrClassDocNum);
CSim_tcApp::AdoDBObject.m_pRecordset->Close();
StringArray ClassPreProbArray;
ClassPreProbArray.SetSize(ClassNum,5);
float tempProb=0.0;
char buffer[50];
for (i=0; i<ClassNum; i++){
temp = ClassLabelArray.ElementAt(i);
SqlGetAClassDocNum = "select count(*) from " + CurTrainSet + " where classlabel='" + temp + "'";
//AfxMessageBox(SqlGetAClassDocNum);
CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlGetAClassDocNum,&RecordsAffected,adCmdText);
vAClassDocNum = (_bstr_t)CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
strAClassDocNum = vAClassDocNum.bstrVal;
AClassDocNum = atof(strAClassDocNum);
tempProb = AClassDocNum / AllDocNum;
_gcvt(tempProb, 10, buffer);
ClassPreProbArray.SetAt(i,buffer);
CSim_tcApp::AdoDBObject.m_pRecordset->Close();
}
CString SqlInsertProb = "insert into " + ProbTableName + " values (";
for (i=0; i<ClassNum; i++){
temp = ClassPreProbArray.ElementAt(i);
SqlInsertProb = SqlInsertProb + temp + ",";
}
tempLength = SqlInsertProb.GetLength();
tempLength = tempLength -1;
SqlInsertProb = SqlInsertProb.Left(tempLength);
SqlInsertProb = SqlInsertProb + ")";
//AfxMessageBox(SqlInsertProb);
CSim_tcApp::AdoDBObject.m_pConnClsDB->Execute((_bstr_t)SqlInsertProb,&RecordsAffected,adCmdText);
//计算在每个类别条件下,每一个单词出现的后验概率
//1.计算 分母(单词总数+整个矩阵的词频和)
//单词总数为m_TrainFileDim
float valFenMu=0.0;
CString SqlGetWordFreq = "select sum(wd";
_variant_t TotalFreq;
for (i=1; i<=m_TrainFileDim; i++){
_itoa( i, buffer, 10 );
SqlGetWordFreq = SqlGetWordFreq + buffer +") from " + CurTrainSet;
//AfxMessageBox(SqlGetWordFreq);
CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlGetWordFreq,&RecordsAffected,adCmdText);
TotalFreq = CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
valFenMu = valFenMu + TotalFreq.bVal;
SqlGetWordFreq = "select sum(wd";
}
valFenMu = valFenMu + m_TrainFileDim;
//2.计算分子,分别计算每个单词在每个类别下的后验概率
SqlGetWordFreq = "select sum(wd";
int j=0;
float valFenZi=0.0;
float AWordProb;
_variant_t vAWordFreq;
CString SqlInsertWordFreq = "";
for (i=1; i<=m_TrainFileDim; i++){
for (j=0; j<ClassNum; j++){//单词i对应的各个类别的概率
temp = ClassLabelArray.GetAt(j);
_itoa(i, buffer, 10 );
SqlGetWordFreq = SqlGetWordFreq + buffer +") from " + CurTrainSet + " where classlabel='" + temp + "'" ;
//AfxMessageBox(SqlGetWordFreq);
CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlGetWordFreq,&RecordsAffected,adCmdText);
vAWordFreq = CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
valFenZi = valFenZi + vAWordFreq.bVal;
valFenZi = valFenZi + 1;
AWordProb = valFenZi / valFenMu;
_gcvt(AWordProb, 10, buffer);
ClassPreProbArray.SetAt(j,buffer);
CSim_tcApp::AdoDBObject.m_pRecordset->Close();
SqlGetWordFreq = "select sum(wd";
valFenZi = 0.0;
}
SqlInsertWordFreq = "insert into " + ProbTableName + " values(";
for (j=0; j<ClassNum; j++){
SqlInsertWordFreq = SqlInsertWordFreq + ClassPreProbArray.GetAt(j) + ",";
}
SqlInsertWordFreq = SqlInsertWordFreq.Left(SqlInsertWordFreq.GetLength()-1);
SqlInsertWordFreq = SqlInsertWordFreq + ")";
//AfxMessageBox(SqlInsertWordFreq);
CSim_tcApp::AdoDBObject.m_pConnClsDB->Execute((_bstr_t)SqlInsertWordFreq,&RecordsAffected,adCmdText);
SqlGetWordFreq = "select sum(wd";
SqlInsertWordFreq = "";
}
AfxMessageBox("成功了也!");
}
else {
return;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -