// File: stdafx.cpp
// stdafx.cpp : source file that includes just the standard includes
// sim_tc.pch will be the pre-compiled header
// stdafx.obj will contain the pre-compiled type information
#include "stdafx.h"
#include "sim_tc.h"
#include <afx.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
// Finds the position of the largest value in a classification-result array.
//
// Despite the name, nothing is sorted or modified: the array is scanned once
// and the index of the maximum is returned. Only a strictly greater value
// replaces the current maximum, so on ties the earliest index wins.
//
// Parameters:
//   ClsResult - array of scores to scan (element type presumably double,
//               judging by the DoubleArray name - confirm against its typedef).
// Returns:
//   Zero-based index of the largest element, or -1 when ClsResult is NULL
//   or empty.
int SortClsResult(DoubleArray* ClsResult)
{
    if (ClsResult == NULL)
        return -1;

    const int count = static_cast<int>(ClsResult->GetSize());
    if (count <= 0)
        return -1;

    // Compare as double: holding the values in int would discard the
    // fractional part and make distinct scores (e.g. 0.2 and 0.9) look equal.
    double bestValue = ClsResult->GetAt(0);
    int bestIndex = 0;
    for (int i = 1; i < count; i++) {
        const double candidate = ClsResult->GetAt(i);
        if (candidate > bestValue) {
            bestValue = candidate;
            bestIndex = i;
        }
    }
    return bestIndex;
}
// Computes the dimension of a vector written as text.
//
// Every ',' or ';' in VectorText is treated as a separator between two
// components, so the result is the separator count plus one (an empty
// string therefore yields 1).
int GetVectorDim(CString VectorText)
{
    const int length = VectorText.GetLength();
    int separators = 0;
    int pos = 0;
    while (pos < length) {
        const char current = VectorText.GetAt(pos);
        switch (current) {
        case ',':
        case ';':
            ++separators;
            break;
        default:
            break;
        }
        ++pos;
    }
    // N separators delimit N + 1 components.
    return separators + 1;
}
// Reads the first line of a training document and counts its commas.
//
// Parameters:
//   pFileItem - file descriptor whose m_FilePath names the document to open.
// Returns:
//   Number of ',' characters on the first line; 0 when pFileItem is NULL or
//   the file is empty.
// Note:
//   The CStdioFile constructor is used to open the file, so an open failure
//   is reported the same way as before (by that constructor).
int ReadTrainDocDim(CFileItem* pFileItem)
{
    if (pFileItem == NULL)
        return 0;

    CStdioFile TrainDocItem(pFileItem->m_FilePath, CFile::modeRead);

    // Read into a CString rather than a fixed char buffer: the line may be
    // arbitrarily long, may lack a trailing newline, and the file may be
    // empty. ReadString resets the string first, so it is simply empty when
    // nothing could be read; the return value is therefore not needed here.
    CString firstLine;
    TrainDocItem.ReadString(firstLine);
    TrainDocItem.Close();

    int TrainDocDim = 0;
    const int length = firstLine.GetLength();
    for (int i = 0; i < length; i++) {
        if (firstLine.GetAt(i) == ',')
            TrainDocDim++;
    }
    return TrainDocDim;
}
// Builds the SQL statement that creates the table for a new data set.
//
// The table gets colNumber smallint columns named wd1 .. wd<colNumber>,
// followed by a char(20) classlabel column and an identity primary-key
// column named rowid.
//
// Parameters:
//   colNumber - number of wd<N> columns to generate (none when <= 0).
//   tableName - name inserted verbatim after "create table ".
// Returns:
//   The complete "create table ..." statement text.
CString SqlCreateTable(int colNumber, CString tableName)
{
    CString statement = "create table " + tableName + "(";

    char digits[20];
    int column = 1;
    while (column <= colNumber) {
        // Decimal text of the 1-based column number, e.g. "7" -> "wd7".
        _itoa(column, digits, 10);
        const CString columnName = "wd" + CString(digits);
        statement += columnName + " " + "smallint,";
        ++column;
    }

    statement += "classlabel char(20), rowid int identity(0,1) not null primary key)";
    return statement;
}
// Turns one comma-separated record into the value list of an SQL insert by
// wrapping its trailing label field in single quotes.
//
// Everything up to and including the comaNum-th comma is copied unchanged;
// the remainder of the record is emitted as a quoted SQL string literal,
// e.g. ("1,2,3,sports", 3) -> "1,2,3,'sports'".
//
// Parameters:
//   str     - the record text. A '\0' character stored inside the CString
//             (callers in this file append one) marks the end of the record
//             and is not copied to the output; it is no longer required.
//   comaNum - number of commas that precede the label field. When <= 0 the
//             whole record is treated as the label.
// Returns:
//   The record with its label quoted, or an empty string when the record
//   contains fewer than comaNum commas (there is no label field to quote).
// NOTE(review): only the label is escaped; the fields before it are copied
//   verbatim, so they are assumed to be numeric - confirm for untrusted files.
CString ChangeSQL(CString str,int comaNum)
{
    // Logical end of the record: the first embedded '\0', or the full length.
    int length = str.GetLength();
    for (int k = 0; k < length; k++) {
        if (str.GetAt(k) == '\0') {
            length = k;
            break;
        }
    }
    const CString record = str.Left(length);

    // Locate the first character after the comaNum-th comma.
    int labelStart = 0;
    if (comaNum > 0) {
        int remaining = comaNum;
        labelStart = -1;
        for (int i = 0; i < length; i++) {
            if (record.GetAt(i) == ',' && --remaining == 0) {
                labelStart = i + 1;
                break;
            }
        }
        if (labelStart < 0)
            return CString();   // too few fields: nothing sensible to quote
    }

    // Double any embedded single quote so the label stays one SQL literal.
    CString label = record.Mid(labelStart);
    label.Replace("'", "''");

    return record.Left(labelStart) + "'" + label + "'";
}
// Imports every record of every listed file into a database table.
//
// Each file is read line by line; every non-empty line is converted with
// ChangeSQL into a value list and executed as
//   insert into <DbName> values(<value list>)
// through CSim_tcApp::AdoDBObject.ExecuteSQL.
//
// Parameters:
//   pFileHead      - array of file items; each element's m_FilePath is opened
//                    for reading.
//   DbName         - name placed after "insert into " (used as the target
//                    table name in the statement).
//   m_TrainDocDim  - number of commas preceding the label field, forwarded
//                    to ChangeSQL.
// Returns:
//   FALSE when pFileHead is NULL, TRUE otherwise. The result of ExecuteSQL
//   is not inspected, exactly as before.
BOOL AddFileToDB(CFileArray* pFileHead, CString DbName, int m_TrainDocDim)
{
    if (pFileHead == NULL)
        return FALSE;

    const CString headSQL = "insert into " + DbName + " values(";
    const int fileCount = static_cast<int>(pFileHead->GetSize());

    for (int i = 0; i < fileCount; i++) {
        CString name = pFileHead->ElementAt(i)->m_FilePath;
        CStdioFile TrainFile(name, CFile::modeRead);

        CString line;
        for (;;) {
            // End of file is detected from ReadString's result, not from an
            // empty line, so a blank line inside the file no longer ends the
            // import early.
            const BOOL more = TrainFile.ReadString(line);

            // Process whatever was read even if 'more' is FALSE: the final
            // line can arrive together with the end-of-file indication.
            if (!line.IsEmpty()) {
                // ChangeSQL expects the record to carry an explicit '\0'.
                CString SQL = headSQL + ChangeSQL(line + '\0', m_TrainDocDim) + ")";
                CSim_tcApp::AdoDBObject.ExecuteSQL(SQL);
            }

            if (!more)
                break;
        }
        TrainFile.Close();
    }
    return TRUE;
}
//Bayes的Multinomal模型训练方法
void MultinomalBayesTrain(CString CurTrainSet)
{
_variant_t RecordsAffected;
_variant_t vAttrNum;
_variant_t vClassNum;
_variant_t vClassLabel;
_variant_t vIndex = (long)0;
CString SqlCheckTrainSetDim="";
int m_TrainFileDim;
//在classification数据库中建立表存放中间求出的概率结果
//1.在classification数据库中创建新表,用于存放中间结果
CString SqlCreateProbTable = "";
CString SqlCheckTrained = "";
CString ProbTableName = "",temp="";
ProbTableName = CurTrainSet + "_MultinomalResult";
_variant_t vCheckResult;
int CheckResult;
//检查是否已经训练过
SqlCheckTrained = "select count(name) from sysobjects where xtype='u' and name<>'dtproperties' and name='" +\
ProbTableName + "'";
CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnClsDB->Execute((_bstr_t)SqlCheckTrained,&RecordsAffected,adCmdText);
vCheckResult = CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
CheckResult = vCheckResult.lVal;
CSim_tcApp::AdoDBObject.m_pRecordset->Close();
if (CheckResult == 0){
//2.维数(单词的个数)
SqlCheckTrainSetDim = "select count(name) from syscolumns where syscolumns.id in (select id from sysobjects where name = '" + CurTrainSet + "')"; //查询属性个数
CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlCheckTrainSetDim,&RecordsAffected,adCmdText);
vAttrNum = CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
m_TrainFileDim = vAttrNum.lVal;
m_TrainFileDim = m_TrainFileDim - 2;
CSim_tcApp::AdoDBObject.m_pRecordset->Close();
//2.类别的名称
StringArray ClassLabelArray;
CString SqlGetClassLabel = "";
CString ClassLabel="";
// 查询类别个数
SqlGetClassLabel = "select count(distinct classlabel) from " + CurTrainSet;
CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlGetClassLabel,&RecordsAffected,adCmdText);
vClassNum = CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
int ClassNum = vClassNum.lVal;
CSim_tcApp::AdoDBObject.m_pRecordset->Close();
ClassLabelArray.SetSize(ClassNum,5);
//查询类别名称
SqlGetClassLabel = "select distinct classlabel from " + CurTrainSet;
CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlGetClassLabel,&RecordsAffected,adCmdText);
int i=0;
while(!CSim_tcApp::AdoDBObject.m_pRecordset->adoEOF){
vClassLabel = (_bstr_t)CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
ClassLabel = vClassLabel.bstrVal;
ClassLabel.TrimLeft();
ClassLabel.TrimRight();
ClassLabelArray.SetAt(i,ClassLabel);
i++;
CSim_tcApp::AdoDBObject.m_pRecordset->MoveNext();
}
CSim_tcApp::AdoDBObject.m_pRecordset->Close();
ClassLabelArray.FreeExtra();
SqlCreateProbTable = "Create table " + ProbTableName + "(rowid int identity(0,1) not null primary key, ";
for (i=0; i<ClassNum; i++){
temp = "class_" + ClassLabelArray.GetAt(i);
SqlCreateProbTable = SqlCreateProbTable + temp + " float,";
}
int tempLength = SqlCreateProbTable.GetLength();
tempLength = tempLength -1;
SqlCreateProbTable = SqlCreateProbTable.Left(tempLength);
SqlCreateProbTable = SqlCreateProbTable + ")";
//AfxMessageBox(SqlCreateProbTable);
CSim_tcApp::AdoDBObject.m_pConnClsDB->Execute((_bstr_t)SqlCreateProbTable,&RecordsAffected,adCmdText);
//计算每一个类别出现的先验概率
_variant_t vAllDocNum;
_variant_t vAClassDocNum;
float AllDocNum,AClassDocNum=0.0;
CString StrClassDocNum,strAClassDocNum;
CString SqlGetAllDocNum = "", SqlGetAClassDocNum="";
SqlGetAllDocNum = "select count(*) from " + CurTrainSet;
//AfxMessageBox(SqlGetAllDocNum);
CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlGetAllDocNum,&RecordsAffected,adCmdText);
vAllDocNum = (_bstr_t)CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
StrClassDocNum = vAllDocNum.bstrVal;
AllDocNum = atof(StrClassDocNum);
CSim_tcApp::AdoDBObject.m_pRecordset->Close();
StringArray ClassPreProbArray;
ClassPreProbArray.SetSize(ClassNum,5);
float tempProb=0.0;
char buffer[50];
for (i=0; i<ClassNum; i++){
temp = ClassLabelArray.ElementAt(i);
SqlGetAClassDocNum = "select count(*) from " + CurTrainSet + " where classlabel='" + temp + "'";
CSim_tcApp::AdoDBObject.m_pRecordset = CSim_tcApp::AdoDBObject.m_pConnection->Execute((_bstr_t)SqlGetAClassDocNum,&RecordsAffected,adCmdText);
vAClassDocNum = (_bstr_t)CSim_tcApp::AdoDBObject.m_pRecordset->GetCollect(vIndex);
strAClassDocNum = vAClassDocNum.bstrVal;
AClassDocNum = atof(strAClassDocNum);
tempProb = AClassDocNum / AllDocNum;
_gcvt(tempProb, 10, buffer);
ClassPreProbArray.SetAt(i,buffer);
CSim_tcApp::AdoDBObject.m_pRecordset->Close();
}
CString SqlInsertProb = "insert into " + ProbTableName + " values (";
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -