📄 adaboost_train_main.cpp
字号:
#include "stdafx.h"
#include "AdaBoost.h"
#include "matrix.h"
#include "Adaboost_train_main.h"
#include "Adaboost_results_file.h"
#include "Adaboost_common.h"
#include "MissingData.h"
//#include "engine.h"
#define BUFSIZE 256
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
static char temstr[200];
/**********************************************************************************/
/* The AdaBoost algorithm. The expression [[E]] is 1 if E is true and 0 otherwise. */
/* Input: a set S of m labeled examples: S = {(x(i), y(i)); i = 1,2,...,m}, */
/* with labels y(i) in Y = {1,...,K}; */
/* Learn (a learning algorithm); */
/* a constant L (the number of boosting rounds). */
/* */
/* [1] initialize for all i: w(1)(i) := 1/m; initialize the weights */
/* [2] for l = 1 to L do */
/* [3] for all i: p(l)(i) := w(l)(i) / SIGMA(j) w(l)(j); compute normalized weights */
/* [4] h(l) := Learn(p(l)); call Learn with normalized weights */
/* [5] epsilon(l) := SIGMA(i) p(l)(i) [[h(l)(x(i)) != y(i)]]; calculate the error of h(l) */
/* [7] if epsilon(l) > 1/2 then */
/* [8] L := l - 1 */
/* [9] goto 13 */
/* [10] beta(l) := epsilon(l) / (1 - epsilon(l)) */
/* [11] for all i: w(l+1)(i) := w(l)(i) * beta(l)^(1 - [[h(l)(x(i)) != y(i)]]); compute new weights */
/* [12] end for */
/* [13] Output: hf(x) = argmax over y in Y of SIGMA(l) log(1/beta(l)) [[h(l)(x) = y]] */
/**********************************************************************************/
int Adaboost_train_main()
{
//long *train, *train_label, *test, *test_label;
long train_max_docs,train_max_words_doc,test_max_docs,test_max_words_doc,train_ll,test_ll;
char trainfile[200];
char testfile[200];
char resultfile[200];
RESULTS result;
com_pro.show_action=TRUE;
/* print this message to the windows */
if (com_pro.show_action)
printm("Begin to running Adaboost algorithm");
if (com_pro.show_action)
printm("Start to Scan train & test file...");
/* Copy the train & test files to the right name */
strcpy(trainfile,com_param.trainfile);
strcpy(testfile,com_param.testfile);
strcpy(resultfile,com_param.resultfile);
/* nol_ll reports the size of the file as follows: max_docs => length, max_words_doc => width */
nol_ll(trainfile,&train_max_docs,&train_max_words_doc,&train_ll); /* scan size of input file */
train_max_words_doc+=1;
train_ll+=2;
train_max_docs+=-1;
/* nol_ll reports the size of the file as follows: max_docs => length, max_words_doc => width */
nol_ll(testfile,&test_max_docs,&test_max_words_doc,&test_ll); /* scan size of input file */
test_max_words_doc+=1;
test_ll+=2;
test_max_docs+=-1;
if (com_pro.show_action)
printm("Finish to Scan train & test file...");
// Run Adaboost
int r_train=train_max_docs+1;
int c_train=train_max_words_doc;
int r_test=test_max_docs+1;
int c_test=test_max_words_doc;
if (r_test > r_train)
{
sprintf(temstr,"Error the length of train file must be longer from the length of test file: %.d\n");
printm(temstr);
}
int zero=0;
int one=1;
int two=2;
int value_one=1;
int three=3;
Matrix *train_matrix = new Matrix(r_train,c_train);
Matrix *train_label_matrix = new Matrix(r_train,one);
Matrix *test_matrix = new Matrix(r_test,c_test);
Matrix *test_label_matrix = new Matrix(r_test,one);
//Matrix *final_test_hypothesis = new Matrix(r_test,one); /* For printing of test hypothesis */
Matrix *final_test_hypothesis = new Matrix(1000,one);
Matrix *test_matrix_day = new Matrix(r_test,one);/* For printing of link */
Matrix *test_matrix_intersection = new Matrix(r_test,one);/* For printing of intersection */
Matrix *test_matrix_link = new Matrix(r_test,one);/* For printing of link */
read_train_file(trainfile,train_max_words_doc,train_max_docs,train_ll, train_matrix, train_label_matrix);
read_test_file(testfile,test_max_words_doc,test_max_docs,test_ll,test_matrix, test_label_matrix);
int step=100;
int cycles=100;
int missingValueTrain = 0;
int missingValueTest = 0;
int TrainMissingDataProcedure = 0;
int TestMissingDataProcedure = 0;
//disp('run adaboost with cycles=100');
if (com_pro.show_action)
printm("run adaboost with cycles == 100");
Matrix* boost = new Matrix(step,one);
Matrix* iii = new Matrix(step,one);
Matrix* ttt = new Matrix(step,one);
Matrix* errorTrain_acc = new Matrix(one,12);
Matrix* errorTest_acc = new Matrix(one,12);
/**********************************/
/* READ DATA TO ARRAY FROM MATRIX */
/**********************************/
int train_array[159660];//[80072]; /* 159660 */
int test_array[72072];//[80072]; /*72072*/
//int test_array[159660];
Matrix* get_train_array = new Matrix(train_max_docs,train_max_words_doc);
get_train_array->copyMatrixToArray(train_max_docs,train_max_words_doc-1,train_array,*train_matrix);
Matrix* get_test_array = new Matrix(test_max_docs,test_max_words_doc);
get_test_array->copyMatrixToArray(test_max_docs,test_max_words_doc-1,test_array,*test_matrix);
/***************************************************************************/
/* Check whether a value is missing in the train file (a value equal to -999) */
/***************************************************************************/
get_train_array->ifValueIsMissing(&missingValueTrain,*train_matrix);
if (missingValueTrain == 0){
if (com_pro.show_action)
{
printm("There is no missing values in train file");
}
}else{
if (com_pro.show_action)
{
printm("There is missing values in train file");
/*
MissingData *m_MissingData;
m_MissingData = new MissingData();
m_MissingData->DoModal();
*/
/****************************************************************/
/* Choose the procedure for missing data imputation, as follow: */
/* TrainMissingDataProcedure = 1 - Median of chosen column */
/* TrainMissingDataProcedure = 2 - K-nearest neighbor method */
/* TrainMissingDataProcedure = 3 - Ignore from missing data */
/****************************************************************/
TrainMissingDataProcedure = 3;
}
}
delete get_train_array;
/***************************************************************************/
/* Check whether a value is missing in the test file (a value equal to -999) */
/***************************************************************************/
get_test_array->ifValueIsMissing(&missingValueTest,*test_matrix);
if (missingValueTest == 0){
if (com_pro.show_action)
{
printm("There is no missing values in test file");
}
}else{
if (com_pro.show_action)
{
printm(" There is missing values in test file ");
/*
MissingData *m_MissingData;
m_MissingData = new MissingData();
m_MissingData->DoModal();
*/
/****************************************************************/
/* Choose the procedure for missing data imputation, as follow: */
/* TestMissingDataProcedure = 1 - Median of chosen column */
/* TestMissingDataProcedure = 2 - K-nearest neighbor method */
/* TestMissingDataProcedure = 3 - Ignore from missing data */
/****************************************************************/
TestMissingDataProcedure = 3;
}
}
delete get_test_array;
int label_train_array[2296];
//int label_train_array[3222];
Matrix* get_label_train_array = new Matrix(train_max_docs,value_one);
get_label_train_array->copyMatrixToArray(train_max_docs,value_one,label_train_array,*train_label_matrix);
delete get_label_train_array;
//int test_array[72072];
//Matrix* get_test_array = new Matrix(test_max_docs,test_max_words_doc);
//get_test_array->copyMatrixToArray(test_max_docs,test_max_words_doc-1,test_array,*test_matrix);
//delete get_test_array;
int label_test_array[2002];
//int label_test_array[1074];
Matrix* get_label_test_array = new Matrix(test_max_docs,value_one);
get_label_test_array->copyMatrixToArray(test_max_docs,value_one,label_test_array,*test_label_matrix);
delete get_label_test_array;
/*********************************************************************************************************/
/* step=100; */
/* cycles=100; */
/* for m=step:step:1000 */
/* disp(m); */
/* boost=adaBoost(train(1:m,:),train_label(1:m),cycles); */
/* [errorTrain(m/step),errorTest(m/step)]=getError(boost,train(1:m,:),train_label(1:m),test,test_label);*/
/* end */
/**********************************************************************************************************/
int tt=0;
while (step < 101)
{
//disp(m);
if (com_pro.show_compute_1)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -