// adaboost_common.cpp
#include "stdafx.h"
#include "AdaBoost.h"
#include "Adaboost_common.h"
#include "math.h"
#include "matrix.h"
#include "randomForest.h"
#include <time.h>
#include "f2c.h"
#include <stdlib.h> /* For _MAX_PATH definition */
#include <stdio.h>
#include <malloc.h>
#include <memory.h>
#include <string.h>
#include <conio.h>
#include <dos.h>
#include <algorithm>
#include <iostream>
#include <vector>
#ifdef _WIN32
using namespace std;
#endif
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
using namespace std ;
static char temstr[200];
typedef vector<int> INTVECTOR;
/*read the data from train documents*/
/* Read the training data file.
 *
 * The feature matrix is loaded wholesale via Matrix::readFile(); this
 * function then makes a second pass over the same file to parse the class
 * label (the last field of each line) into train_label_matrix, one label
 * per document in column 0.
 *
 * Parameters:
 *   trainfile           - path of the training file
 *   train_max_words_doc - unused here (kept for interface compatibility)
 *   train_max_docs      - unused here (kept for interface compatibility)
 *   train_ll            - maximum line length; sizes the read buffer
 *   train_matrix        - out: receives the feature matrix
 *   train_label_matrix  - out: receives one label per document
 *
 * Returns 0 on success (or on a mid-file parse failure, matching the
 * original early-exit behavior), -1 if the file cannot be opened.
 */
int read_train_file(char *trainfile,
long train_max_words_doc,
long train_max_docs,
long train_ll,
Matrix* train_matrix,
Matrix *train_label_matrix)
{
    FILE *docfl;
    /* line buffer; my_malloc is presumably a malloc wrapper (SVMlight-style
       helper), so free() is the matching release -- TODO confirm against
       my_malloc's definition */
    char *line = (char *)my_malloc(sizeof(char) * train_ll);
    if ((docfl = fopen(trainfile, "r")) == NULL)
    {
        printe(trainfile);
        free(line);              /* fix: buffer was leaked on this path */
        return -1;
    }
    if (com_pro.show_readfile)
    {
        sprintf(temstr, "Reading examples into memory...");
        printm(temstr);
    }
    /* features for every row are parsed by the Matrix class itself */
    train_matrix->readFile(trainfile);
    int i = 1;
    while ((!feof(docfl)) && fgets(line, (int)train_ll, docfl))
    {
        if (line[0] == '#') continue;           /* line contains comments */
        /* the label is taken to start 2 chars before the end of the line
           (digit + newline) -- assumption inherited from the original */
        int len = (int)strlen(line) - 2;
        if (len < 0) continue;                  /* fix: guard short lines */
        int train_label_end = 0;
        /* fix: original used "%ld" with an int* target (undefined
           behavior); "%d" matches the int argument */
        if (sscanf(line + len, "%d", &train_label_end) == EOF)
        {
            fclose(docfl);                      /* fix: stream was leaked */
            free(line);                         /* fix: buffer was leaked */
            return 0;
        }
        train_label_matrix->set(i, 0, (double)train_label_end);
        i++;
    }
    fclose(docfl);
    free(line);                                 /* fix: buffer was leaked */
    return 0;   /* fix: function previously fell off the end (UB) */
}
/*read the data from test documents*/
/* Read the test data file.
 *
 * Mirrors read_train_file: the feature matrix is loaded wholesale via
 * Matrix::readFile(), then a second pass over the same file parses the
 * class label (the last field of each line) into test_label_matrix,
 * one label per document in column 0.
 *
 * Parameters:
 *   testfile           - path of the test file
 *   test_max_words_doc - unused here (kept for interface compatibility)
 *   test_max_docs      - unused here (kept for interface compatibility)
 *   test_ll            - maximum line length; sizes the read buffer
 *   test_matrix        - out: receives the feature matrix
 *   test_label_matrix  - out: receives one label per document
 *
 * Returns 0 on success (or on a mid-file parse failure, matching the
 * original early-exit behavior), -1 if the file cannot be opened.
 */
int read_test_file(char *testfile,
long test_max_words_doc,
long test_max_docs,
long test_ll,
Matrix* test_matrix,
Matrix* test_label_matrix)
{
    FILE *docfl;
    /* line buffer; my_malloc is presumably a malloc wrapper (SVMlight-style
       helper), so free() is the matching release -- TODO confirm against
       my_malloc's definition */
    char *line = (char *)my_malloc(sizeof(char) * test_ll);
    if ((docfl = fopen(testfile, "r")) == NULL)
    {
        printe(testfile);
        free(line);              /* fix: buffer was leaked on this path */
        return -1;
    }
    if (com_pro.show_readfile)
    {
        sprintf(temstr, "Reading examples into memory...");
        printm(temstr);
    }
    /* load the test feature matrix from the test file
       (original comment wrongly said "from train file") */
    test_matrix->readFile(testfile);
    int i = 1;
    while ((!feof(docfl)) && fgets(line, (int)test_ll, docfl))
    {
        if (line[0] == '#') continue;           /* line contains comments */
        /* the label is taken to start 2 chars before the end of the line
           (digit + newline) -- assumption inherited from the original */
        int len = (int)strlen(line) - 2;
        if (len < 0) continue;                  /* fix: guard short lines */
        int test_label_end = 0;
        /* fix: original used "%ld" with an int* target (undefined
           behavior); "%d" matches the int argument */
        if (sscanf(line + len, "%d", &test_label_end) == EOF)
        {
            fclose(docfl);                      /* fix: stream was leaked */
            free(line);                         /* fix: buffer was leaked */
            return 0;
        }
        test_label_matrix->set(i, 0, (double)test_label_end);
        i++;
    }
    fclose(docfl);
    free(line);                                 /* fix: buffer was leaked */
    return 0;   /* fix: function previously fell off the end (UB) */
}
/*************************************************************************************/
/* function boosted=adaBoost(train,train_label,cycles) */
/* disp('running adaBoost algorithm'); */
/* d=size(train); */
/* distribution=ones(1,d(1))/d(1); */
/* error=zeros(1,cycles); */
/* beta=zeros(1,cycles); */
/* label=(train_label(:)>=5);% contain the correct label per vector */
/* for j=1:cycles */
/* if(mod(j,10)==0) */
/* disp([j,cycles]); */
/* end */
/* [i,t]=weakLearner(distribution,train,label); */
/* error(j)=distribution*abs(label-(train(:,i)>=t)); */
/* beta(j)=error(j)/(1-error(j)); */
/* boosted(j,:)=[beta(j),i,t]; */
/* distribution=distribution.* exp(log(beta(j))*(1-abs(label-(train(:,i)>=t))))'; */
/* distribution=distribution/sum(distribution); */
/* end */
/*************************************************************************************/
int adaBoost(Matrix* train,
Matrix* train_matrix,
Matrix* train_label,
Matrix* train_label_matrix,
Matrix* train_hypothesis,
Matrix* test,
Matrix* test_matrix,
Matrix* test_label,
Matrix* test_label_matrix,
Matrix* test_hypothesis,
long train_max_words_doc,
long train_max_docs,
long test_max_words_doc,
long test_max_docs,
int step,
int cycles,
Matrix* boost,
int train_array[],
int label_train_array[],
int test_array[],
int label_test_array[],
double ave_delta,
int sign,
int missingValueTrain,
int missingValueTest,
int TrainMissingDataProcedure,
int TestMissingDataProcedure)
{
int r_train=train_max_docs+1;
int c_train=train_max_words_doc;
int value_one = 1;
int value_two = 2;
int value_1000 = 1000;
int cycles_num=0;
Matrix* scalar_minus_error = new Matrix(value_one+1,step);
if (step <= 100){
cycles_num=step;
}
if (step > 100){
cycles_num=100;
if (sign > 1){
cycles_num=step;
}
}
Matrix* size = new Matrix(value_one,value_one);
int train_hypothesis_row;
int train_hypothesis_col;
size->matrixSize(&train_hypothesis_row,&train_hypothesis_col,*train_hypothesis);
if (train_hypothesis_row == r_train-1){
step=r_train-1;
}
delete size;
Matrix* distribution= new Matrix(value_one+1,step);
distribution->ones(value_one+1,step,value_one,step);
//distribution->ones(value_one+1,train_max_docs,value_one,step);
//distribution->matrixTranspose(value_one,train_max_docs,*distribution);
Matrix* error= new Matrix(value_one+1,cycles);
error->zeros(value_one+1,cycles);
Matrix* beta= new Matrix(value_one+1,cycles);
beta->zeros(value_one+1,cycles);
// temprary Matrix
Matrix* train_error_part = new Matrix(step,value_one+1);
Matrix* error_label_train = new Matrix(step,value_one+1);
Matrix* error_abs_label_train = new Matrix(step,value_one+1);
Matrix* error_label_train_1 = new Matrix(step,value_one+1);
Matrix* error_abs_label_train_1 = new Matrix(step,value_one+1);
Matrix* scalar_minus_error_1 = new Matrix(step,value_one+1);
Matrix* val_beta = new Matrix(value_one+1,value_one+1);
Matrix* beta_label_train = new Matrix(step,value_one+1);
Matrix* expMatrix = new Matrix(step,value_one+1);
Matrix* transposeExpMatrix = new Matrix(value_one+1,step);
Matrix* sumMatrix = new Matrix(value_one+1,step);
double Array_boost[2296];
//double Array_boost[3222];
Matrix* getArrayboost = new Matrix(cycles,value_one);
// label_matrix are contain the correct label per vector
Matrix label_matrix = (*train_label)>3;
if (com_pro.show_action)
printm("running adaBoost algorithm");
for (int j=1; j <= cycles_num; j++)
{
if ((j % 10) == 0)
{
if (com_pro.show_compute_1)
{
sprintf(temstr,"%d %d\n",j,cycles);
printm(temstr);
}
}
double k = 1;
/************************************************************/
/* h(l) = Learn(p(l)) call Learn with normalized weights. */
/* This randomForest_weakLearner will return the hypothesis */
/* of classes */
/************************************************************/
Matrix* train_error_array = new Matrix(step,value_one);
Matrix* test_error_array = new Matrix(step,value_one);
Matrix* checkIfZero = new Matrix(value_one,value_one);
static int ntest;
ntest = 1;
train_matrix->MatrixMultiMatrixCOL(*distribution,*train_matrix);
randomForest_weakLearner(distribution,TrainMissingDataProcedure,TestMissingDataProcedure,missingValueTest,
train_matrix,train_label_matrix,test_matrix,test_label_matrix,train_max_words_doc,
train_max_docs,test_max_words_doc,test_max_docs,step,cycles_num,ntest,train_error_array,
test_error_array,train_array,label_train_array,test_array,label_test_array,missingValueTrain);
/******************************************************************************/
/* The predicted values of the input training set based on out-of-bag samples */
/******************************************************************************/
train_hypothesis->copyVectorToMatrix(j,*train_error_array);
/**************************************************************************/
/* The predicted values of the input test set based on out-of-bag samples */
/**************************************************************************/
test_hypothesis->copyVectorToMatrix(j,*test_error_array);
/*****************************************************/
/* error(j)=distribution*abs(label - hypothesis) */
/*****************************************************/
// (label-hypothesis)
error_label_train->matrixMinusMatrix(*train_error_array,label_matrix);
// abs(label-hypothesis)
error_abs_label_train->matrixAbs(*error_label_train);
// distribution*abs(label-hypothesis)
Matrix error_tmp = (*distribution)*(*error_abs_label_train);
// Check if the error results is OK
checkIfZero->ifValZero(step,error_tmp);
//error(j)=distribution*abs(label-hypothesis);
error->copy(j-1,*checkIfZero);
/**********************************/
/* beta(j)=error(j)/(1-error(j)); */
/**********************************/
scalar_minus_error->ScalarMinusMatrix(k,*error);
Matrix beta = (*error) / (*scalar_minus_error);
/*******************************/
/* boosted(j,:)=[beta(j),i,t]; */
/*******************************/
boost->copyToMatrix(j,beta);
Matrix* matrix_boost = new Matrix(value_one,value_one);
getArrayboost->matrixToArray(Array_boost,*boost);
/*********************************************************************************/
/* distribution = distribution .* exp(log(beta(j))*(1-abs(label - hypothesis )) */
/*********************************************************************************/
// (1 - abs(label - hypothesis))
scalar_minus_error_1->ScalarMinusMatrix(k,*error_abs_label_train);
// log(beta(j))
double y;
double val;
val_beta->getValue(j,&val,beta);
y = log(val);
// log(beta(j)) * (1-abs(label - hypothesis))
beta_label_train->ScalarMultiMatrix(y,*scalar_minus_error_1);
expMatrix->matrixExp(*beta_label_train);
transposeExpMatrix->matrixTranspose(step,value_one,*expMatrix);
// distribution=distribution.* exp(log(beta(j))*(1-abs(label-hypothesis))';
distribution->MatrixMultiMatrix(*distribution,*transposeExpMatrix);
/************************************************/
/* distribution=distribution/sum(distribution); */
/************************************************/
double sum_distribution=0;
sumMatrix->matrixSumCol(&sum_distribution, *distribution);
distribution->matrixdDivisonScalar(sum_distribution,*distribution);
//distribution->matrixTranspose(test_max_docs,value_one,*distribution);
//delete matrix
delete train_error_array;
delete test_error_array;
delete checkIfZero;
}
// delete matrix
delete train_error_part;
delete error_label_train;
delete error_abs_label_train;
delete scalar_minus_error;
delete error_label_train_1;
delete error_abs_label_train_1;
delete scalar_minus_error_1;
delete val_beta;
delete beta_label_train;
delete sumMatrix;
// NOTE(review): this copy of the file is truncated here -- the tail of
// adaBoost() (remaining delete statements, closing brace, and return) is
// missing; recover it from the original source before building.