// adaboost_common.cpp -- AdaBoost training helpers (header reconstructed;
// the original first lines were website-capture residue, not code).
#include "stdafx.h"
#include "AdaBoost.h"
#include "Adaboost_common.h"
#include "math.h"
#include "matrix.h"
#include <stdlib.h> /* For _MAX_PATH definition */
#include <stdio.h>
#include <malloc.h>
#include <memory.h>
#include <string.h>
#include <conio.h>
#include <dos.h>
#include <algorithm>
#include <iostream>
#include <vector>
#ifdef _WIN32
using namespace std;
#endif
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
// NOTE(review): this repeats the _WIN32-guarded using-directive above and
// applies it unconditionally; redundant but harmless on Windows builds.
using namespace std ;
// Scratch buffer used to format status/error messages for printm()/printe().
static char temstr[200];
// Convenience alias for a vector of ints (appears unused in this chunk).
typedef vector<int> INTVECTOR;
/*read the data from train documents*/
/*
 * Load the training set: the feature matrix is read whole by
 * Matrix::readFile, then the file is re-scanned line by line and the class
 * label (parsed from the tail of each line, just before the newline) is
 * stored in column 0 of train_label_matrix.
 *
 * Parameters:
 *   trainfile           - path of the training file
 *   train_max_words_doc - sizing hint (unused here; presumably consumed by
 *   train_max_docs        the caller / Matrix setup -- confirm)
 *   train_ll            - maximum line length; sizes the read buffer
 *   train_matrix        - output: feature matrix
 *   train_label_matrix  - output: one label per document, column 0
 *
 * Returns 0 on success (and, preserved from the original, 0 on a label
 * parse failure), -1 if the file cannot be opened.
 */
int read_train_file(char *trainfile,
                    long train_max_words_doc,
                    long train_max_docs,
                    long train_ll,
                    Matrix* train_matrix,
                    Matrix *train_label_matrix)
{
	FILE *docfl;
	/* assumes my_malloc wraps malloc, so free() is the right release -- confirm */
	char *line = (char *)my_malloc(sizeof(char) * train_ll);
	if ((docfl = fopen(trainfile, "r")) == NULL)
	{
		printe(trainfile);
		free(line);  /* fix: buffer was leaked on this path */
		return -1;
	}
	if (com_pro.show_readfile)
	{
		sprintf(temstr, "Reading examples into memory...");
		printm(temstr);
	}
	/* Load the feature matrix straight from the file. */
	train_matrix->readFile(trainfile);
	/* Second pass: extract the label that trails each line. */
	int i = 1;  /* label rows start at 1 -- Matrix appears 1-based here; confirm */
	while (!feof(docfl) && fgets(line, (int)train_ll, docfl))
	{
		if (line[0] == '#') continue;  /* line contains comments */
		long train_label_end = 0;      /* fix: was int but scanned with %ld */
		int len = (int)strlen(line) - 2;  /* step back over "\n" + one char */
		if (len < 0) continue;            /* fix: short line made the offset negative (UB) */
		if (sscanf(line + len, "%ld", &train_label_end) == EOF)
		{
			fclose(docfl);  /* fix: stream and buffer leaked on this path */
			free(line);
			return 0;       /* preserved: original reported 0 on parse failure */
		}
		train_label_matrix->set(i, 0, (double)train_label_end);
		i++;
	}
	fclose(docfl);
	free(line);  /* fix: buffer was never released */
	return 0;    /* fix: control fell off the end of a non-void function (UB) */
}
/*
 * Load the test set: mirror of read_train_file() -- the feature matrix is
 * read by Matrix::readFile, then each line's trailing class label goes into
 * column 0 of test_label_matrix.
 *
 * Parameters:
 *   testfile           - path of the test file
 *   test_max_words_doc - sizing hint (unused here; presumably for the
 *   test_max_docs        caller / Matrix setup -- confirm)
 *   test_ll            - maximum line length; sizes the read buffer
 *   test_matrix        - output: feature matrix
 *   test_label_matrix  - output: one label per document, column 0
 *
 * Returns 0 on success (and, preserved from the original, 0 on a label
 * parse failure), -1 if the file cannot be opened.
 */
int read_test_file(char *testfile,
                   long test_max_words_doc,
                   long test_max_docs,
                   long test_ll,
                   Matrix* test_matrix,
                   Matrix* test_label_matrix)
{
	FILE *docfl;
	/* assumes my_malloc wraps malloc, so free() is the right release -- confirm */
	char *line = (char *)my_malloc(sizeof(char) * test_ll);
	if ((docfl = fopen(testfile, "r")) == NULL)
	{
		printe(testfile);
		free(line);  /* fix: buffer was leaked on this path */
		return -1;
	}
	if (com_pro.show_readfile)
	{
		sprintf(temstr, "Reading examples into memory...");
		printm(temstr);
	}
	/* Load the feature matrix straight from the file. */
	test_matrix->readFile(testfile);
	/* Second pass: extract the label that trails each line. */
	int i = 1;  /* label rows start at 1 -- Matrix appears 1-based here; confirm */
	while (!feof(docfl) && fgets(line, (int)test_ll, docfl))
	{
		if (line[0] == '#') continue;  /* line contains comments */
		long test_label_end = 0;       /* fix: was int but scanned with %ld */
		int len = (int)strlen(line) - 2;  /* step back over "\n" + one char */
		if (len < 0) continue;            /* fix: short line made the offset negative (UB) */
		if (sscanf(line + len, "%ld", &test_label_end) == EOF)
		{
			fclose(docfl);  /* fix: stream and buffer leaked on this path */
			free(line);
			return 0;       /* preserved: original reported 0 on parse failure */
		}
		test_label_matrix->set(i, 0, (double)test_label_end);
		i++;
	}
	fclose(docfl);
	free(line);  /* fix: buffer was never released */
	return 0;    /* fix: control fell off the end of a non-void function (UB) */
}
/*
function boosted=adaBoost(train,train_label,cycles)
disp('running adaBoost algorithm');
d=size(train);
distribution=ones(1,d(1))/d(1);
error=zeros(1,cycles);
beta=zeros(1,cycles);
label=(train_label(:)>=5);% contain the correct label per vector
for j=1:cycles
if(mod(j,10)==0)
disp([j,cycles]);
end
[i,t]=weakLearner(distribution,train,label);
error(j)=distribution*abs(label-(train(:,i)>=t));
beta(j)=error(j)/(1-error(j));
boosted(j,:)=[beta(j),i,t];
distribution=distribution.* exp(log(beta(j))*(1-abs(label-(train(:,i)>=t))))';
distribution=distribution/sum(distribution);
end
*/
/*
 * C++ translation of the MATLAB adaBoost() in the comment above: runs
 * `cycles` rounds of boosting over train/train_label. Each round asks
 * weakLearner() for a (feature i, threshold t) stump, computes its weighted
 * error and beta, records beta/i/t into boost/iii/ttt, then re-weights and
 * renormalizes the example distribution. Returns 0.
 *
 * fix: every temporary Matrix was allocated with `new` but released with
 * free(), which skips ~Matrix and is undefined behaviour -- all releases now
 * use `delete`. Unused locals (r_train, c_train, value_1000) and the
 * never-read heap `beta` matrix (which was shadowed by a local of the same
 * name) were removed; the local is now `beta_j`.
 */
int adaBoost(Matrix* train,
             Matrix* train_label,
             long train_max_words_doc,
             long train_max_docs,
             int step,
             int cycles,
             Matrix* boost,
             Matrix* iii,
             Matrix* ttt)
{
	int value_one = 1;
	/* distribution = ones(1,d(1)) / d(1) */
	Matrix* distribution = new Matrix(value_one, step);
	distribution->ones(value_one, step, value_one, train_max_docs + 1);
	/* error = zeros(1,cycles) */
	Matrix* error = new Matrix(value_one, step);
	error->zeros(value_one, cycles);
	/* Temporary matrices, reused on every round. */
	Matrix* train_error_part        = new Matrix(step, value_one);
	Matrix* error_abs_label_train   = new Matrix(step, value_one + 1);
	Matrix* scalar_minus_error      = new Matrix(value_one, step);
	Matrix* train_error_part_1      = new Matrix(step, value_one);
	Matrix* error_abs_label_train_1 = new Matrix(step, value_one + 1);
	Matrix* scalar_minus_error_1    = new Matrix(step, value_one);
	Matrix* val_beta                = new Matrix(value_one, value_one);
	Matrix* beta_label_train        = new Matrix(step, value_one);
	Matrix* expMatrix               = new Matrix(step, value_one);
	Matrix* transposeExpMatrix      = new Matrix(value_one, step);
	Matrix* sumMatrix               = new Matrix(value_one, step);
	/* label_matrix holds the correct binary label per vector.
	 * NOTE(review): MATLAB used (train_label >= 5); this uses > 3 -- confirm. */
	Matrix label_matrix = (*train_label) > 3;
	for (int j = 1; j <= cycles; j++)
	{
		if ((j % 10) == 0)
		{
			printf("%d %d\n", j, cycles);
		}
		/* [i,t] = weakLearner(distribution, train, label) */
		int i, t;
		weakLearner(distribution, train, train_label, train_max_words_doc,
		            train_max_docs, step, &i, &t);
		/* error(j) = distribution * abs(label - (train(:,i) >= t)) */
		train_error_part->specificPartOfMatrix(*train, step, i);
		Matrix train_error = (*train_error_part) > t;
		Matrix error_label_train = label_matrix - train_error;
		error_abs_label_train->matrixAbs(error_label_train);
		Matrix error_tmp = (*distribution) * (*error_abs_label_train);
		error->copy(j - 1, error_tmp);
		/* beta(j) = error(j) / (1 - error(j)) */
		double k = 1;
		scalar_minus_error->ScalarMinusMatrix(k, *error);
		Matrix beta_j = (*error) / (*scalar_minus_error);
		/* boosted(j,:) = [beta(j), i, t] */
		boost->copyToMatrix(j, beta_j);
		iii->setValue(j, i);
		ttt->setValue(j, t);
		/* distribution = distribution .* exp(log(beta(j)) *
		 *                (1 - abs(label - (train(:,i) >= t))))' */
		train_error_part_1->specificPartOfMatrix(*train, step, i);
		Matrix train_error_1 = (*train_error_part_1) > t;
		Matrix error_label_train_1 = label_matrix - train_error_1;
		error_abs_label_train_1->matrixAbs(error_label_train_1);
		scalar_minus_error_1->ScalarMinusMatrix(k, *error_abs_label_train_1);
		/* y = log(beta(j)) */
		double val;
		val_beta->getValue(j, &val, beta_j);
		double y = log(val);
		beta_label_train->ScalarMultiMatrix(y, *scalar_minus_error_1);
		expMatrix->matrixExp(*beta_label_train);
		transposeExpMatrix->matrixTranspose(cycles, value_one, *expMatrix);
		distribution->MatrixMultiMatrix(*distribution, *transposeExpMatrix);
		/* distribution = distribution / sum(distribution) */
		double sum_distribution = 0;
		sumMatrix->matrixSumCol(&sum_distribution, *distribution);
		distribution->matrixdDivisonScalar(sum_distribution, *distribution);
	}
	/* fix: was free() on new-ed objects -- delete runs ~Matrix */
	delete train_error_part;
	delete error_abs_label_train;
	delete scalar_minus_error;
	delete train_error_part_1;
	delete error_abs_label_train_1;
	delete scalar_minus_error_1;
	delete val_beta;
	delete beta_label_train;
	delete sumMatrix;
	delete transposeExpMatrix;
	delete expMatrix;
	delete distribution;
	delete error;
	return (0);
}
/*
function [i,t] = weakLearner(distribution,train,label)
%disp('run weakLearner');
for tt=1:(16*256-1)
error(tt)=distribution*abs(label-(train(:,floor(tt/16)+1)>=16*(mod(tt,16)+1)));
end
[val,tt]=max(abs(error-0.5));
i=floor(tt/16)+1;
t=16*(mod(tt,16)+1);
*/
/*
 * C++ translation of the MATLAB weakLearner() in the comment above: sweeps
 * candidate (feature, threshold) decision stumps, computes each stump's
 * distribution-weighted error, and returns via *i / *t the stump whose error
 * is farthest from the offset (i.e. the strongest weak hypothesis).
 * Returns 0.
 *
 * NOTE(review): the constants 55, 8, 12, 2, 3 and the 0.00 offset diverge
 * from the MATLAB reference (16-based grid, labels >= 5, |error - 0.5|);
 * they look hand-tuned for a specific dataset -- confirm intent.
 */
int weakLearner(Matrix* distribution,
                Matrix* train,
                Matrix* train_label,
                long train_max_words_doc,
                long train_max_docs,
                int step,
                int *i,
                int *t)
{
	printf("run weakLearner\n");
	int c_train = train_max_words_doc;
	/* fix: cast before sqrt()/floor() -- sqrt(int) relies on implicit
	 * promotion and is ambiguous on some old toolchains */
	int sqrt_c_train = (int)floor(sqrt((double)c_train));
	int value_one = 1;
	/* Number of candidate stumps swept below. */
	int sizeOfsqrt_c_train = 2 * (sqrt_c_train * c_train) - 1;
	/* Temporary matrices. */
	Matrix* error_max = new Matrix(value_one, sizeOfsqrt_c_train);
	Matrix* error = new Matrix(value_one, sizeOfsqrt_c_train);
	Matrix* train_error_part = new Matrix(step, 2);
	Matrix* error_abs_label_train = new Matrix(step, 2);
	/* NOTE(review): label threshold here is > 2, but adaBoost uses > 3 and
	 * MATLAB used >= 5 -- confirm which is intended. */
	Matrix label_matrix = (*train_label) > 2;
	for (int tt = 0; tt < sizeOfsqrt_c_train; tt++)
	{
		/* threshold = 55 + 8*((tt mod 12) + 1); feature = floor(tt/12) + 1 */
		int modulus_number = 55 + (8 * ((tt % 12) + 1));
		int floor_number = (tt / 12) + 1;  /* integer division is the floor */
		train_error_part->specificPartOfMatrix(*train, step, floor_number);
		Matrix train_error = (*train_error_part) > modulus_number;
		Matrix error_label_train = label_matrix - train_error;
		error_abs_label_train->matrixAbs(error_label_train);
		/* error(tt) = distribution * abs(label - stump prediction) */
		Matrix error_tmp = (*distribution) * (*error_abs_label_train);
		error->copy(tt, error_tmp);
	}
	/* NOTE(review): 443 is hard-coded -- this overflows whenever
	 * sizeOfsqrt_c_train > 443; size it from sizeOfsqrt_c_train instead. */
	double error_train_array[443];
	Matrix* getArrayTrain_error = new Matrix(443, value_one);
	getArrayTrain_error->matrixToArray(error_train_array, *error);
	/* [val, tt] = max(abs(error - offset)) */
	double val = 0;
	int index = 0;
	double parameter = 0.00;
	Matrix* errorMinusScalar = new Matrix(value_one, sizeOfsqrt_c_train);
	errorMinusScalar->matrixMinusSclar(parameter, *error);
	Matrix* errorAbsMinusScalar = new Matrix(value_one, sizeOfsqrt_c_train);
	errorAbsMinusScalar->matrixAbs(*errorMinusScalar);
	error_max->matrixMax(&val, &index, *errorAbsMinusScalar);
	/* i = floor(tt/2) + 1 -- integer division makes floor() redundant */
	(*i) = (index / 2) + 1;
	/* t = 3 * sqrt(c) * ((tt mod sqrt(c)) + 1) */
	(*t) = 3 * sqrt_c_train * ((index % sqrt_c_train) + 1);
	/* fix: was free() on new-ed objects (UB, ~Matrix skipped), and
	 * getArrayTrain_error leaked entirely */
	delete error_max;
	delete error;
	delete train_error_part;
	delete error_abs_label_train;
	delete errorMinusScalar;
	delete errorAbsMinusScalar;
	delete getArrayTrain_error;
	return (0);
}
/*
function [errorTrain,errorTest]=getError(boost,train,train_label,test,test_label)
disp('run getError');
d=size(boost);
num=size(train);
prediction=zeros(num(1),1);
% geting the train error
for h=1:d(1)
prediction=prediction-log(boost(h,1))*(train(:,boost(h,2))>=boost(h,3));
end
temp=-sum(log(boost(:,1)))/2;
errorTrain=sum(abs((train_label>=5)-(prediction>=temp)))/num(1);
prediction=zeros(1000,1);
% geting the test error
for h=1:d(1)
prediction=prediction-log(boost(h,1))*(test(:,boost(h,2))>=boost(h,3));
end
errorTest=sum(abs((test_label>=5)-(prediction>=temp)))/1000;
*/
/*
 * C++ translation of the MATLAB getError() in the comment above: intended to
 * measure the boosted classifier's training and test error rates.
 * WARNING(review): this copy of the file is truncated -- the body below stops
 * after the three matrixSize() calls; recover the error-computation tail
 * (the prediction loops and errorTrain/errorTest assignments) from the
 * original source.
 */
int getError(Matrix* boost,
Matrix* iii,
Matrix* ttt,
Matrix* train_matrix,
Matrix* train_label_matrix,
Matrix* test_matrix,
Matrix* test_label_matrix,
Matrix* errorTrain,
Matrix* errorTest,
long train_max_words_doc,
long train_max_docs,
int step,
int cycles)
{
double value_one = 1;
double value_two = 2;
// d=size(boost);
int boost_row;
int boost_col;
// NOTE(review): `size` is new-ed; no matching delete is visible in this
// truncated copy -- verify the missing tail releases it.
Matrix* size = new Matrix(step,step);
size->matrixSize(&boost_row,&boost_col,*boost);
// num=size(train_matrix);
int train_row;
int train_col;
size->matrixSize(&train_row,&train_col,*train_matrix);
// num=size(test_matrix);
int test_row;
int test_col;
size->matrixSize(&test_row,&test_col,*test_matrix);
// NOTE(review): file truncated here -- the remainder of getError() (and any
// following code) was replaced by website keyboard-shortcut help text in this
// capture; restore the missing tail from the original source before building.