📄 adaboost.cpp
字号:
/****************************************************************************
NJU Magic. Copyright (c) 2007. All Rights Reserved.
--------------------------------------------------------------------
Permission to use, copy, or modify this software and its documentation
for educational and research purposes only and without fee is hereby
granted, provided that this copyright notice appear on all copies and
supporting documentation. For any other uses of this software, in
original or modified form, including but not limited to distribution
in whole or in part, specific prior permission must be obtained from
NJU Magic and the authors. These programs shall not be used, rewritten,
or adapted as the basis of a commercial software or hardware product
without first obtaining appropriate licenses from NJU Magic. NJU Magic
makes no representations about the suitability of this software for any
purpose. It is provided "as is" without express or implied warranty.
---------------------------------------------------------------------
File: AdaBoost.cpp
Authors: Yao Wei
Date Created : 2007-8-11
****************************************************************************/
#include "AdaBoost.h"
/**
 * Command-line entry point for the AdaBoost tool.
 * Usage:
 *   adaboost <max_iter> <trainfile> <modelfile>       train and save a model
 *   adaboost -test <modelfile> <testfile>             evaluate a saved model
 *   adaboost -predict <modelfile> <datafile> [out]    predict labels (stdout if no out)
 * Returns 0 on success, -1 on bad usage.
 */
int main(int argc, char *argv[])
{
    clock_t t = clock();
    Boosting boost;
    if (argc <= 1)
    {
        printhelp();
        return -1;
    }
    if (isdigit(argv[1][0]))
    {
        // Train mode: needs <max_iter> <trainfile> <modelfile>.
        if (argc < 4)   // fix: original indexed argv[2]/argv[3] unchecked
        {
            printhelp();
            return -1;
        }
        myprint("AdaBoost Train");
        boost.SetIteration(atoi(argv[1]));
        boost.Train(argv[2]);
        boost.SaveModel(argv[3]);
        myprint("Task End");
    }
    else if (strcmp(argv[1], "-test") == 0)
    {
        if (argc < 4)   // fix: original indexed argv[2]/argv[3] unchecked
        {
            printhelp();
            return -1;
        }
        myprint("AdaBoost Test");
        boost.LoadModel(argv[2]);
        boost.Test(argv[3]);
        myprint("Task End");
    }
    else if (strcmp(argv[1], "-predict") == 0)
    {
        if (argc < 4)   // fix: original indexed argv[2]/argv[3] unchecked
        {
            printhelp();
            return -1;
        }
        myprint("AdaBoost Predict");   // fix: message typo "Predictt"
        boost.LoadModel(argv[2]);
        // Output file is optional: argv[argc] is NULL by the standard, so with
        // no 4th argument the original already routed results to stdout.
        boost.Predict(argv[3], argc > 4 ? argv[4] : 0);
        myprint("Task End");
    }
    else
        printhelp();

    double e = clock() - t;
    printf("\nTime Cost: %g seconds\n", e / CLOCKS_PER_SEC);
    //boost.Free();
    return 0;
}
// Construction: zero every pointer and counter so the object is safe to
// destroy or query before Train()/LoadModel() has populated it.
Boosting::Boosting()
{
    leaner    = 0;
    max_iter  = 0;
    n_input   = 0;
    n_samples = 0;
    submat    = 0;
}
// Destruction. NOTE(review): Free() is deliberately commented out here and in
// main(); Boosting objects and WeakLearners are shallow-copied (TrainOneVsOne
// returns by value), so freeing in the destructor could double-free — confirm
// ownership before re-enabling. As written, model memory leaks at exit.
Boosting::~Boosting()
{
//Free();
}
/**
 * Validate the raw training set and build the internal representation:
 * discovers the distinct class labels and splits the samples into one
 * sub-matrix per class. Exits with an error if the number of samples
 * does not match the number of labels.
 */
void Boosting::Preprocess(const Mat &train, const Vec &responses)
{
    // Every training row must come with exactly one label.
    if (train.rows() != responses.length())
    {
        printf("ERROR: # train samples != # labels\n");   // fix: "lable" typo
        exit(1);
    }
    n_samples = train.rows();
    n_input = train.cols();
    GetClassInfo(responses);      // fills info.count / eachlabel / eachcount
    UnrollData(train, responses); // builds per-class submatrices
}
/**
 * Scan the label vector and record the set of distinct class labels,
 * the number of samples carrying each label, and the total class count.
 * Results are published into the `info` member (count/eachcount/eachlabel).
 * Exits with an error if more than 100 distinct labels are found
 * (fix: the original silently overflowed its fixed-size scratch arrays).
 */
void Boosting::GetClassInfo(const Vec &responses)
{
    const int MAX_CLASSES = 100;
    double a[MAX_CLASSES];     // distinct labels seen so far
    int b[MAX_CLASSES] = {0};  // sample count for each label
    int count = 0;             // number of distinct labels

    for (int i = 0; i < responses.length(); i++)
    {
        double temp = responses[i];
        // Linear search: labels are exact repeated values, so == is intended.
        int k = 0;
        while (k < count && temp != a[k])
            k++;
        if (k == count)   // a new class label
        {
            if (count >= MAX_CLASSES)   // guard the fixed-size arrays
            {
                printf("ERROR: more than %d classes are not supported\n",
                       MAX_CLASSES);
                exit(1);
            }
            a[count] = temp;
            b[count] = 1;
            count++;
        }
        else              // label already known
        {
            b[k]++;
        }
    }
    // Publish the class summary.
    info.count = count;
    info.eachcount = new int[count];
    info.eachlabel = new double[count];
    for (int i = 0; i < count; i++)
    {
        info.eachcount[i] = b[i];
        info.eachlabel[i] = a[i];
    }
    printf("Training data consists of %d classes.\n", count);
    printf("They are ");
    for (int i = 0; i < count; i++)
        printf("%g(#%d) ", a[i], b[i]);
    printf("respectively.\n");   // fix: "repectively" typo
}
/**
 * Partition the training matrix by class: after this call submat[c]
 * holds exactly the rows whose label equals info.eachlabel[c].
 * Assumes GetClassInfo() has already filled `info`.
 */
void Boosting::UnrollData(const Mat &train, const Vec &responses)
{
    const int count = info.count;

    // Pre-size one sub-matrix per class; row counts come from GetClassInfo.
    submat = new Mat[count];
    for (int c = 0; c < count; c++)
        submat[c].Set(info.eachcount[c], n_input);

    // Route every sample into the sub-matrix of its own class.
    for (int r = 0; r < n_samples; r++)
    {
        for (int c = 0; c < count; c++)
        {
            if (responses[r] == info.eachlabel[c])
            {
                submat[c].AddRow(train[r]);
                break;
            }
        }
    }
}
/**
 * One-vs-one training: for each unordered pair of classes (i, j) train a
 * boosted binary classifier on their two sub-matrices, producing
 * count*(count-1)/2 ensembles in `leaner`. Frees each sub-matrix as soon
 * as it has been paired with every later class.
 */
void Boosting::DoTrain()
{
    const int count = info.count;
    const int pairs = count * (count - 1) / 2;

    leaner = new WeakLearner[pairs];
    printf("\n------Training Begin------");
    printf("\nThe whole training process contains %d steps of binary training, "
           "each training iterarion is %d.", pairs, max_iter);

    int k = 0;
    for (int i = 0; i < count; i++)
    {
        for (int j = i + 1; j < count; j++, k++)
        {
            printf("\nstep %3d: %g vs %g\n", k + 1,
                   info.eachlabel[i], info.eachlabel[j]);
            leaner[k] = TrainOneVsOne(submat[i], submat[j],
                                      info.eachlabel[i], info.eachlabel[j]);
        }
        // submat[i] is no longer needed by any remaining pair.
        submat[i].Free();
    }
    printf("\n------Training OK------\n\n");
    delete []submat;
}
/**
 * Train one binary AdaBoost classifier for a pair of classes.
 * Rows of mat1 are the +1 class (label1), rows of mat2 the -1 class (label2).
 * Runs up to max_iter boosting rounds of decision stumps, stopping early
 * when a stump is no better than chance (epsilon >= 0.5).
 *
 * @return the trained WeakLearner ensemble (returned by value).
 */
WeakLearner Boosting::
TrainOneVsOne(const Mat &mat1, const Mat &mat2, double label1, double label2)
{
    int i, t;
    const int n = mat1.rows() + mat2.rows();
    const int D = mat1.cols();

    // Start with a uniform weight on every training sample.
    double *_weights = new double[n];
    for (i = 0; i < n; i++)
        _weights[i] = 1.0 / n;

    int *hClassification = new int[n];
    WeakLearner wlearner;
    wlearner.Init(max_iter, label1, label2);
    DecisionStump stump;

    for (t = 0; t < max_iter; t++)
    {
        if (t % 10 == 0)
            printf("*...");
        // Fit the best stump under the current weight distribution.
        stump.RoundTrain(mat1, mat2, _weights);

        // Weighted training error: total weight of misclassified samples.
        double epsilon = 0.0;
        for (i = 0; i < mat1.rows(); i++)
        {
            hClassification[i] = stump.Classify(mat1[i], D);
            epsilon += (hClassification[i] == +1) ? 0 : _weights[i];
        }
        for (i = 0; i < mat2.rows(); i++)
        {
            hClassification[i + mat1.rows()] = stump.Classify(mat2[i], D);
            epsilon +=
                (hClassification[i + mat1.rows()] == -1) ? 0 : _weights[i + mat1.rows()];
        }
        // Stop if the stump is no better than random guessing.
        if (epsilon >= 0.5)
            break;
        // Fix: a perfect stump gives epsilon == 0, and
        // alpha = 0.5*log((1-eps)/eps) would divide by zero (infinite alpha,
        // NaN weights on the next round). Clamp to a tiny positive error.
        if (epsilon < 1e-10)
            epsilon = 1e-10;
        double alpha = 0.5 * log((1 - epsilon) / epsilon);

        // Reweight: shrink correctly-classified samples, grow mistakes,
        // then renormalize so the weights stay a distribution.
        double weightsSum = 0.0;
        for (i = 0; i < n; i++)
        {
            _weights[i] *=
                exp(-alpha * (i < mat1.rows() ? +1 : -1) * hClassification[i]);
            weightsSum += _weights[i];
        }
        for (i = 0; i < n; i++)
            _weights[i] /= weightsSum;
        // Record this round's stump and its vote weight.
        wlearner.AddStump(stump, alpha);
    }
    delete []_weights;
    delete []hClassification;
    return wlearner;
}
/**
 * Predict the class of one sample by one-vs-one majority vote: each of
 * the count*(count-1)/2 binary learners votes for one of its two labels;
 * the label with the most votes wins (ties keep the earliest label).
 *
 * @param feature  the sample's feature vector
 * @param n_input  its dimension; must match the model's feature count
 * @return the winning class label
 */
double Boosting::Vote(double *feature, int n_input)
{
    if (this->n_input != n_input)
    {
        fprintf(stderr,
                "The feature of test file doesn't match to the model file!\n");
        exit(1);
    }
    int count = info.count;
    // Vote tally, one slot per class label.
    int *dicision = new int[count];
    memset(dicision, 0, count * sizeof(int));

    // Let every binary learner cast its vote.
    for (int i = 0; i < count * (count - 1) / 2; i++)
    {
        double tempresult = leaner[i].Classify(feature, n_input);
        for (int j = 0; j < count; j++)
        {
            if (info.eachlabel[j] == tempresult)
            {
                dicision[j]++;
                break;
            }
        }
    }
    // Majority vote.
    int maxcount = dicision[0];
    double label = info.eachlabel[0];
    for (int i = 1; i < count; i++)
    {
        if (maxcount < dicision[i])
        {
            label = info.eachlabel[i];
            maxcount = dicision[i];
        }
    }
    delete []dicision;   // fix: original leaked this buffer on every call
    return label;
}
/**
 * Scan a whitespace-separated data file to determine its dimensions.
 * On return m is the number of non-empty lines (samples) and n is the
 * maximum number of number-tokens on any line. On open failure m and n
 * stay 0 and a message is printed (callers should treat 0 rows as error).
 */
void Boosting::GetSize(const char *filename, int &m, int &n)
{
    FILE *f;
    int c;
    int current_n = 0;   // number tokens seen on the current line
    m = 0;
    n = 0;
    if ((f = fopen(filename, "r")) == NULL)
    {
        fprintf(stderr, "Cannot open the data file!\n");
        return;
    }
    int bc = '\n';   // previous character; start-of-file behaves like a newline
    while ((c = getc(f)) != EOF)
    {
        if (c == 13)                    // treat '\r' (CR / CRLF) as a line break
            c = '\n';
        if (c != '\n' && bc == '\n')    // first character of a new line
            m++;
        if (space_or_null(bc) && number(c))  // start of a new number token
            current_n++;
        if (c == '\n')                  // end of line: track the widest line
        {
            if (current_n > n)
                n = current_n;
            current_n = 0;
        }
        bc = c;
    }
    // Fix: count the final line even when the file lacks a trailing newline.
    if (current_n > n)
        n = current_n;
    fclose(f);
}
/**
 * Load a whitespace-separated data file into a matrix and a label vector.
 * Each line carries col-1 feature values followed by one label (the last
 * number on the line). Exits if the file cannot be opened.
 */
void Boosting::ReadData(const char *filename, Mat &data, Vec &label)
{
    FILE *f = fopen(filename, "r");
    if (f == NULL)
    {
        fprintf(stderr, "Cannot open the data file!\n");
        exit(1);
    }
    printf("------Read Data Begin------\n");

    int row, col;
    GetSize(filename, row, col);   // dimensions of the file
    printf("Samples: %d, Features: %d\n", row, col - 1);

    col = col - 1;                 // the trailing column is the label
    data.Set(row, col);
    label.Set(row);

    // Stream the numbers straight into the matrix and label vector.
    for (int r = 0; r < row; r++)
    {
        for (int c = 0; c < col; c++)
            fscanf(f, "%lf", &data[r][c]);
        fscanf(f, "%lf", &label[r]);
    }
    printf("------Read Data OK------\n\n");
    fclose(f);
}
/**
 * Write the trained model to `filename`: the data dimensions, the class
 * label list, then every one-vs-one weak learner in training order.
 * Exits if the file cannot be opened.
 */
void Boosting::SaveModel(const char *filename)
{
    FILE *f = fopen(filename, "wb");
    if (f == NULL)
    {
        fprintf(stderr, "Cannot open the model file!\n");
        exit(1);
    }
    printf("------Save Model Begin------\n");

    // Header: sample count, feature dimension, then the label list.
    fprintf(f, "%d %d\n", n_samples, n_input);
    fprintf(f, "%d:", info.count);
    for (int i = 0; i < info.count; i++)
        fprintf(f, " %f", info.eachlabel[i]);
    fprintf(f, "\n");

    // One learner per unordered class pair, in the same (i, j>i) order
    // DoTrain() produced them.
    const int pairs = info.count * (info.count - 1) / 2;
    for (int k = 0; k < pairs; k++)
        leaner[k].Save(f);

    printf("------Save Model OK------\n\n");
    fclose(f);
}
/**
 * Load a model previously written by SaveModel(): the data dimensions,
 * the class label list, then every one-vs-one weak learner in order.
 * Returns 1 on success, 0 if the file cannot be opened.
 */
int Boosting::LoadModel(const char *filename)
{
    FILE *f = fopen(filename, "rb");
    if (f == NULL)
    {
        fprintf(stderr, "Cannot open the model file!\n");
        return 0;
    }
    printf("------Load Model Begin------\n");

    // Header: sample count, feature dimension, then the label list.
    fscanf(f, "%d %d", &n_samples, &n_input);
    fscanf(f, "%d:", &info.count);
    info.eachlabel = new double[info.count];
    for (int i = 0; i < info.count; i++)
        fscanf(f, "%lf", &info.eachlabel[i]);

    // One learner per unordered class pair, in the order SaveModel wrote them.
    const int pairs = info.count * (info.count - 1) / 2;
    leaner = new WeakLearner[pairs];
    for (int k = 0; k < pairs; k++)
        leaner[k].Load(f);

    printf("------Load Model OK------\n\n");
    fclose(f);
    return 1;
}
void Boosting::Train(const char *filename)
{
Mat train_data;
Vec train_label;
ReadData(filename, train_data, train_label);
Preprocess(train_data, train_label);
train_data.Free();
train_label.Free();
DoTrain();
}
/**
 * Evaluate the loaded model on a labeled test file: predict every sample
 * by one-vs-one majority vote, then print a confusion matrix and the
 * overall classification accuracy.
 */
void Boosting::Test(const char *filename)
{
    Mat test_data;
    Vec test_label;
    ReadData(filename, test_data, test_label);
    printf("------Test Begin------\n");

    const int total = test_label.length();
    const int length = test_data.cols();   // feature dimension
    const int count = info.count;          // number of classes

    // Predict a label for every test sample.
    Vec predict_label(total);
    for (int i = 0; i < total; i++)
        predict_label[i] = Vote(test_data[i], length);

    // confusion[truth][prediction] tallies, zero-initialized.
    int **confusion = (int **)mymalloc2d(sizeof(int), count, count);
    for (int i = 0; i < count; i++)
        for (int j = 0; j < count; j++)
            confusion[i][j] = 0;

    int correct = 0;
    for (int i = 0; i < total; i++)
    {
        confusion[Find(test_label[i])][Find(predict_label[i])]++;
        if (test_label[i] == predict_label[i])
            correct++;
    }

    printf("Confusion Matrix:\n ");
    for (int i = 0; i < count; i++)
        printf("%4g", info.eachlabel[i]);
    printf("\n --------------------------------------\n");
    for (int i = 0; i < count; i++)
    {
        printf("%g|", info.eachlabel[i]);
        for (int j = 0; j < count; j++)
            printf("%3d ", confusion[i][j]);
        printf("\n");
    }
    printf("Classify Accuracy: %g (%d/%d)\n",
           (double)correct / total, correct, total);
    printf("------Test OK------\n");

    // Release everything this run allocated.
    test_data.Free();
    test_label.Free();
    predict_label.Free();
    for (int i = 0; i < count; i++)
        free(confusion[i]);
    free(confusion);
}
/**
 * Predict a label for every sample in `filename` and write them, one per
 * line, to `outputfile` (or stdout when outputfile is null). If the data
 * file has n_input+1 columns, the extra trailing column is assumed to be
 * a label and is skipped.
 */
void Boosting::Predict(const char *filename, const char *outputfile)
{
    FILE *f = fopen(filename, "r");
    if (f == NULL)
    {
        fprintf(stderr, "Cannot open the data file!\n");
        exit(1);
    }
    printf("------Read Data Begin------\n");
    int row, col;
    GetSize(filename, row, col);
    int flag = 0;
    if (col == n_input + 1)
    {
        flag = 1;   // the trailing column is a label; ignore it
        col -= 1;
    }
    printf("Samples: %d, Features: %d\n", row, col);
    Mat predict_data(row, col);
    int i, j;
    double t_label;
    for (i = 0; i < row; i++)
    {
        for (j = 0; j < col; j++)
            fscanf(f, "%lf", &predict_data[i][j]);
        if (flag)
            fscanf(f, "%lf", &t_label);   // consume and discard the label
    }
    fclose(f);
    printf("------Read Data OK------\n\n");
    printf("------Predict Begin------\n");
    Vec predict_label(row);
    for (i = 0; i < row; i++)
        predict_label[i] = Vote(predict_data[i], col);
    if (outputfile == 0)
        f = stdout;
    else
    {
        f = fopen(outputfile, "wb");
        if (f == NULL)   // fix: original wrote through an unchecked NULL
        {
            fprintf(stderr, "Cannot open the output file!\n");
            exit(1);
        }
    }
    printf("----Write result to file----\n");
    for (i = 0; i < row; i++)
        fprintf(f, "%g\n", predict_label[i]);
    // Fix: original never closed the output file, so buffered predictions
    // could be lost at exit.
    if (f != stdout)
        fclose(f);
    printf("----Write Done----\n");
    printf("------Predict OK------\n");
    predict_label.Free();
    predict_data.Free();
}
void Boosting::Free()
{
for(int i = 0; i < info.count*(info.count-1)/2; i++)
leaner[i].Free();
delete []leaner;
delete []info.eachcount;
delete []info.eachlabel;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -