⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 adaboost.cpp

📁 AdaBoost is an efficient tool in machine learning. It can combine a series of weak learners into a s
💻 CPP
字号:
/****************************************************************************
 NJU Magic.  Copyright (c) 2007.  All Rights Reserved.            
  
--------------------------------------------------------------------
Permission to use, copy, or modify this software and its documentation
for educational and research purposes only and without fee is hereby
granted, provided that this copyright notice appear on all copies and
supporting documentation.  For any other uses of this software, in
original or modified form, including but not limited to distribution
in whole or in part, specific prior permission must be obtained from
NJU Magic and the authors.  These programs shall not be used, rewritten,
or adapted as the basis of a commercial software or hardware product
without first obtaining appropriate licenses from NJU Magic. NJU Magic 
makes no representations about the suitability of this software for any
purpose.  It is provided "as is" without express or implied warranty.

---------------------------------------------------------------------

 File: AdaBoost.cpp                                                                         
 Authors: Yao Wei  
 Date Created    : 2007-8-11
                                          
****************************************************************************/

#include "AdaBoost.h"

// Command-line entry point. Three modes:
//   <iterations> <train-file> <model-file>        -- train a model
//   -test <model-file> <data-file>                -- evaluate a model
//   -predict <model-file> <data-file> [out-file]  -- label new data
// Prints total wall-clock time on exit.
int main(int argc,char *argv[])
{
	
	clock_t t = clock();
	char * filename = 0;
	char * modelname = 0;
	char * outname = 0;
	int max_iter = 0;
	Boosting boost;

	if(argc<=1)
	{
		printhelp();
		return -1;
	}
	
	if(isdigit(argv[1][0]))
	{
		// Train mode needs: iterations, train file, model file.
		// The original indexed argv[2]/argv[3] without checking argc.
		if(argc < 4)
		{
			printhelp();
			return -1;
		}
		myprint("AdaBoost Train");
		max_iter =atoi(argv[1]);
		filename = argv[2];
		modelname = argv[3];

		boost.SetIteration(max_iter);
		boost.Train(filename);
		boost.SaveModel(modelname);

		myprint("Task End");
	}
	else if(strcmp(argv[1],"-test")==0)
	{
		if(argc < 4)
		{
			printhelp();
			return -1;
		}
		myprint("AdaBoost Test");
		modelname = argv[2];
		filename = argv[3];

		boost.LoadModel(modelname);
		boost.Test(filename);

		myprint("Task End");
	}
	else if(strcmp(argv[1],"-predict")==0)
	{
		if(argc < 4)
		{
			printhelp();
			return -1;
		}
		myprint("AdaBoost Predict");	// fixed "Predictt" typo
		modelname = argv[2];
		filename = argv[3];
		// Output file is optional; Predict() writes to stdout when 0.
		outname = (argc > 4) ? argv[4] : 0;

		boost.LoadModel(modelname);
		boost.Predict(filename,outname);
		
		myprint("Task End");
	}
	else
		printhelp();

	double e = clock() - t;
	printf("\nTime Cost: %g seconds\n",e/CLOCKS_PER_SEC);
	//boost.Free();
	return 0;
}

// Constructor: zero the owned pointers (weak-learner array, per-class
// sub-matrices) and the scalar bookkeeping (iteration limit, feature
// dimension, sample count).
// NOTE(review): the members of `info` (count / eachcount / eachlabel)
// are NOT initialized here -- verify they are set before Free() runs.
Boosting::Boosting():leaner(0),	max_iter(0),n_input(0),
	n_samples(0), submat(0)
{
	
}

// Destructor. Deliberately does NOT release memory: the Free() call is
// commented out (as is the one at the end of main), so all heap blocks
// are reclaimed by the OS at process exit instead.
Boosting::~Boosting()
{
	//Free();
}

// Validate the training set and build per-class bookkeeping.
// Records the sample count and feature dimension, scans the responses
// for the distinct class labels, then splits the samples into one
// sub-matrix per class. Exits the process when the number of samples
// and labels disagree.
void Boosting::Preprocess(const Mat &train, const Vec &responses)
{
	if (train.rows() != responses.length())
	{
		// fixed "lable" typo in the user-facing error message
		printf("ERROR: # train sample != # label\n");
		exit(1);
	}
	
	n_samples = train.rows();
	n_input = train.cols();
	
	GetClassInfo(responses);	// find distinct labels and their counts
	UnrollData(train,responses);	// split samples into per-class sub-matrices
}

// Scan the response vector and fill `info` with: the number of distinct
// class labels, each label's value (in order of first appearance), and
// each label's occurrence count. Prints a summary.
void Boosting::GetClassInfo(const Vec &responses)
{
	const int MAX_CLASSES = 100;	// capacity of the scratch tables below
	double a[MAX_CLASSES];		// distinct class labels, in first-seen order
	int b[MAX_CLASSES] = {0};	// occurrence count of each label
	int count = 0;			// number of distinct classes found

	for(int i = 0; i < responses.length(); i ++)
	{
		double temp = responses[i];
		// Linear search among the labels seen so far.
		int k = 0;
		while(k < count && temp != a[k])
			k++;
		if(k == count)		// a new class appears
		{
			// Guard the fixed-size tables: the original code wrote
			// past the end once more than 100 labels were seen.
			if(count == MAX_CLASSES)
			{
				fprintf(stderr,
					"ERROR: more than %d distinct class labels\n",
					MAX_CLASSES);
				exit(1);
			}
			a[count] = temp;
			b[count] = 1;
			count ++;
		}
		else			// an already-seen class
		{
			b[k] ++;
		}
	}

	//get # class
	info.count = count;
	//get count of each class label
	info.eachcount = new int[count];
	for(int i = 0; i < count; i ++)
		info.eachcount[i] = b[i];
	//get concrete class label
	info.eachlabel = new double[count];
	for(int i = 0; i < count; i ++)
		info.eachlabel[i] = a[i];

	printf("Training data consists of %d classes.\n", count);
	printf("They are ");
	for(int i = 0; i < count; i++)
		printf("%g(#%d) ", a[i], b[i]);
	printf("respectively.\n");	// fixed "repectively" typo
}

// Split the training matrix into one sub-matrix per class (preparation
// for 1-vs-1 training): submat[c] ends up holding exactly those rows of
// `train` whose response equals info.eachlabel[c]. With `count` classes,
// `count` sub-matrices are produced.
void Boosting::UnrollData(const Mat &train,const Vec &responses)
{
	const int count = info.count;

	// Allocate one sub-matrix per class, pre-sized with the label
	// counts gathered by GetClassInfo().
	submat = new Mat[count];
	for(int c = 0; c < count; c++)
		submat[c].Set(info.eachcount[c], n_input);

	// Route every sample into the sub-matrix of its class label.
	for(int s = 0; s < n_samples; s++)
	{
		for(int c = 0; c < count; c++)
		{
			if(info.eachlabel[c] == responses[s])
			{
				submat[c].AddRow(train[s]);
				break;
			}
		}
	}
}

// Run count*(count-1)/2 binary (1-vs-1) AdaBoost trainings, one per
// unordered pair of classes, storing the resulting strong classifiers
// in `leaner` indexed by k = 0..pairs-1 in (i,j), i<j order.
void Boosting::DoTrain()
{
	int count = info.count;			//#	class
	leaner = new WeakLearner[count*(count-1)/2];
	int k=0;
	//1 vs 1, do count*(count-1)/2 steps of binary training
	printf("\n------Training Begin------");
	// fixed "iterarion" typo in the progress message
	printf("\nThe whole training process contains %d steps of binary training, "
		"each training iteration is %d.", 
		count*(count-1)/2, max_iter);
	
	for(int i=0; i<count; i++)
	{
		for(int j=i+1; j<count; j++)
		{
			printf("\nstep %3d: %g vs %g\n", k+1, 
				info.eachlabel[i], info.eachlabel[j]);
			leaner[k] = TrainOneVsOne(submat[i], submat[j],
				info.eachlabel[i], info.eachlabel[j]);
			k++;
		}
		// submat[i] is never needed again once every pair (i, j>i)
		// has been trained, so release its storage eagerly.
		submat[i].Free();
	}
	printf("\n------Training OK------\n\n");
	delete []submat;
	submat = 0;	// original left a dangling pointer after delete[]
}

// Train one binary AdaBoost classifier separating mat1 (label1, coded
// +1) from mat2 (label2, coded -1), using decision stumps as weak
// learners and at most max_iter boosting rounds. Returns the boosted
// strong classifier.
WeakLearner Boosting::
TrainOneVsOne(const Mat&mat1, const Mat&mat2,double label1,double label2)
{
	int i, t;
	int n = mat1.rows() + mat2.rows();
	int D = mat1.cols();
	double* _weights = new double[n];
	// Initialize weights uniformly
	for(i = 0; i< n; i++)
		_weights[i] = 1.0 / n;
	int* hClassification = new int[n];

	WeakLearner wlearner;
	wlearner.Init(max_iter, label1, label2);
	DecisionStump stump;
	
	// Perform the learning
	for(t = 0; t < max_iter; t ++)
	{
		if(t % 10 == 0)
			printf("*...");
		
		// Create the weak learner and train it
		stump.RoundTrain(mat1, mat2, _weights);
		
		// Compute the classifications and the weighted training error.
		// Rows of mat1 are the +1 class, rows of mat2 the -1 class.
		double epsilon = 0.0;
		for(i = 0; i < mat1.rows(); i ++)
		{
			hClassification[i] = stump.Classify(mat1[i], D);
			epsilon += (hClassification[i] == +1) ? 0: _weights[i];
		}
		for(i = 0; i< mat2.rows(); i++)
		{
			hClassification[i+mat1.rows()] = stump.Classify(mat2[i], D);
			epsilon += 
				(hClassification[i+mat1.rows()] == -1) ? 0: _weights[i+mat1.rows()];
		}

		// Stop if the weak learner is no better than chance
		if(epsilon >= 0.5)
			break;

		// Guard a perfect round (epsilon == 0): the original divided by
		// zero in the alpha formula, producing an infinite alpha and
		// NaN weights on the next normalization.
		if(epsilon < 1e-10)
			epsilon = 1e-10;

		// alpha = 0.5 * ln((1 - eps) / eps)
		double alpha  = 0.5 *log((1 - epsilon) / epsilon);

		// Update the weights: down-weight correctly classified samples,
		// up-weight mistakes.
		double weightsSum = 0.0;
		for (i = 0; i < n; i++)
		{
			_weights[i] *= 
				exp(-alpha * (i < mat1.rows() ? +1 : -1) * hClassification[i]);
			weightsSum += _weights[i];
		}

		// Normalize so the weights stay a distribution
		for (i = 0; i < n; i++)
			_weights[i] /= weightsSum;

		// Store the weak learner and alpha value
		wlearner.AddStump(stump, alpha);
	}
	delete []_weights;
	delete []hClassification;

	return wlearner;
}

// Classify one feature vector by majority vote over all 1-vs-1 strong
// classifiers: each of the count*(count-1)/2 learners casts one vote
// for a class label; the label with the most votes wins (ties go to
// the earlier label). Exits if the feature dimension does not match
// the loaded model.
double Boosting::Vote(double *feature,int n_input)
{
	if(this->n_input != n_input)
	{
		fprintf(stderr,
			"The feature of test file doesn't match to the model file!\n");
		exit(1);
	}	
	
	int count = info.count;
	// decision[j] counts how many learners voted for class label j
	int *decision = new int[count];
	memset(decision, 0, count*sizeof(int));
	int i,j;
	double tempresult;
	// Count the predicted label of each weak learner
	for(i=0; i<count*(count-1)/2; i++)
	{	
		tempresult = leaner[i].Classify(feature, n_input);
		for(j=0; j<count; j++)
		{
			if(info.eachlabel[j] == tempresult)
			{
				decision[j]++;
				break;
			}
		}
	}

	// majority vote
	double maxcount = decision[0];
	double label = info.eachlabel[0];
	for(i=1; i<count; i++)
	{
		if(maxcount < decision[i])
		{
			label = info.eachlabel[i];
			maxcount = decision[i];
		}
	}

	// The original leaked this array on every call.
	delete []decision;

	return label;			
}

// Scan a whitespace-separated text data file and report its shape:
// m = number of non-empty lines (samples), n = maximum number of
// number-tokens on any line (feature dimension incl. label).
// On open failure, prints an error and leaves m = n = 0.
void Boosting::GetSize(const char *filename, int &m, int &n)
{
	FILE *f;
	int c;
	int current_n = 0;
	m = 0;					//sample number
	n = 0;					//feature dimension of each sample
  
	if ((f = fopen (filename, "r")) == NULL)
	{ 
		fprintf(stderr,"Cannot open the data file!\n"); 
		return; 
	}
	int bc = '\n';						//the character before c
	
	while((c = getc(f)) != EOF) 
	{
		if(c == '\r')					//fold CR (Mac / Windows CRLF) into '\n'
			c = '\n';
		if(c != '\n' && bc == '\n')		//first character of a new line: one more sample
			m++;
		if(space_or_null(bc) && number(c)) //start of a new number token
			current_n ++;

		if(c == '\n')					//reach the end of one line
		{
			if(current_n > n) 
				n = current_n;
			current_n = 0;
		}
		bc = c;
	}
	// The original lost the last line's token count when the file did
	// not end with a newline; account for that final line here.
	if(current_n > n)
		n = current_n;
	fclose(f);
}

// Read a labeled data file into `data` (row x col feature matrix) and
// `label` (one value per row). The file layout is `col` numbers per
// line, the last one being the class label. Exits the process on open
// failure or malformed input.
void Boosting::ReadData(const char *filename,Mat &data,Vec &label)
{
	FILE *f = fopen(filename,"r");
	if(f == NULL)
	{
		fprintf(stderr,"Cannot open the data file!\n");
		exit(1);
	}
	printf("------Read Data Begin------\n");
	int row,col;
	GetSize(filename,row,col);  //get the size of file
	printf("Samples: %d, Features: %d\n",row,col-1);
	
	col = col-1;				//the last column is the label
	data.Set(row, col);
	label.Set(row);

	//read data from file; abort on malformed input instead of silently
	//leaving values uninitialized (the original ignored fscanf's result)
	for(int i = 0; i < row; i ++)
	{
		for(int j = 0; j < col; j ++)
		{
			if(fscanf(f, "%lf", &data[i][j]) != 1)
			{
				fprintf(stderr,"Malformed data file at sample %d!\n", i);
				fclose(f);
				exit(1);
			}
		}
		if(fscanf(f,"%lf",&label[i]) != 1)
		{
			fprintf(stderr,"Malformed data file at sample %d!\n", i);
			fclose(f);
			exit(1);
		}
	}
	printf("------Read Data OK------\n\n");
	
	fclose(f);
}

// Serialize the trained model: a header line with the sample count and
// feature dimension, a class-label line ("count: l1 l2 ..."), then each
// of the count*(count-1)/2 pairwise strong classifiers in order.
// Exits the process if the file cannot be opened.
void Boosting::SaveModel(const char *filename)
{
	FILE *f = fopen(filename,"wb");
	if (f == NULL)
	{
		fprintf(stderr,"Cannot open the model file!\n");
		exit(1);
	}

	printf("------Save Model Begin------\n");

	// Header: sample count and input dimension, then the labels.
	fprintf(f,"%d %d\n",n_samples,n_input);
	fprintf(f,"%d:",info.count);
	for(int i = 0; i < info.count; i ++)
		fprintf(f," %f",info.eachlabel[i]);
	fprintf(f,"\n");

	// One strong classifier per unordered class pair, in the same
	// k = 0..pairs-1 order DoTrain() produced them.
	const int pairs = info.count * (info.count - 1) / 2;
	for(int k = 0; k < pairs; k ++)
		leaner[k].Save(f);

	printf("------Save Model OK------\n\n");
	fclose(f);
}

// Deserialize a model previously written by SaveModel(): the header
// (sample count, input dimension), the class-label line ("%d:" matches
// the literal colon after the class count), then one strong classifier
// per unordered class pair.
// Returns 1 on success, 0 if the file cannot be opened.
// NOTE(review): fscanf return values are unchecked, so a truncated or
// malformed model file leaves members with undefined values -- verify.
// NOTE(review): info.eachcount is NOT restored here (SaveModel never
// writes it); confirm nothing on the load/predict path reads it.
int Boosting::LoadModel(const char *filename)
{
	
	int i, j, k=0;
	FILE *f = fopen(filename,"rb");

	if (f == NULL)
	{
		fprintf(stderr,"Cannot open the model file!\n");
		return 0;
	}

	printf("------Load Model Begin------\n");
	
	fscanf(f,"%d %d",&n_samples,&n_input);
	fscanf(f,"%d:",&info.count);
	
	// Labels in the same order SaveModel() wrote them.
	info.eachlabel = new double[info.count];
	for(i=0; i<info.count; i++)
		fscanf(f,"%lf",&info.eachlabel[i]);

	leaner = new WeakLearner[info.count*(info.count-1)/2];
	
	// Mirror of SaveModel's pair loop: k runs 0..pairs-1.
	for(i=0; i<info.count; i++)
	{
		for(j=i+1; j<info.count; j++)
		{
			leaner[k].Load(f);
			k++;
		}			
	}

	printf("------Load Model OK------\n\n");
	fclose(f);
	return 1;
}

// Full training pipeline: read the labeled data from `filename`,
// derive class info and per-class sub-matrices, release the raw
// copies, then run the 1-vs-1 boosting rounds.
void Boosting::Train(const char *filename)
{
	Mat train_data;
	Vec train_label;
	ReadData(filename, train_data, train_label);
	Preprocess(train_data, train_label);
	// The raw matrix/labels are no longer needed: Preprocess() has
	// already unrolled them into per-class sub-matrices (submat).
	train_data.Free();
	train_label.Free();
	DoTrain();
}

// Evaluate the loaded model on a labeled test file: predict every
// sample via majority vote, print a confusion matrix (rows = true
// label, cols = predicted label) and the overall accuracy.
// NOTE(review): divides by `total` for the accuracy -- an empty test
// file would divide by zero; confirm callers never pass one.
void Boosting::Test(const char *filename)
{
	Mat test_data;					//test matrix
	Vec test_label;					//test label
	ReadData(filename,test_data,test_label);
	
	printf("------Test Begin------\n");
	int i, j;
	int correct=0, total = test_label.length();
	int length = test_data.cols();	//feature dimension
	int count = info.count;			//# class
	
	Vec predict_label(total);		//predict label
	//for each sample, predict its label
	for(i = 0; i<total; i++)
		predict_label[i] = Vote(test_data[i],length);
	
	//Confusion Matrix, count x count, zero-initialized
	int **confusion = (int**)mymalloc2d(sizeof(int),count, count);
	for(i=0; i<count; i++)
	{
		for(j=0; j<count; j++)
			confusion[i][j] = 0;
	}

	// Tally (true, predicted) pairs; Find() presumably maps a label
	// value to its index in info.eachlabel -- TODO confirm.
	for(i=0; i<total; i++)
	{
		int ii = Find(test_label[i]);
		int jj = Find(predict_label[i]);
		confusion[ii][jj]++;
		
		if(test_label[i] == predict_label[i])//predict correct
			correct++;
	}
	
	printf("Confusion Matrix:\n ");
	for(i=0; i<count;i++)
		printf("%4g",info.eachlabel[i]);
	printf("\n  --------------------------------------\n");
	for(i=0; i<count; i++)
	{
		printf("%g|",info.eachlabel[i]);
		for(j=0; j<count; j++)
			printf("%3d ",confusion[i][j]);
		printf("\n");
	}
	
	printf("Classify Accuracy: %g (%d/%d)\n",
		(double)correct/total,correct,total);
	printf("------Test OK------\n");

	//free memory
	test_data.Free();
	test_label.Free();
	predict_label.Free();
	// NOTE(review): frees each row then the row-pointer block; this
	// assumes mymalloc2d allocates per-row -- verify its contract.
	for(i=0; i<count; i++)
		free(confusion[i]);
	free(confusion);
}

// Label every sample in `filename` with the loaded model and write one
// predicted label per line to `outputfile` (or stdout when outputfile
// is 0). If the input has n_input+1 columns, the trailing column is
// treated as a label and skipped. Exits the process on open failure.
void Boosting::Predict(const char *filename, const char *outputfile)
{
	FILE *f = fopen(filename,"r");
	if(f == NULL)
	{
		fprintf(stderr,"Cannot open the data file!\n");
		exit(1);
	}
	printf("------Read Data Begin------\n");
	int row, col;
	GetSize(filename,row,col);
	int flag = 0;
	if((int)col == n_input+1)
	{
		flag = 1;						//the last column is a label
		col -= 1;
	}
	printf("Samples: %d, Features: %d\n",row,col);
	
	Mat predict_data(row, col);			//predict matrix
	int i,j;
	double t_label;
	for(i=0; i<row; i++)				//read data
	{
		for(j=0; j<col; j++)
			fscanf(f, "%lf", &predict_data[i][j]);
		if(flag)
			fscanf(f,"%lf",&t_label);	//skip the label
	}
	fclose(f);
	printf("------Read Data OK------\n\n");

	printf("------Predict Begin------\n");
	Vec predict_label(row);				//predict label
	//for each sample, predict its label
	for(i = 0; i<row; i++)
		predict_label[i] = Vote(predict_data[i],col);
	
	// Choose the output destination: stdout when no file was given.
	if(outputfile == 0)
		f =stdout;
	else
	{
		f = fopen(outputfile,"wb");
		// The original ignored an open failure and then wrote
		// through a null FILE*.
		if(f == NULL)
		{
			fprintf(stderr,"Cannot open the output file!\n");
			exit(1);
		}
	}

	//write predict result to a outfile
	printf("----Write result to file----\n");
	for(i=0; i<row; i++)
		fprintf(f,"%g\n",predict_label[i]);
	// The original leaked the output FILE* (never fclosed), risking
	// unflushed output; close it unless it is stdout.
	if(f != stdout)
		fclose(f);
	printf("----Write Done----\n");
	printf("------Predict OK------\n");
	
	//free memory
	predict_label.Free();
	predict_data.Free();
}


void Boosting::Free()
{
	for(int i = 0; i < info.count*(info.count-1)/2; i++)
		leaner[i].Free();
	delete []leaner;
	delete []info.eachcount;
	delete []info.eachlabel;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -