/* adaboost_common.cpp */
{
cat[m - 1] = 1;
}
randomForest *zer_array = new randomForest(1,1);
if (missingValueTrain == 1){
/* GIVE CLASS WEIGHTS */
for (j = 1; j <= 2; ++j) {
classwt[j - 1] = (float)1.;
}
/************************************************************/
/* There are a number of ways of replacing missing values.  */
/************************************************************/
/******************************************************************************************/
/* This method begins by doing a rough and inaccurate fill-in of the missing values.     */
/* It then does a forest run and computes proximities. If x(m,n) is a missing continuous */
/* value, its fill is estimated as an average over the non-missing values of the mth     */
/* variable, weighted by the proximities between the nth case and the non-missing cases. */
/* If it is a missing categorical variable, it is replaced by the most frequent          */
/* non-missing value, where frequency is weighted by proximity (see the sketch below).   */
/******************************************************************************************/
if (TrainMissingDataProcedure == 1){
zer_array->roughfix(x_train, v, ncase, &mdim, &nsample, xts, &maxcat, cat, &code, nrcat, &maxcat, fill);
}
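/*
   Illustrative sketch only (an assumption, not the roughfix routine called above): proximity-
   weighted imputation of one continuous variable. The names proximityFill, xcol, prox,
   nsample and missingCode are hypothetical and do not exist in this codebase.

   void proximityFill(float* xcol, const float* prox, int nsample, float missingCode)
   {
       for (int n = 0; n < nsample; ++n) {
           if (xcol[n] != missingCode) continue;           // only fill missing entries
           float num = 0.f, den = 0.f;
           for (int j = 0; j < nsample; ++j) {
               if (j == n || xcol[j] == missingCode) continue;
               float w = prox[n * nsample + j];            // proximity between cases n and j
               num += w * xcol[j];
               den += w;
           }
           if (den > 0.f) xcol[n] = num / den;             // proximity-weighted average fill
       }
   }
*/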
/********************************************************************************/
/* K-nearest neighbor method to estimate values for the missing expression data.*/
/********************************************************************************/
if (TrainMissingDataProcedure == 2){
zer_array->Knearestneighborfix(x_train, ncase, &mdim, &nsample, cat, &code);
}
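/*
   Illustrative sketch only (an assumption, not the Knearestneighborfix routine called above):
   k-nearest-neighbour imputation of one missing entry x[target][m], assuming <vector>,
   <algorithm> and <utility> are available. All names here are hypothetical.

   float knnFill(float** x, int mdim, int nsample, int target, int m, int k, float missingCode)
   {
       std::vector< std::pair<float,float> > cand;          // (distance, observed value) pairs
       for (int n = 0; n < nsample; ++n) {
           if (n == target || x[n][m] == missingCode) continue;
           float d = 0.f;
           for (int v = 0; v < mdim; ++v)                   // distance over jointly observed variables
               if (v != m && x[n][v] != missingCode && x[target][v] != missingCode)
                   d += (x[n][v] - x[target][v]) * (x[n][v] - x[target][v]);
           cand.push_back(std::make_pair(d, x[n][m]));
       }
       std::sort(cand.begin(), cand.end());                 // nearest candidates first
       float sum = 0.f; int used = 0;
       for (int i = 0; i < (int)cand.size() && i < k; ++i) { sum += cand[i].second; ++used; }
       return used ? sum / used : missingCode;              // average of the k nearest observed values
   }
*/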
/****************************************************************************************/
/* The Ignore procedure simply discards the missing data and returns a smaller matrix  */
/* without the -999 placeholder values (see the sketch below).                         */
/****************************************************************************************/
if (TrainMissingDataProcedure == 3){
zer_array->Ignorefix(x_train,v, ncase, &mdim, &nsample, cat, &code, ncolumn);
}
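/*
   Illustrative sketch only (an assumption, not the Ignorefix routine called above): compact a
   row-major matrix by dropping every case (row) that still contains the -999 missing code.
   The names dropMissingRows, x, mdim and nsample are hypothetical.

   int dropMissingRows(float* x, int mdim, int nsample, float missingCode)
   {
       int kept = 0;
       for (int n = 0; n < nsample; ++n) {
           bool complete = true;
           for (int m = 0; m < mdim; ++m)
               if (x[n * mdim + m] == missingCode) { complete = false; break; }
           if (complete) {
               for (int m = 0; m < mdim; ++m)               // copy the complete case downwards
                   x[kept * mdim + m] = x[n * mdim + m];
               ++kept;
           }
       }
       return kept;                                          // the new, smaller number of cases
   }
*/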
/****************************/
/* COUNT CLASS POPULATIONS */
/****************************/
zer_array->zerv(nc, &nclass);
for (n = 1; n <= nsample_train; ++n) {
if (cl[n - 1] < 1 || cl[n - 1] > 2) {
if (com_pro.show_action)
printm("error in class label");
}
++nc[cl[n - 1] - 1];
}
}
/****************************************************************/
/* Subroutines/functions that zero the new arrays, e.g. countts, counttr, ...  */
/****************************************************************/
zer_array->zermr(countts, &nclass, &ntest);
zer_array->zerm(counttr, &nclass, &nsample);
zer_array->zerv(out, &nsample);
zer_array->zervr(tgini, &mdim);
zer_array->zerv(msum, &mdim);
zer_array->zermd(prox, &value_one, &value_one);
/***************************************************************************************/
/* The function does a number of things:                                               */
/* 1. Counts how many times each class appears in the class column, e.g. for          */
/*    satimage.tra class 1 appears 38 times, class 2 appears 436 times, ...           */
/* 2. Divides each class count (e.g. 38) by the number of rows (e.g. 2296).           */
/* 3. Normalizes the results into the per-case weight array wtt; a class with no      */
/*    occurrences leaves its wtt entries at 0.00 (see the sketch below).              */
/***************************************************************************************/
if (missingValueTrain == 0){
zer_array->prep(cl, &nsample, &nclass, &ipi, pi, pid, nc, wtt);
}
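/*
   Illustrative sketch only (an assumption, not the prep routine called above): count the class
   populations, turn them into class priors, and spread a normalized weight over the cases.
   The names cl, nc, pi and wtt follow the surrounding code, but makeCaseWeights and its body
   are a guess at the intent described in the comment, not the actual implementation.

   void makeCaseWeights(const int* cl, int nsample, int nclass, int* nc, float* pi, float* wtt)
   {
       for (int j = 0; j < nclass; ++j) nc[j] = 0;
       for (int n = 0; n < nsample; ++n) ++nc[cl[n] - 1];       // class populations (labels are 1-based)

       for (int j = 0; j < nclass; ++j)
           pi[j] = (float)nc[j] / (float)nsample;               // class priors, e.g. 38 / 2296

       for (int n = 0; n < nsample; ++n)                        // equal normalized weight for every case
           wtt[n] = (nc[cl[n] - 1] > 0) ? 1.f / (float)nsample  // of an observed class, 0.00 otherwise
                                        : 0.f;
   }
*/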
/***********************************************************************************/
/* submakea constructs the mdim x nsample int array a. If there are fewer than    */
/* 32,000 cases, this can be declared int*2, otherwise int*4. For each            */
/* numerical variable with values x(m,n), n=1,...,nsample, the x-values are sorted*/
/* from lowest to highest. Denote these by xs(m,n). Then a(m,n) is the case       */
/* number in which xs(m,n) occurs. The b matrix is also constructed here.         */
/* If the mth variable is categorical, then a(m,n) is the category of the nth     */
/* case (see the sketch below).                                                    */
/***********************************************************************************/
zer_array->makea(x_train,&mdim,&nsample,cat,isort,v,at,b,&mdim,ncolumn);
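/*
   Illustrative sketch only (an assumption, not the makea routine called above): for one
   numerical variable, sort its values and record in a(.) the case number that supplies the
   nth smallest value (an argsort), assuming <vector>, <algorithm> and <utility> are available.
   The name argsortVariable is hypothetical.

   void argsortVariable(const float* xm, int* a, int nsample)
   {
       std::vector< std::pair<float,int> > order(nsample);
       for (int n = 0; n < nsample; ++n) order[n] = std::make_pair(xm[n], n);
       std::sort(order.begin(), order.end());                   // sort the x-values, keeping case numbers
       for (int n = 0; n < nsample; ++n)
           a[n] = order[n].second + 1;                          // 1-based case number of the nth smallest value
   }
*/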
if (ntest > 1)
{
for (m = 1; m <= 36; ++m)
{
cat[m - 1]=1;
}
for (j = 1; j <= 6; ++j)
{
pid[j - 1]=0.000000;
}
}
/*************/
/* START RUN */
/*************/
/************************************************/
/* Build new array by zerv, zerm .. Subroutines */
/************************************************/
zer_array->zerv(nodestatus, &nrnodes);
zer_array->zerm(treemap, &value_two, &nrnodes);
zer_array->zervr(xbestsplit, &nrnodes);
zer_array->zerv(nodeclass, &nrnodes);
zer_array->zerv(jin, &nsample);
zer_array->zervr(tclasspop, &nclass);
zer_array->zervr(win, &nsample);
zer_array->zerv(bestvar, &nrnodes);
if (missingValueTrain == 1){
for (n = 1; n <= nsample; ++n) {
/**************************************************************/
/* k is a random case index: k = (int)(rrand() * nsample) + 1 */
/**************************************************************/
float ret_val=0;
zer_array->rrand(&ntest,&ret_val);
k= (ret_val * nsample) + 1;
if (k >= nsample){
zer_array->rrand(&ntest,&ret_val);
k= (ret_val * nsample) + 1;
}
/************************************************************/
/* The array "win" accumulates the class weight of case k. */
/************************************************************/
win[k - 1] += classwt[cl[k - 1] - 1];
/**************************************************/
/* The array "jin" is also incremented at index k */
/**************************************************/
++jin[k - 1];
/*********************************************************************************************/
/* Using k we look up the class label in the data file, e.g. k=300 gives cl[k-1]=3 and      */
/* k=46 gives cl[k-1]=7, and we update the array "tclasspop": the entry for that class is   */
/* incremented by its class weight, and stays 0 for classes that are never sampled.         */
/*********************************************************************************************/
tclasspop[cl[k - 1] - 1] += classwt[cl[k - 1] - 1];
}
}
if (missingValueTrain == 0){
for (n = 1; n <= nsample; ++n)
{
/****************************/
/* k is a random case index */
/****************************/
float ret_val=0;
zer_array->rrand(&ntest,&ret_val);
k= (ret_val * nsample) + 1;
//k = (int) (rrand(&ntest) * 2296) + 1;
if (k >= nsample){
zer_array->rrand(&ntest,&ret_val);
k= (ret_val * nsample) + 1;
}
/*********************************************************************************************/
/* Using k we look up the class label in the data file, e.g. k=300 gives cl[k-1]=3 and      */
/* k=46 gives cl[k-1]=7, and we update the array "tclasspop": the entry for that class is   */
/* incremented by the case weight wtt[k-1], and stays 0 for classes that are never sampled. */
/*********************************************************************************************/
/*********************************************************************************************/
tclasspop[cl[k - 1] - 1] += wtt[k - 1];
/***************************************************************/
/* The array "win" is incremented by the case weight wtt[k-1] */
/***************************************************************/
win[k - 1] += wtt[k - 1];
/*************************************************************/
/* The array "jin" is set to 1 at index k (case k is in-bag) */
/*************************************************************/
jin[k - 1] = 1;
}
}
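/*
   Illustrative sketch only (an assumption, a compact restatement of the sampling loop above):
   draw a bootstrap sample of nsample cases with replacement and accumulate the in-bag flags
   (jin), in-bag weights (win) and the sampled class populations (tclasspop). The names
   bootstrapSample and uniform01 are hypothetical; uniform01() stands in for rrand().

   void bootstrapSample(const int* cl, const float* wtt, int nsample, int nclass,
                        int* jin, float* win, float* tclasspop, float (*uniform01)())
   {
       for (int j = 0; j < nclass; ++j) tclasspop[j] = 0.f;
       for (int n = 0; n < nsample; ++n) { jin[n] = 0; win[n] = 0.f; }

       for (int n = 0; n < nsample; ++n) {
           int k = (int)(uniform01() * nsample);               // random case index, 0..nsample-1
           if (k >= nsample) k = nsample - 1;                  // guard against uniform01() returning 1.0
           jin[k] = 1;                                         // case k is in the bag
           win[k] += wtt[k];                                   // accumulate its weight
           tclasspop[cl[k] - 1] += wtt[k];                     // and its class's population weight
       }
   }
*/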
/***************************************************************************************************/
/* The function/subroutine builds a new array holding the location of each x-value before the     */
/* sort, e.g. for column 1, before the sort: v[1909]=40, after the sort: v[1]=40 with isort=1909. */
/* In other words, this subroutine copies from k[m1 + n1 * k_dim1] to j[m1 + n1 * j_dim1],        */
/* giving a new "j" array: j[]=1909, j[]=1576, ...                                                 */
/***************************************************************************************************/
zer_array->eqm(a, at, &mdim, &nsample,ncolumn);
/*********************************************************************************************************/
/* The function/subroutine updates the "a" array, e.g. a[37]=2005 -> a[37]=1909, a[73]=530 -> a[73]=1576 */
/*********************************************************************************************************/
zer_array->moda(a, &nuse, &nsample, &mdim, cat, &ntest, ncase, jin, ta,ncolumn);
/***********************************************************************************************/
/* Buildtree consists of repeated calls to two subroutines, Findbestsplit and Movedata. */
/* Findbestsplit does just that--it finds the best split of the current */
/* node. Movedata moves the data in the split node right and left so that the data */
/* corresponding to each child node is contiguous. */
/* The buildtree bookkeeping is different from that in Friedman's original CART program. */
/* ncur is the total number of nodes to date. nodestatus(k)=1 if the kth node has been split. */
/* nodestatus(k)=2 if the node exists but has not yet been split, and =-1 if the node is */
/* terminal. A node is terminal if its size is below a threshold value, or if it is all */
/* one class, or if all the x-values are equal. If the current node k is split, then its */
/* children are numbered ncur+1 (left), and ncur+2(right), ncur increases to ncur+2 and */
/* the next node to be split is numbered k+1. When no more nodes can be split, buildtree */
/* returns to the main program. */
/***********************************************************************************************/
zer_array->buildtree(a, b, cl, cat, &mdim, &nsample, &nclass, treemap, bestvar,
bestsplit, bestsplitnext, tgini, nodestatus, nodepop, nodestart, classpop, tclasspop, tclasscat, ta, &nrnodes,
idmove, &ntest, ncase, parent, jin, &nclass, iv, nodeclass, &ndbigtree, win, wr, wc, wl, &mdim, &nuse);
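/*
   Illustrative sketch only (an assumption, not the buildtree routine called above): the node
   bookkeeping described in the comment. ncur is the index of the last node created so far;
   nodestatus[k] is 2 while node k is waiting to be split, 1 once it has been split, and -1
   when it is terminal. isTerminal and splitNode are hypothetical callbacks.

   void growTree(int nrnodes, int* nodestatus,
                 bool (*isTerminal)(int node), void (*splitNode)(int parent, int left, int right))
   {
       int ncur = 0;                                  // node 0 is the root
       nodestatus[0] = 2;                             // the root exists but has not been split yet
       for (int k = 0; k <= ncur; ++k) {
           if (nodestatus[k] != 2) continue;          // already split or terminal
           if (isTerminal(k) || ncur + 2 >= nrnodes) {// too small, pure, constant x, or out of room
               nodestatus[k] = -1;
               continue;
           }
           splitNode(k, ncur + 1, ncur + 2);          // children are numbered ncur+1 (left), ncur+2 (right)
           nodestatus[k] = 1;                         // node k has been split
           nodestatus[ncur + 1] = 2;                  // both children exist and wait to be split
           nodestatus[ncur + 2] = 2;
           ncur += 2;
       }
   }
*/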
/*************************************************************************************/
/* This subroutine takes the splits on numerical variables and translates them */
/* back into x-values. It also unpacks each categorical split into a 32-dimensional */
/* vector with components of zero or one--a one indicates that the corresponding */
/* category goes left in the split. */
/*************************************************************************************/
zer_array->xtranslate(x_train, &mdim, &nrnodes, &nsample, bestvar, bestsplit, bestsplitnext, xbestsplit,
nodestatus, cat, &ndbigtree);
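/*
   Illustrative sketch only (an assumption, not the xtranslate routine called above): unpack an
   integer-coded categorical split into a 32-component 0/1 vector; a 1 in position c means that
   category c+1 goes to the left child. The name unpackCategoricalSplit is hypothetical.

   void unpackCategoricalSplit(unsigned int packedSplit, int goesLeft[32])
   {
       for (int c = 0; c < 32; ++c)
           goesLeft[c] = (int)((packedSplit >> c) & 1u);   // bit c set => category c+1 goes left
   }
*/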
/************************************************/
/* GET OUT-OF-BAG ESTIMATES FOR TEST HYPOTHESIS */
/************************************************/
ntest = test_max_docs;
if (missingValueTest == 1)
{
/******************************************************************************************/
/* There are a number of ways of replacing missing values.                                */
/* This method begins by doing a rough and inaccurate fill-in of the missing values.     */
/* It then does a forest run and computes proximities. If x(m,n) is a missing continuous */
/* value, its fill is estimated as an average over the non-missing values of the mth     */
/* variable, weighted by the proximities between the nth case and the non-missing cases. */
/* If it is a missing categorical variable, it is replaced by the most frequent          */
/* non-missing value, where frequency is weighted by proximity.                          */
/******************************************************************************************/
if (TestMissingDataProcedure == 1){
zer_array->roughfix(x_test, v, ncase, &mdim, &nsample_test, xts, &maxcat, cat, &code, nrcat, &maxcat, fill);
}
/********************************************************************************/
/* K-nearest neighbor method to estimate values for the missing expression data.*/
/********************************************************************************/
if (TestMissingDataProcedure == 2){
zer_array->Knearestneighborfix(x_test, ncase, &mdim, &nsample_test, cat, &code);
}
/****************************************************************************************/
/* The Ignore procedure simply discards the missing data and returns a smaller matrix  */
/* without the -999 placeholder values.                                                */
/****************************************************************************************/
if (TestMissingDataProcedure == 3){
zer_array->Ignorefix(x_test,v, ncase, &mdim, &nsample_test, cat, &code, ncolumn);
}
}
zer_array->testreebag(x_test, &ntest, &mdim, treemap, nodestatus, xbestsplit, cbestsplit,
bestvar, nodeclass, &nrnodes, &ndbigtree, cat, &nclass, jts, nodex, &value_one);
for (int kkk=0; kkk <= ntest; ++kkk)
{
if (jts[kkk] == 2 ){
jts[kkk]=0;
}
}
/* Put the test hypothesis (jts) into the test_error_array Matrix */
Matrix* get_train_array = new Matrix(1,1);
get_train_array->copyArrayToMatrix(step,value_one,jts,*test_error_array);
/* GET OUT-OF-BAG ESTIMATES FOR TRAIN HYPOTHESIS */
zer_array->testreebag(x_train, &nsample, &mdim, treemap, nodestatus, xbestsplit, cbestsplit,
bestvar, nodeclass, &nrnodes, &ndbigtree, cat, &nclass, jtr, nodex, &ntest);
for (int kkkk=0; kkkk <= nsample; ++kkkk)
{
if (jtr[kkkk] == 2 ){
jtr[kkkk]=0;
}
}
/* Put the train hypothesis (jtr) into the train_error_array Matrix */
get_train_array->copyArrayToMatrix(step,value_one,jtr,*train_error_array);
delete get_train_array;
delete zer_array;
return (0);
}
/*************************************************************************************/
/* function [errorTrain,errorTest]=getError(boost,train,train_label,test,test_label) */
/* disp('run getError'); */
/* d=size(boost); */
/* num=size(train); */
/* prediction=zeros(num(1),1); */
/* % getting the train error */
/* for h=1:d(1) */
/* prediction=prediction-log(boost(h,1))*(train(:,boost(h,2))>=boost(h,3)); */
/* end */
/* temp=-sum(log(boost(:,1)))/2; */
/* errorTrain=sum(abs((train_label>=5)-(prediction>=temp)))/num(1); */
/* prediction=zeros(1000,1); */
/* % getting the test error */
/* for h=1:d(1) */
/* prediction=prediction-log(boost(h,1))*(test(:,boost(h,2))>=boost(h,3)); */
/* end */
/* errorTest=sum(abs((test_label>=5)-(prediction>=temp)))/1000; */
/*************************************************************************************/
int getError(Matrix* boost,
Matrix* train_hypothesis,
Matrix* train_matrix,
Matrix* train_label_matrix,
Matrix* test_hypothesis,
Matrix* test_matrix,
Matrix* test_label_matrix,
Matrix* errorTrain,
Matrix* errorTest,
Matrix* tmp_test_hypothesis,
long train_max_words_doc,
long train_max_docs,
int step,
int cycles,
int sign)
{
if (com_pro.show_action)
printm("run getError");
double value_one = 1;
double value_two = 2;
// d=size(boost);
int boost_row;
int boost_col;
Matrix* size = new Matrix(step,step);
size->matrixSize(&boost_row,&boost_col,*boost);
// num=size(train_matrix);
int train_row;
int train_col;
size->matrixSize(&train_row,&train_col,*train_matrix);
// num=size(test_matrix);
int test_row;
int test_col;
size->matrixSize(&test_row,&test_col,*test_matrix);
if (cycles < 100){
step=train_row-1;
}