/* adaboost_common.cpp */
delete transposeExpMatrix;
delete expMatrix;
delete distribution;
delete error;
delete beta;
delete getArrayboost;
return(0);
}
/******************************************************************************************/
/* function [i,t] = weakLearner(distribution,train,label) */
/* %disp('run weakLearner'); */
/* for tt=1:(16*256-1) */
/* error(tt)=distribution*abs(label-(train(:,floor(tt/16)+1)>=16*(mod(tt,16)+1))); */
/* end */
/* [val,tt]=max(abs(error-0.5)); */
/* i=floor(tt/16)+1; */
/* t=16*(mod(tt,16)+1); */
/******************************************************************************************/
int weakLearner(Matrix* distribution,
Matrix* train,
Matrix* i_all,
Matrix* t_all,
Matrix label_matrix,
long train_max_words_doc,
long train_max_docs,
int step,
int *i,
int *t,
int cycles,
int j,
double ave_delta)
{
/*if (com_pro.show_action)
// printm("run weakLearner %d\n times, j");
if (com_pro.show_compute_1)
{
sprintf(temstr,"run weakLearn %.d\n times",j);
printm(temstr);
}*/
int r_train=train_max_docs+1;
int c_train=train_max_words_doc;
float sqrt_c_train_tmp=(float)sqrt((double)c_train);
int sqrt_c_train = (int)floor(sqrt_c_train_tmp);
int value_one = 1;
int modulus_number=0;
int floor_number=1;
int sizeOfsqrt_c_train=2*sqrt_c_train*(c_train- 1);
// double val_error;
int counter=1;
if (step < 100){
step=r_train;
}
// New temporary matrices
Matrix* error = new Matrix(value_one,sizeOfsqrt_c_train);
Matrix* error_max = new Matrix(value_one,sizeOfsqrt_c_train);
//Matrix* matrix_val_error = new Matrix(value_one,value_one);
Matrix* get_i_all = new Matrix(step,value_one);
Matrix* get_t_all = new Matrix(step,value_one);
Matrix* Ava_data_matrix = new Matrix(step,c_train-1);
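// Scan the candidate decision stumps: the loop below covers 12*(c_train-1) (feature, threshold)
// pairs; floor_number picks the feature column and modulus_number the threshold value
// (this implementation's adaptation of the 16-thresholds-per-feature scan in the MATLAB spec above).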
for (int tt=0; tt < 12*(c_train - 1); tt++)
{
Matrix* train_error_part = new Matrix(step,2);
Matrix* error_abs_label_train = new Matrix(step,2);
/***********************************************************************************/
/* error(tt)=distribution*abs(label-(train(:,floor(tt/16)+1)>=16*(mod(tt,16)+1))); */
/***********************************************************************************/
//16*(mod(tt,sqrt(c_train))+1)
//sqrt_c_train
modulus_number = 69 + (5 * (tt % 12)+1);
//floor(tt/sqrt(c_train))+1
floor_number=floor(tt / 11) + 1;
if (step < 100){
step=r_train;
}
train_error_part->specificPartOfMatrix(*train,step,floor_number-1);
// (train(:,floor(tt/16)+1)>=16*(mod(tt,16)+1))
Matrix train_error = (*train_error_part)>modulus_number;
// label-(train(:,floor(tt/16)+1)>=16*(mod(tt,16)+1))
Matrix error_label_train = (label_matrix)-(train_error);
// abs(label-(train(:,floor(tt/16)+1)>=16*(mod(tt,16)+1)))
error_abs_label_train->matrixAbs(error_label_train);
//error(tt)=distribution*abs(label-(train(:,floor(tt/16)+1)>=16*(mod(tt,16)+1)));
Matrix error_tmp = (*distribution)*(*error_abs_label_train);
error->copy(tt,error_tmp);
//free matrix
delete train_error_part;
delete error_abs_label_train;
}
//double error_train_array[432];
//Matrix* getArrayTrain_error = new Matrix(432,value_one);
//getArrayTrain_error->matrixToArrayold(error_train_array,*error);
//matrix_val_error->getValueSpecific(0,0,&val_error,*error);
//[val,tt]=max(abs(error-0.5));
double val=0;
int index=0;
double parameter_05 = 0.5;
Matrix* errorMinusScalar = new Matrix(value_one,sizeOfsqrt_c_train);
errorMinusScalar->matrixMinusSclar(parameter_05,*error);
Matrix* errorAbsMinusScalar = new Matrix(value_one,sizeOfsqrt_c_train);
errorAbsMinusScalar->matrixAbs(*errorMinusScalar);
error_max->matrixMax(&val,&index,*errorAbsMinusScalar);
/************************/
/* i=floor(tt/16)+1; */
/* t=16*(mod(tt,16)+1); */
/************************/
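// Recover the feature index i and the threshold t from the flattened index of the best stump.
// Unlike the MATLAB floor(tt/16)+1 above, the divisor used here varies with the index band,
// and i is clamped so it never reaches c_train-1.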
if (index > 400 && index < 500 ){
(*i) = floor(index/17)+1;
if (*i >= (c_train-1)){
*i = (c_train-10);
}
(*t) = 12 * ((index % 12) + 1);
}
if (index > 300 && index < 400 ){
(*i) = floor(index/12)+1;
if (*i >= (c_train-1)){
*i = (c_train-10);
}
(*t) = 12 * ((index % 12) + 1);
}
if (index > 200 && index < 300){
(*i) = floor(index/7)+1;
if (*i >= (c_train-1)){
*i = (c_train-10);
}
(*t) = 12 * ((index % 12) + 1);
}
if (index > 100 && index < 200){
(*i) = floor(index/5)+1;
if (*i >= (c_train-1)){
*i = (c_train-10);
}
(*t) = 12 * ((index % 12) + 1);
}
if (index > 0 && index < 100){
(*i) = floor(index/2)+1;
if (*i >= (c_train-1)){
*i = (c_train-10);
}
(*t) = 12 * ((index % 12) + 1);
}
i_all->setValue(j,*i);
if (j > 2)
{
get_i_all->matrixCompareAndSet(i,c_train,*i_all);
}
t_all->setValue(j,*t);
if (j > 1)
{
get_t_all->matrixCompareAndSetfor_t(j,t,c_train,*t_all);
}
(*t) = (*t) + ave_delta;
delete error_max;
delete error;
delete Ava_data_matrix;
delete errorMinusScalar;
delete errorAbsMinusScalar;
delete get_i_all;
delete get_t_all;
return (0);
}
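/**************************************************************************************************/
/* Illustrative only: a minimal, self-contained sketch of the stump search that the MATLAB       */
/* comment above weakLearner() describes, written against plain std::vector data instead of the  */
/* Matrix class. The names (stumpSearchSketch, bestFeature, ...) are hypothetical and not part   */
/* of the original code; it assumes 16 evenly spaced thresholds per feature, as in the MATLAB    */
/* spec, rather than the 12 used in the implementation above.                                     */
/**************************************************************************************************/
#include <vector>
#include <cmath>
static void stumpSearchSketch(const std::vector<std::vector<double> >& train, /* docs x features  */
                              const std::vector<double>& label,               /* 0/1 labels       */
                              const std::vector<double>& distribution,        /* document weights */
                              int* bestFeature, int* bestThreshold)
{
	const int docs = (int)train.size();
	const int features = (docs > 0) ? (int)train[0].size() : 0;
	double bestScore = -1.0;
	for (int f = 0; f < features; ++f)
	{
		for (int k = 1; k <= 16; ++k)                     /* 16 candidate thresholds per feature */
		{
			const double threshold = 16.0 * k;            /* t = 16*(mod(tt,16)+1)               */
			double err = 0.0;                             /* weighted error of this stump        */
			for (int d = 0; d < docs; ++d)
			{
				const double predicted = (train[d][f] >= threshold) ? 1.0 : 0.0;
				err += distribution[d] * std::fabs(label[d] - predicted);
			}
			/* a stump (or its negation) is most useful when err is far from 0.5 */
			const double score = std::fabs(err - 0.5);
			if (score > bestScore)
			{
				bestScore = score;
				*bestFeature = f + 1;                     /* 1-based, i = floor(tt/16)+1         */
				*bestThreshold = (int)threshold;
			}
		}
	}
}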
/**************************************************************************************************/
/* Random Forests grows many classification trees. To classify a new object from an input vector, */
/* put the input vector down each of the trees in the forest. Each tree gives a classification, */
/* and we say the tree "votes" for that class. The forest chooses the classification having the */
/* most votes (over all the trees in the forest). */
/* */
/* Each tree is grown as follows: */
/* -If the number of cases in the training set is N, sample N cases at random - but with */
/* replacement, from the original data. This sample will be the training set for growing the tree.*/
/* -If there are M input variables, a number m<<M is specified such that at each node, m variables*/
/* are selected at random out of the M and the best split on these m is used to split the node. */
/* The value of m is held constant during the forest growing. */
/* -Each tree is grown to the largest extent possible. There is no pruning. */
/* The forest error rate depends on two things:                                                    */
/* -The correlation between any two trees in the forest. Increasing the correlation increases the  */
/*  forest error rate.                                                                             */
/* -The strength of each individual tree in the forest. A tree with a low error rate is a strong   */
/*  classifier. Increasing the strength of the individual trees decreases the forest error rate.   */
/* Reducing m reduces both the correlation and the strength. Increasing it increases both.         */
/* Somewhere in between is an "optimal" range of m - usually quite wide. Using the oob error rate, */
/* a value of m in that range can quickly be found. This is the only adjustable parameter to which */
/* random forests is somewhat sensitive.                                                           */
/**************************************************************************************************/
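/**************************************************************************************************/
/* Illustrative only: a minimal sketch of the two sampling steps described above - drawing a     */
/* bootstrap sample of N cases with replacement, and picking m of the M input variables at a     */
/* node. It is not part of the original implementation below; the names and the use of rand()    */
/* are assumptions made for the example.                                                          */
/**************************************************************************************************/
#include <vector>
#include <cstdlib>
static void bootstrapSampleSketch(int nCases, std::vector<int>& sample)
{
	/* Draw N case indices uniformly with replacement; cases that are never drawn are the */
	/* "out-of-bag" cases used to estimate the oob error rate mentioned above.            */
	sample.resize(nCases);
	for (int k = 0; k < nCases; ++k)
		sample[k] = std::rand() % nCases;
}
static void randomVariableSubsetSketch(int M, int m, std::vector<int>& vars)
{
	/* Pick m of the M input variables at random (a partial Fisher-Yates shuffle); only */
	/* these m variables are searched for the best split at the current node.           */
	vars.resize(M);
	for (int v = 0; v < M; ++v)
		vars[v] = v;
	for (int v = 0; v < m; ++v)
	{
		int swapWith = v + std::rand() % (M - v);
		int tmp = vars[v]; vars[v] = vars[swapWith]; vars[swapWith] = tmp;
	}
	vars.resize(m);
}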
int randomForest_weakLearner(Matrix* distribution,
int TrainMissingDataProcedure,
int TestMissingDataProcedure,
int missingValueTest,
Matrix* train_matrix,
Matrix* train_label_matrix,
Matrix* test_matrix,
Matrix* test_label_matrix,
long train_max_words_doc,
long train_max_docs,
long test_max_words_doc,
long test_max_docs,
int step,
int cycles_num,
int ntest,
Matrix* train_error_array,
Matrix* test_error_array,
int train_array[],
int label_train_array[],
int test_array[],
int label_test_array[],
int missingValueTrain)
{
//if (com_pro.show_action)
// printm("running Random Forest algorithm");
/****************************/
/* Table of constant values */
/****************************/
static int ipi = 0;
static int value_one=1;
static int value_two = 2;
static int nclass = 2;
static int nsample;
static int nsample_train = train_max_docs;
static int nsample_test = test_max_docs;
static int mdim = train_max_words_doc - 1;
static int nrnodes = 8871;
static int LoopNumber = cycles_num;
static float code = (float)-999.;
static int maxcat = 1;
/*******************/
/* Local variables */
/*******************/
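/* The "was [..][..]" notes on the declarations below record the original two-dimensional */
/* shapes of these flattened work buffers; all of them are statically sized for a fixed   */
/* maximum problem size.                                                                   */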
static int jcat, mtab[36] /* was [6][6] */;
static float errc;//, diffmarg[36], grph[26610] /* was [6][2296] */;
static int clts[1], jerr[2296], jest[2296];
static int nuse;
static int msum[36], ncolumn[36];
static double prox[1] /* was [1][1] */;
static float tout[1], rimpmarg[159660] /* was [36][2296] */;
static float classpop[53226] /* was [6][8871] */;
static int countimp[6] /* was [6][1][1] */;
static float rmissimp[1];
static int a[159660] /* was [36][2296] */, b[159660] /* was [36][2296] */, i__, j, k, m, n;
static float p[2296], q[26610] /* was [6][2296] */;
static int ndble[2296];
static double u[1];
static float v[2296], x_train[159660]/* was [36][2296] */,x_test[72072] /* was [36][2002] */;
static int ncase[2296];
static double y[1];
static float graph[6] /* was [6][1][1] */;
static float zz,tgini[36];
static int nodex[2296];
static int ndbigtree, nodeclass[8871];
static float tclasscat[192] /* was [6][32] */,fill[36];
static int isort[2296];
static float errts, errtr;
static int n0, n1, kk;
static float xorts[1] /* was [1][1] */,tsgini[36];
static int nodestart[8871],jints[1];
static float tclasspop[6];
static int bestsplit[8871];
static int jb, cl[2296], nc[6];
static double dl[1];
static int ta[2296], at[159660] /* was [36][2296] */;
static float pi[6], wc[6];
static double cv[1] /* was [1][1] */;
static int iv[36];
static float sm[2296];
static int kt,nr;
static float wl[6], tp[2296];
static int mr, bestsplitnext[8871], ns;
static float wr[6], xs[2296], tx[2296], ys[2296];
static int idmove[2296], parent[8871];
static float errimp[1];
static int cbestsplit[8871] /* was [1][8871] */;
static int nodestatus[8871];
static float xbestsplit[8871];
static int cat[36];
static double red[1];
static float pid[6], classwt[2];
static int clp[1], jin[2296], ncp[1], jet[1];
static double upc[1];
static double ypc[1];
static float win[2296], smo[2296], tdx[2296];
static int jts[1], jtr[2296], jvr[2296];
static double xsc[1] /* was [1][1] */;
static int mnt, out[2296];
static float xor[2296] /* was [1][2296] */, xnt[2296], cntmarg[36];
static int treemap[17742] /* was [2][8871] */;
static float xts[36] /* was [36][1] */, rmargin[2296], wtt[2296];
static int nodepop[8871], bestvar[8871];
static float outlier[1];
static int nodexts[1], counttr[26610] /* was [6][2296] */,nrcat[1];
static float countts[6] /* was [6][1] */;
if (ntest == 1){
nsample = train_max_docs;
}
if (ntest > 1){
nsample = test_max_docs;
}
Matrix* getArray = new Matrix(value_one,value_one);
getArray->copyArrayToArray(nsample_train, mdim,x_train,train_array);
getArray->copyArrayToArray(nsample_test, mdim,x_test,test_array);
/* Collapse the 7 original classes (1..7) into 2 classes (1,2), as in the adaboost algorithm program */
if (ntest == 1)
{
for (kk=0; kk < nsample_train; ++kk)
{
if (label_train_array[kk] > 3){
cl[kk]=1;
}else{
cl[kk]=2;
}
}
}
/**************************/
/* SET CATEGORICAL VALUES */
/**************************/
for (m = 1; m <= mdim; ++m)