⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 main_lasso.cpp

📁 统计学习软件包
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include <strstream.h>
#include <math.h>
#include "main_LASSO.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <fstream.h>
#include <ctype.h>

extern "C" int compare (const void * a, const void * b);

// qsort comparator: each record starts with a double; order records by that
// key in DESCENDING order.
// FIX: the original returned (int)((s_2-s_1)/fabs(s_2-s_1)), which divides
// by zero when the two keys are equal (NaN -> int conversion is undefined
// behavior and breaks qsort's ordering contract).  Compare directly instead.
// The values are also zero-initialized so a failed extraction cannot leave
// them uninitialized.
int compare (const void * a, const void * b)
{
    double s_1 = 0.0, s_2 = 0.0;
    istrstream issr((char*)a, 1000);
    issr >> s_1;
    istrstream issr2((char*)b, 1000);
    issr2 >> s_2;
    if (s_2 > s_1) return 1;
    if (s_2 < s_1) return -1;
    return 0;
}

// Construct the cross-validation driver.
//   dfn/afn      : data file / annotation (gene-name) file paths (copied)
//   ntot         : number of samples
//   dim          : number of features (an intercept column is added, so the
//                  internal dimension is dim+1)
//   cross_start  : first constraint value kappa tried
//   cross_add    : increment between successive kappa values
//   cross_number : how many additional kappa values to try
//   it           : number of cross-validation iterations
main_LASSO::main_LASSO(char* dfn, char* afn, int ntot, int dim,
                       double cross_start, double cross_add,
                       int cross_number, int it)
{
    iter = it;
    ntotal = ntot;
    dimension = dim+1;              // +1 for the intercept column
    retrain = 1;
    intercept = 1e0;

    Relg = new My_Matrix();
    Relg_Buff = new My_Matrix();
    A = new My_Matrix();
    XG = new My_Matrix(ntotal, dimension-1);
    ATest = new My_Matrix();
    true_labs = new int[ntotal];

    mu = cross_start;
    mu_buff = mu;

    int i,j;
    data_file_name = new char[500];
    annot_file_name = new char[500];
    strcpy(data_file_name, dfn);
    strcpy(annot_file_name, afn);

    cv_start = cross_start;
    cv_add = cross_add;
    cv_number = cross_number;

    // Per-bin correct/wrong prediction histograms, one column per
    // cross-validation setting (plus two spare columns).
    hist_wrong_mat = new My_Matrix(200, cv_number+2);
    hist_correct_mat = new My_Matrix(200, cv_number+2);
    for ( i = 0; i < 200; i++)
	for(j=0; j<cv_number+2; j++){
	    hist_wrong_mat->el(i,j) = 0.0;
	    hist_correct_mat->el(i,j) = 0.0;
	}

    // rv_hist_mat[f][c] counts how often feature f was selected in column c.
    rv_hist_mat = new int*[dimension];
    for ( i = 0; i < dimension; i++){
	rv_hist_mat[i] = new int[cv_number+2];
	for(j=0; j<cv_number+2; j++)
	    rv_hist_mat[i][j] = 0;
    }

    gene_names = new char*[dimension];
    for(i=0; i< dimension; i++)
        gene_names[i] = new char[1000];

    // Placeholder allocations; resized for real in cross_rand()/Predict().
    Y = new int[1];
    YT = new int[1];
    docalc = new int[1];
}

main_LASSO::~main_LASSO()
{
    int i;
    delete A;
    delete XG;
    delete ATest;
    delete Relg;
    delete Relg_Buff;
    delete [] Y;
    delete [] YT;
    delete hist_wrong_mat;
    delete hist_correct_mat;
    delete [] docalc;
    // FIX: these three were allocated in the constructor but never freed.
    delete [] true_labs;
    delete [] data_file_name;
    delete [] annot_file_name;
    for ( i = 0; i < dimension; i++)
	delete [] 
rv_hist_mat[i];    delete [] rv_hist_mat;    for(i=0; i< dimension; i++)	delete []  gene_names[i];    delete [] gene_names;    }void main_LASSO::randPerm(int* array, int length){    int i,j, ind1, ind2, buff;    for(i=0; i<length*70; i++){	ind1 = rand()%length;	ind2 = rand()%length;	buff = array[ind1];	array[ind1] = array[ind2];	array[ind2] = buff;    }	}int main_LASSO::cross_rand( double fold ){    int i,j, nT, nL;    int* rnd = new int[ntotal];    for(i=0; i< ntotal ; i++)	rnd[i] = i;    randPerm(rnd, ntotal);    nT = int((double)ntotal/fold);    int dim = XG->Ncols();        nL = ntotal -nT;    A->ReDimension(nL, dim);          delete [] Y;    Y = new int[nL];    delete [] YT;    YT = new int[nT];        ATest->ReDimension(nT, dim);    for(i=0; i< nT; i++){	for(j=0; j<dim; j++){	    ATest->el(i,j) = XG->el(rnd[i], j);	}	YT[i] = true_labs[rnd[i]];    }        for(i=nT; i< ntotal; i++){	for(j=0; j<dim; j++){	    A->el(i-nT,j) = XG->el(rnd[i], j);	}	Y[i-nT] = true_labs[rnd[i]];    }    delete [] rnd;    return 1; }void main_LASSO::Enlarge_const(double const_val){    Acols = XG->Ncols()+1;    int n = XG->Nrows();        XG->Enlarge(n,Acols);    int i;    for(i =0; i<n; i++)	XG->el(i, Acols-1) = const_val;   }double main_LASSO::Predict(int cvi){  int i,j;   int c = LM->getRV_numb();  rv_numb = c;  double* rv =  LM->getRV();   for(i= 0; i < c; i++){     rv_hist_mat[int(rv[i])][cvi] += 1;  }    double* Projected =  LM->getProjected();   double* alpha =  LM->getAlpha();   int n = A->Nrows();  int nT = ATest->Nrows();  int k;   double* ProjectedTest = new double[nT];   int dimH = ATest->Ncols();  int start = 0;  double sum;  double diff;   double sum1;  int do_c;   delete [] docalc;  docalc= new int[dimH];   for(i=0; i<dimH; i++)    docalc[i] = 0;  for(k=0; k< c; k++){      docalc[(int) rv[k]] = 1;  }  for(j=0; j<nT; j++){         sum = 0.0;     for(k=0; k<Acols; k++){	 if(docalc[k] == 0)	     continue;	 sum += ATest->el(j,k)*alpha[k];     }         ProjectedTest[j] = 
sum;   }  double* pi = new double[ATest->Nrows()];  for(j=0; j<ATest->Nrows();j++){      pi[j] = 1.0/(1.0 + exp( -ProjectedTest[j] ));      if(ProjectedTest[j]<0){	  pi[j] = 1.0 -  pi[j];      }  }  int*  bin = new int[ATest->Nrows()];  double width = 0.05;  for(j=0; j<ATest->Nrows();j++){      bin[j] = (int)(pi[j] / width+1e-8);  }    double summs = 0.;  for(j= 0; j <  ATest->Nrows(); j++){      if(ProjectedTest[j] >0 && YT[j] ==0){	  summs++;	  hist_wrong_mat->el(bin[j],cvi)++;      }      else{	  if(ProjectedTest[j]<0 && YT[j]==1){	      summs++;	      hist_wrong_mat->el(bin[j],cvi)++;	  }	  else{	      hist_correct_mat->el(bin[j],cvi)++;	  }      }  }   delete [] ProjectedTest;  delete [] pi;  delete [] bin;  return summs/ATest->Nrows();  }void main_LASSO::ReadData(){    int i,j;    ifstream data_file, annot_file;    data_file.open(data_file_name, ios::in);    annot_file.open(annot_file_name, ios::in);        for(i=0; i< ntotal; i++)	data_file >> true_labs[i];    for(j=0; j <dimension-1; j++)	for(i=0; i< ntotal; i++)	    data_file >> XG->el(i,j);         for(i=0; i< dimension-1; i++)	 annot_file.getline(gene_names[i],1000);     annot_file.close();     Enlarge_const(intercept);     }void main_LASSO::Classify_Reduced(){    int i,j;    retrain = 1;    transpose(*Relg_Buff, *XG);    Enlarge_const(intercept);    for(i=0; i<dimension; i++)	for(j=0; j<cv_number+2; j++)	    rv_hist_mat[i][j] = 0;    for(i=0; i<50; i++)	for(j=0; j<cv_number+2; j++){	    hist_wrong_mat->el(i,j) = 0.;	    hist_correct_mat->el(i,j) =0.;    }    cout << "*******************************************" << endl;    cout << "Re-training with reduced feature set..." 
<<endl;    cout << "*******************************************" << endl;    Classify();}void main_LASSO::Classify(){  int i,j,k,l;    int min_numb = cv_number + 1;  My_Matrix* erg = new My_Matrix(iter, min_numb+1);   double const_val = intercept;  int fold_n = 5;  double zw_e =0.;       int* index = new int[XG->Ncols()];  double* beta = new double[XG->Ncols()];  int ri,ii ;  double er, rv_sum =0.;    double* mu_vec = new double[min_numb+1];  double* rv_vec = new double[min_numb+1];    for(i=0; i< min_numb+1; i++)      rv_vec[i] = 0.0;   cross_rand(fold_n);  for(i=0; i<iter; i++){          mu = mu_buff;      mu_vec[0] = mu;      cout <<"Iteration: " << i << ". "<< "k = " <<mu <<", "<<flush;            cross_rand(fold_n);      for(j=0; j<XG->Ncols(); j++){	  index[j] =0;	  beta[j] = 0.0;      }      ri =  rand()%(XG->Ncols()-1);      index[ri] = 1;      beta[ri] = 1e-7;      LM = new LogregLASSO(A, Y, const_val, mu, index, beta);            LM->regress();      erg->el(i,0) = Predict(0);          rv_vec[0] += rv_numb;            for(ii=0; ii< cv_number; ii++){		  for(j=0; j<A->Ncols(); j++){	      beta[j] =  LM->getAlpha()[j];	      index[j] = docalc[j];	  }	  mu += cv_add;	  cout <<"k = "<<mu <<", "<<flush;	  mu_vec[ii+1] = mu;	   LM->SetInitVals(index, beta, mu);	   LM->regress();	  erg->el(i,ii+1) = Predict(ii+1);	  rv_vec[ii+1] += rv_numb;      }            delete LM;      cout << endl;    }  for(i=0; i< min_numb; i++)      rv_vec[i] /= (double)iter;   double min_mean = 1e20, best_mu, best_std, best_rv;  double meanE = 0.;  double spreadE = 0.;  int best_cv;  cout << endl;  cout << endl;  cout << "Results for different constraint values  kappa:" <<endl;  for(ii=0; ii< min_numb; ii++){      meanE = 0.;      spreadE = 0.;      for(i=0; i<iter; i++)	  meanE += erg->el(i,ii);      meanE /= iter;      for(i=0; i<iter; i++)	  spreadE += pow(erg->el(i,ii)-meanE,2);      spreadE /= iter;      cout << "kappa: " << mu_vec[ii] <<endl;      cout <<"mean error: "<<meanE << 
endl;      if(meanE < min_mean){

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -