📄 main_lasso.cpp
#include <strstream.h>
#include <math.h>
#include "main_LASSO.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <fstream.h>
#include <ctype.h>

extern "C" int compare (const void * a, const void * b);

// qsort comparator: reads the leading number of each record and sorts in
// descending order; ties compare equal (avoids a division by zero).
int compare (const void * a, const void * b)
{
    double s_1, s_2;
    istrstream issr((char*)a, 1000);
    issr >> s_1;
    istrstream issr2((char*)b, 1000);
    issr2 >> s_2;
    if (s_2 > s_1) return 1;
    if (s_2 < s_1) return -1;
    return 0;
}

// Set up matrices, label/feature buffers, histograms and the cross-validation
// schedule (start value, increment, number of constraint values, iterations).
main_LASSO::main_LASSO(char* dfn, char* afn, int ntot, int dim,
                       double cross_start, double cross_add,
                       int cross_number, int it)
{
    iter = it;
    ntotal = ntot;
    dimension = dim + 1;          // +1 for the intercept column
    retrain = 1;
    intercept = 1e0;
    Relg = new My_Matrix();
    Relg_Buff = new My_Matrix();
    A = new My_Matrix();
    XG = new My_Matrix(ntotal, dimension-1);
    ATest = new My_Matrix();
    true_labs = new int[ntotal];
    mu = cross_start;
    mu_buff = mu;

    int i, j;
    data_file_name = new char[500];
    annot_file_name = new char[500];
    strcpy(data_file_name, dfn);
    strcpy(annot_file_name, afn);

    cv_start = cross_start;
    cv_add = cross_add;
    cv_number = cross_number;

    hist_wrong_mat = new My_Matrix(200, cv_number+2);
    hist_correct_mat = new My_Matrix(200, cv_number+2);
    for (i = 0; i < 200; i++)
        for (j = 0; j < cv_number+2; j++) {
            hist_wrong_mat->el(i,j) = 0.0;
            hist_correct_mat->el(i,j) = 0.0;
        }

    rv_hist_mat = new int*[dimension];
    for (i = 0; i < dimension; i++) {
        rv_hist_mat[i] = new int[cv_number+2];
        for (j = 0; j < cv_number+2; j++) rv_hist_mat[i][j] = 0;
    }

    gene_names = new char*[dimension];
    for (i = 0; i < dimension; i++) gene_names[i] = new char[1000];

    Y = new int[1];
    YT = new int[1];
    docalc = new int[1];
}

main_LASSO::~main_LASSO()
{
    int i;
    delete A;
    delete XG;
    delete ATest;
    delete Relg;
    delete Relg_Buff;
    delete [] Y;
    delete [] YT;
    delete hist_wrong_mat;
    delete hist_correct_mat;
    delete [] docalc;
    for (i = 0; i < dimension; i++) delete [] rv_hist_mat[i];
    delete [] rv_hist_mat;
    for (i = 0; i < dimension; i++) delete [] gene_names[i];
    delete [] gene_names;
}

// Shuffle the array in place by repeated random swaps.
void main_LASSO::randPerm(int* array, int length)
{
    int i, ind1, ind2, buff;
    for (i = 0; i < length*70; i++) {
        ind1 = rand()%length;
        ind2 = rand()%length;
        buff = array[ind1];
        array[ind1] = array[ind2];
        array[ind2] = buff;
    }
}

// Randomly split the data into a test part (ntotal/fold samples -> ATest/YT)
// and a training part (the remaining samples -> A/Y).
int main_LASSO::cross_rand(double fold)
{
    int i, j, nT, nL;
    int* rnd = new int[ntotal];
    for (i = 0; i < ntotal; i++) rnd[i] = i;
    randPerm(rnd, ntotal);

    nT = int((double)ntotal/fold);
    int dim = XG->Ncols();
    nL = ntotal - nT;

    A->ReDimension(nL, dim);
    delete [] Y;
    Y = new int[nL];
    delete [] YT;
    YT = new int[nT];
    ATest->ReDimension(nT, dim);

    for (i = 0; i < nT; i++) {
        for (j = 0; j < dim; j++)
            ATest->el(i,j) = XG->el(rnd[i], j);
        YT[i] = true_labs[rnd[i]];
    }
    for (i = nT; i < ntotal; i++) {
        for (j = 0; j < dim; j++)
            A->el(i-nT,j) = XG->el(rnd[i], j);
        Y[i-nT] = true_labs[rnd[i]];
    }
    delete [] rnd;
    return 1;
}

// Append a constant (intercept) column to XG.
void main_LASSO::Enlarge_const(double const_val)
{
    Acols = XG->Ncols() + 1;
    int n = XG->Nrows();
    XG->Enlarge(n, Acols);
    int i;
    for (i = 0; i < n; i++) XG->el(i, Acols-1) = const_val;
}

// Apply the fitted model to the test split: record which features carry
// non-zero weights, bin the predicted class probabilities into the
// correct/wrong histograms, and return the misclassification rate.
double main_LASSO::Predict(int cvi)
{
    int i, j;
    int c = LM->getRV_numb();
    rv_numb = c;
    double* rv = LM->getRV();
    for (i = 0; i < c; i++)
        rv_hist_mat[int(rv[i])][cvi] += 1;

    double* Projected = LM->getProjected();
    double* alpha = LM->getAlpha();
    int n = A->Nrows();
    int nT = ATest->Nrows();
    int k;
    double* ProjectedTest = new double[nT];
    int dimH = ATest->Ncols();

    // Mark the features that enter the linear predictor.
    delete [] docalc;
    docalc = new int[dimH];
    for (i = 0; i < dimH; i++) docalc[i] = 0;
    for (k = 0; k < c; k++) docalc[(int) rv[k]] = 1;

    // Linear predictor for each test sample, restricted to active features.
    for (j = 0; j < nT; j++) {
        double sum = 0.0;
        for (k = 0; k < Acols; k++) {
            if (docalc[k] == 0) continue;
            sum += ATest->el(j,k)*alpha[k];
        }
        ProjectedTest[j] = sum;
    }

    // Probability of the predicted class under the logistic model.
    double* pi = new double[nT];
    for (j = 0; j < nT; j++) {
        pi[j] = 1.0/(1.0 + exp(-ProjectedTest[j]));
        if (ProjectedTest[j] < 0) pi[j] = 1.0 - pi[j];
    }

    // Histogram bin of each probability (bin width 0.05).
    int* bin = new int[nT];
    double width = 0.05;
    for (j = 0; j < nT; j++)
        bin[j] = (int)(pi[j]/width + 1e-8);

    // Count misclassifications and fill the histograms.
    double summs = 0.;
    for (j = 0; j < nT; j++) {
        if (ProjectedTest[j] > 0 && YT[j] == 0) {
            summs++;
            hist_wrong_mat->el(bin[j],cvi)++;
        }
        else if (ProjectedTest[j] < 0 && YT[j] == 1) {
            summs++;
            hist_wrong_mat->el(bin[j],cvi)++;
        }
        else {
            hist_correct_mat->el(bin[j],cvi)++;
        }
    }

    delete [] ProjectedTest;
    delete [] pi;
    delete [] bin;
    return summs/nT;
}

// Read class labels and the feature matrix from the data file (one feature at
// a time, ntotal values each), read feature/gene names from the annotation
// file, then append the intercept column.
void main_LASSO::ReadData()
{
    int i, j;
    ifstream data_file, annot_file;
    data_file.open(data_file_name, ios::in);
    annot_file.open(annot_file_name, ios::in);

    for (i = 0; i < ntotal; i++) data_file >> true_labs[i];
    for (j = 0; j < dimension-1; j++)
        for (i = 0; i < ntotal; i++)
            data_file >> XG->el(i,j);

    for (i = 0; i < dimension-1; i++)
        annot_file.getline(gene_names[i], 1000);
    annot_file.close();

    Enlarge_const(intercept);
}

// Re-train on the reduced feature set stored in Relg_Buff: rebuild XG,
// clear the histograms and counters, and run Classify() again.
void main_LASSO::Classify_Reduced()
{
    int i, j;
    retrain = 1;
    transpose(*Relg_Buff, *XG);
    Enlarge_const(intercept);

    for (i = 0; i < dimension; i++)
        for (j = 0; j < cv_number+2; j++)
            rv_hist_mat[i][j] = 0;
    for (i = 0; i < 50; i++)
        for (j = 0; j < cv_number+2; j++) {
            hist_wrong_mat->el(i,j) = 0.;
            hist_correct_mat->el(i,j) = 0.;
        }

    cout << "*******************************************" << endl;
    cout << "Re-training with reduced feature set..." << endl;
    cout << "*******************************************" << endl;
    Classify();
}

// Main cross-validation loop: for iter repetitions, draw a random 1/5 test
// split, fit the LASSO logistic regression for an increasing sequence of
// constraint values kappa (mu, mu+cv_add, ...), record the test error for
// each kappa, and finally report mean error and spread per kappa.
void main_LASSO::Classify()
{
    int i, j, k, l;
    int min_numb = cv_number + 1;
    My_Matrix* erg = new My_Matrix(iter, min_numb+1);   // test errors per run/kappa
    double const_val = intercept;
    int fold_n = 5;
    double zw_e = 0.;
    int* index = new int[XG->Ncols()];
    double* beta = new double[XG->Ncols()];
    int ri, ii;
    double er, rv_sum = 0.;
    double* mu_vec = new double[min_numb+1];
    double* rv_vec = new double[min_numb+1];
    for (i = 0; i < min_numb+1; i++) rv_vec[i] = 0.0;

    cross_rand(fold_n);
    for (i = 0; i < iter; i++) {
        mu = mu_buff;
        mu_vec[0] = mu;
        cout << "Iteration: " << i << ". " << "k = " << mu << ", " << flush;
        cross_rand(fold_n);

        // Start from a single randomly chosen feature with a tiny weight.
        for (j = 0; j < XG->Ncols(); j++) {
            index[j] = 0;
            beta[j] = 0.0;
        }
        ri = rand()%(XG->Ncols()-1);
        index[ri] = 1;
        beta[ri] = 1e-7;

        LM = new LogregLASSO(A, Y, const_val, mu, index, beta);
        LM->regress();
        erg->el(i,0) = Predict(0);
        rv_vec[0] += rv_numb;

        // Warm-start the fit for each subsequent constraint value.
        for (ii = 0; ii < cv_number; ii++) {
            for (j = 0; j < A->Ncols(); j++) {
                beta[j] = LM->getAlpha()[j];
                index[j] = docalc[j];
            }
            mu += cv_add;
            cout << "k = " << mu << ", " << flush;
            mu_vec[ii+1] = mu;
            LM->SetInitVals(index, beta, mu);
            LM->regress();
            erg->el(i,ii+1) = Predict(ii+1);
            rv_vec[ii+1] += rv_numb;
        }
        delete LM;
        cout << endl;
    }

    for (i = 0; i < min_numb; i++) rv_vec[i] /= (double)iter;

    double min_mean = 1e20, best_mu, best_std, best_rv;
    double meanE = 0.;
    double spreadE = 0.;
    int best_cv;
    cout << endl;
    cout << endl;
    cout << "Results for different constraint values kappa:" << endl;
    for (ii = 0; ii < min_numb; ii++) {
        meanE = 0.;
        spreadE = 0.;
        for (i = 0; i < iter; i++) meanE += erg->el(i,ii);
        meanE /= iter;
        for (i = 0; i < iter; i++) spreadE += pow(erg->el(i,ii)-meanE, 2);
        spreadE /= iter;
        cout << "kappa: " << mu_vec[ii] << endl;
        cout << "mean error: " << meanE << endl;
        if (meanE < min_mean) {
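The listing ends part-way through Classify(), but the public entry points it defines (the constructor, ReadData(), Classify(), Classify_Reduced()) are enough to sketch how the class is driven. The snippet below is a usage sketch only: the file names, sample count, feature count and constraint schedule are placeholder assumptions rather than values from the original project, and it presumes main_LASSO.h plus the My_Matrix and LogregLASSO sources are available to compile against.

// Minimal usage sketch. All concrete values below (file names, 100 samples,
// 2000 features, kappa starting at 1.0 with 10 steps of 1.0, 20 iterations)
// are hypothetical placeholders.
#include "main_LASSO.h"

int main()
{
    char data_file[]  = "data.txt";    // labels followed by the feature matrix
    char annot_file[] = "genes.txt";   // one feature/gene name per line

    main_LASSO lasso(data_file, annot_file,
                     100,    // number of samples (ntot)
                     2000,   // number of features (dim; intercept added internally)
                     1.0,    // first constraint value kappa (cross_start)
                     1.0,    // increment per step (cross_add)
                     10,     // number of constraint values (cross_number)
                     20);    // repetitions of the random-split loop (it)

    lasso.ReadData();   // load labels, feature matrix and gene names
    lasso.Classify();   // cross-validated LASSO logistic regression over all kappa
    // lasso.Classify_Reduced() would re-run training on a reduced feature set.
    return 0;
}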