📄 main.c
字号:
seed=time(0)+iii; random_initialize(seed); /*---------------------------------------------------------*/ /* Unblocking the two lines above will give you the same */ /* training and test sets when you re-run the code with */ /* different flag(s) in the Makefile, e.g, AUTOSCALE vs. */ /* the default (no autoscale).This is particularly useful, */ /* if you want to compare the results from different */ /* standardization procedures. */ /*---------------------------------------------------------*/ splitIntoTrainingTest(expValue,numVariables,sample,numSamples,class,numTraining); class=assign_class(sample,numSamples); } if (application==3) { /*---------------------------------------------------------*/ /* always take the last one as the leave-one out sample */ /*---------------------------------------------------------*/ if (iii !=0) move_LOO_bottom(expValue,numVariables,sample,numSamples,iii); } numSolutionObtained=0; do { initialize_chr(populationSize,numVariables,chromosomeLength,allChr,numNiches); solutionFound=0; for (ii=0; ii<numGenerations; ii++) { for (jj=0; jj<numNiches; jj++) { for (kk=0; kk<populationSize; kk++) { distance(expValue,allChr[jj][kk],chromosomeLength,numTraining,neighbors,knn); predict_class(sample,numTraining,class,neighbors,knn,predictedClass,majorityRule); (*(*(fitness+jj)+kk)).value=cal_fitness(sample,predictedClass,numTraining); (*(*(fitness+jj)+kk)).index=kk; } sort_fitness(fitness[jj],populationSize); (*(niche+jj)).index=jj; (*(niche+jj)).value=(*(*(fitness+jj))).value; if (fitness[jj][0].value >= target_R2) { /* a near-optimal solution has been obtained */ for (i=0; i<chromosomeLength; i++) bestChr[i]=allChr[jj][fitness[jj][0].index][i]; solutionFound=1; if (printFitnessScore) printf("generation[%4d]: fitness score: %4d\n",ii+1,(int)fitness[jj][0].value); /*----------------------------------------------*/ /* if a solution is found, break numNiches loop */ /*----------------------------------------------*/ break; } } if (solutionFound) { /*-------------------------------------------------*/ /* find out which genes are on the chromosome and */ /* update the number of times it has been selected.*/ /*-------------------------------------------------*/ for (i=0; i<chromosomeLength; i++) { for (j=0; j<numVariables; j++) { if (*(bestChr+i)==j) { (selectCount[j].total)++; break; } } } numSolutionObtained++; /*------------------------------------------------------*/ /* you can print the individual solutions into a file */ /*------------------------------------------------------*/ /* for (i=0; i<chromosomeLength; i++) */ /* fprintf(fq,"%4d ",bestChr[i]); */ /* fprintf(fq,"\n"); fflush(fq); */ /*------------------------------------------------------*/ break; } else { sort_fitness(niche,numNiches); if (printFitnessScore) printf("generation[%4d]: fitness score: %4d\n",ii+1,(int)niche[0].value); /*----------------------------------------------------------*/ /* Replace the worst chromosomes (total: numNiches) in each */ /* niche by the best chromsomes (one from each niche). */ /*----------------------------------------------------------*/ m=populationSize-1; n=0; do { bestNicheId=niche[n].index; bestChrId =fitness[bestNicheId][0].index; for (jj=0; jj<numNiches; jj++) { worstChrId=fitness[jj][m].index; for (i=0; i<chromosomeLength; i++) allChr[jj][worstChrId][i]=allChr[bestNicheId][bestChrId][i]; fitness[jj][m].value=niche[n].value; } /*---------------------------------------------*/ /* if numNiches is greater than populationSize */ /*---------------------------------------------*/ if (m==0) break; m--; n++; } while (n<numNiches); for (jj=0; jj<numNiches; jj++) { /*------------------------------------------------------*/ /* The worst chromosomes have been replaced by the best */ /* chromosomes, the <fitness> array needs to be updated */ /*------------------------------------------------------*/ sort_fitness(fitness[jj],populationSize); /* fitness score based selection - roulette-wheel */ roulett_wheel(fitness[jj],populationSize,weight); mutation(allChr[jj],numVariables,chromosomeLength,populationSize,weight); } } } if (numSolutionObtained !=0) { if (numSolutionObtained%10==0 || numSolutionObtained==numSolutionsSpecified) { if (application==4) printf("split[%d]: number of near-optimal solutions obtained so far: %d\n",iii+1,numSolutionObtained); else if (application==3) printf("leave-sample[%d]-out: number of near-optimal solutions obtained so far: %d\n", numSamples-iii,numSolutionObtained); else printf("number of near-optimal solutions obtained so far: %d\n",numSolutionObtained); } /* writes out the results every some steps */ if (numSolutionObtained%500==0 || numSolutionObtained==numSolutionsSpecified) { f_update=fopen("selection_count.txt","w"); fprintf(f_update,"Total number of near-optimal solutions obtained so far: %d\n\n",numSolutionObtained); fprintf(f_update,"This file can be sorted using unix command:\n"); fprintf(f_update," sort -k2 -r selection_count.out > sorted_output.txt\n"); fprintf(f_update,"It can also be displayed using any data display program.\n\n"); fprintf(f_update,"Gene ID No. of times it being selected Freq.\n"); fprintf(f_update,"------- ------------------------------ -----\n"); for (j=0; j<numVariables; j++) fprintf(f_update,"%5d %15d %5.3f\n", selectCount[j].index+1,selectCount[j].total,(double)selectCount[j].total/(double)numSolutionObtained); fprintf(f_update,"----------------------------------------------------\n"); fprintf(f_update,"Total number of variables (genes, m/z): %d\n",numVariables); fprintf(f_update,"Chromosome length (d): %d\n",chromosomeLength); t=time(NULL); c=asctime(localtime(&t)); fprintf(f_update,"\nFinished: %s",c); fclose(f_update); /*------------------------------------------------------------------*/ /* <selectCount> array contains the number of times a gene is being */ /* selected. It is being updated as the search continues. Thus a */ /* copy of it needs to be made before it is sorted. */ /*------------------------------------------------------------------*/ for (i=0; i<numVariables; i++) { countCopy[i].total=selectCount[i].total; countCopy[i].index=selectCount[i].index; } sort_count(countCopy,numVariables); /* only output at most the top 500 genes, see ga_knn.h for details */ if (numVariables>NUM_TOP_GENES) output_rank_list(countCopy,expValue,numSamples,NUM_TOP_GENES,sample,variableName); else output_rank_list(countCopy,expValue,numSamples,numVariables,sample,variableName); if (application!=3) { /* update test set prediction result */ f_update=fopen("predict_test_update.txt","w"); fprintf(f_update," number solutions obtained: %d\n",numSolutionObtained); fprintf(f_update,"\n----------------test set [%d]------------------\n\n",iii+1); fprintf(f_update," original classes: "); for (j=0; j<numTesting; j++) fprintf(f_update,"%1d",sample[j+numTraining].class); fprintf(f_update,"\n"); /* if all num of solutions obtained = num of solutions specified */ if (numSolutionObtained==numSolutionsSpecified) { fprintf(fq3,"\ntest set [%4d]:\n",iii+1); fprintf(fq3," sample index: "); for (j=0; j<numTesting; j++) fprintf(fq3,"%4d",sample[j+numTraining].id+1); fprintf(fq3,"\n"); fprintf(fq3," original classes: "); for (j=0; j<numTesting; j++) fprintf(fq3,"%4d",sample[j+numTraining].class); fprintf(fq3,"\n"); } } else { /* print out update every some solutions */ f_update=fopen("loocv_update.txt","w"); fprintf(f_update," number solutions obtained: %d\n",numSolutionObtained); fprintf(f_update,"\n----------------left-out-sample[%d]------------------\n\n",numSamples-iii); fprintf(f_update," original classes: "); for (j=0; j<numTesting; j++) fprintf(f_update,"%1d",sample[j+numTraining].class); fprintf(f_update,"\n"); /* when all num of solutions obtained = num of solutions specified */ if (numSolutionObtained==numSolutionsSpecified) { fprintf(fq3,"Note: class type 99: a sample can't be classified to a single class\n"); fprintf(fq3,"left-out-sample[%4d]:\n",numSamples-iii); fprintf(fq3," original classes: "); for (j=0; j<numTesting; j++) fprintf(fq3,"%4d",sample[j+numTraining].class); fprintf(fq3,"\n"); } } for (i=0; i<200; i++) { numTopGenes =i+1; if (numTopGenes>=numVariables) break; distance_test(expValue,numTraining,numTesting,numTopGenes,countCopy,neighbors,knn); predict_class(sample,numTesting,class,neighbors,knn,predictedClass,majorityRule); if (application!=3) { fprintf(f_update,"predicted classes: "); for (j=0; j<numTesting; j++) fprintf(f_update,"%1d",predictedClass[j]); fprintf(f_update," top-ranked variables (gene, m/z)=%d\n",numTopGenes); if (numSolutionObtained==numSolutionsSpecified) { fprintf(fq3,"top[%3d] predicted: ",numTopGenes); for (j=0; j<numTesting; j++) fprintf(fq3,"%4d",predictedClass[j]); fprintf(fq3,"\n"); fflush(fq3); } totalCorrectOneRun=0; for (j=0; j<numTesting; j++) { if (predictedClass[j]==sample[j+numTraining].class) totalCorrectOneRun++; } percentCorrectOneRun[i]=(double)totalCorrectOneRun/(double)numTesting; } else { fprintf(f_update,"predicted classes: "); for (j=0; j<numTesting; j++) fprintf(f_update,"%1d",predictedClass[j]); fprintf(f_update," number of top-ranked variables (genes or m/z) used in prediction: %d\n",numTopGenes); if (numSolutionObtained==numSolutionsSpecified) { fprintf(fq3,"top[%3d] predicted: ",numTopGenes); for (j=0; j<numTesting; j++) fprintf(fq3,"%4d",predictedClass[j]); fprintf(fq3,"\n"); fflush(fq3); } } } fclose(f_update); } } else { printf("The maximal number of generations has reached. No solution is found!\n"); printf("Make sure the termination cutoff (%d) is not too stringent.\n",target_R2); } } while (numSolutionObtained<numSolutionsSpecified); if (application==4) { fprintf(fq2,"split[%4d] top-ranked variables (genes,m/z) and percentage of correct prediction:\n",iii+1); for (i=0; i<200; i++) { if ((i+1)>=numVariables) break; fprintf(fq2,"%5d ",i+1); } fprintf(fq2,"\n"); if (numTesting !=0) { for (i=0; i<40; i++) { if ((i+1)>=numVariables) break; fprintf(fq2,"%5.3f ",percentCorrectOneRun[i]); } fprintf(fq2,"\n"); fflush(fq2); } } } fclose(fq3); if (application==4) fclose(fq2); if (sample) { destroy_sample(sample,numSamples); } if (class) { destroy_class(class); } if (neighbors[0]) { free(neighbors[0]); neighbors[0]=NULL; } if (neighbors) { free(neighbors); neighbors=NULL; } if (fitness[0]) { free(fitness[0]); fitness[0]=NULL; } if (fitness) { free(fitness); fitness=NULL; } if (niche) { free(niche); niche=NULL; } if (weight) { free(weight); weight=NULL; } if (predictedClass) { free(predictedClass); predictedClass=NULL; } if (expValue[0]) { free(expValue[0]); expValue[0]=NULL; } if (expValue) { free(expValue); expValue=NULL; } if (allChr[0][0]) { free(allChr[0][0]); allChr[0][0]=NULL; } if (allChr[0]) { free(allChr[0]); allChr[0]=NULL; } if (allChr) { free(allChr); allChr=NULL; } if (bestChr) { free(bestChr); bestChr= NULL; } if (selectCount) { free(selectCount); selectCount=NULL; } if (variableName) { free(variableName); variableName=NULL; } if (countCopy) { free(countCopy); countCopy=NULL; } if (missingIndicator[0]) { free(missingIndicator[0]); missingIndicator[0]=NULL; } if (missingIndicator) { free(missingIndicator); missingIndicator=NULL; } return (1);}/* this piece has been rewritten. It should be more robust than the previous one */double **read_data(char *inputFile,char *dataFileName,int numSamples, int numVariables,char **variableName,SampleInfo *sample,char **missingIndicator) { FILE *fp; int columnCount,rowCount; int tabCount,missingCount,len,numWhiteSpace; int *tabPosition; double **expValue; char buffer[10000],tmp[10000]; register int i,j,k; fp=fopen(dataFileName,"r"); if (!fp) { perror(dataFileName); printf("\nMake sure the data file Name and Location (path) are exactly\n"); printf(" the same as they are specified in %s.\n\n",inputFile); exit(0); } for (i=0; i<numSamples; i++) sample[i].id=i; /*-------------------------------------------------------------------*/ /* reading sample name */ /*-------------------------------------------------------------------*/ tabPosition=alloc_int(numSamples+5); fgets(buffer,10000,fp); len=strlen(buffer); buffer[len-1]='\0'; tabCount=0; for (i=0; i<len; i++) { if(buffer[i]=='\t') {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -