⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 main.c

📁 GA/KNN 算法源码
💻 C
📖 第 1 页 / 共 5 页
字号:
      seed=time(0)+iii;      random_initialize(seed);      /*---------------------------------------------------------*/      /* Unblocking the two lines above will give you the same   */       /* training and test sets when you re-run the code with    */      /* different flag(s) in the Makefile, e.g, AUTOSCALE vs.   */      /* the default (no autoscale).This is particularly useful, */      /* if you want to compare the results from different       */      /* standardization procedures.                             */      /*---------------------------------------------------------*/         splitIntoTrainingTest(expValue,numVariables,sample,numSamples,class,numTraining);         class=assign_class(sample,numSamples);      }      if (application==3) {         /*---------------------------------------------------------*/         /* always take the last one as the leave-one out sample   */         /*---------------------------------------------------------*/         if (iii !=0)            move_LOO_bottom(expValue,numVariables,sample,numSamples,iii);      }      numSolutionObtained=0;      do {            initialize_chr(populationSize,numVariables,chromosomeLength,allChr,numNiches);         solutionFound=0;         for (ii=0; ii<numGenerations; ii++) {            for (jj=0; jj<numNiches; jj++) {               for (kk=0; kk<populationSize; kk++) {                                  distance(expValue,allChr[jj][kk],chromosomeLength,numTraining,neighbors,knn);                  predict_class(sample,numTraining,class,neighbors,knn,predictedClass,majorityRule);                  (*(*(fitness+jj)+kk)).value=cal_fitness(sample,predictedClass,numTraining);                  (*(*(fitness+jj)+kk)).index=kk;               }               sort_fitness(fitness[jj],populationSize);               (*(niche+jj)).index=jj;               (*(niche+jj)).value=(*(*(fitness+jj))).value;                        if (fitness[jj][0].value >= target_R2) {                  /* a near-optimal solution 
has been obtained */                  for (i=0; i<chromosomeLength; i++)                     bestChr[i]=allChr[jj][fitness[jj][0].index][i];                     solutionFound=1;                  if (printFitnessScore)                      printf("generation[%4d]: fitness score: %4d\n",ii+1,(int)fitness[jj][0].value);                                   /*----------------------------------------------*/                   /* if a solution is found, break numNiches loop */                  /*----------------------------------------------*/                   break;                }            }            if (solutionFound) {               /*-------------------------------------------------*/                /* find out which genes are on the chromosome and  */               /* update the number of times it has been selected.*/               /*-------------------------------------------------*/                for (i=0; i<chromosomeLength; i++) {                  for (j=0; j<numVariables; j++) {                     if (*(bestChr+i)==j) { (selectCount[j].total)++; break; }                  }               }               numSolutionObtained++;                  /*------------------------------------------------------*/                /*   you can print the individual solutions into a file */                /*------------------------------------------------------*/                /* for (i=0; i<chromosomeLength; i++)                   */               /*    fprintf(fq,"%4d ",bestChr[i]);                    */               /* fprintf(fq,"\n"); fflush(fq);                        */               /*------------------------------------------------------*/                   break;             }               else {                  sort_fitness(niche,numNiches);               if (printFitnessScore)                   printf("generation[%4d]: fitness score: %4d\n",ii+1,(int)niche[0].value);                    /*----------------------------------------------------------*/            
    /* Replace the worst chromosomes (total: numNiches) in each */               /* niche by the best chromsomes (one from each niche).      */               /*----------------------------------------------------------*/                   m=populationSize-1; n=0;               do {                     bestNicheId=niche[n].index;                  bestChrId  =fitness[bestNicheId][0].index;                     for (jj=0; jj<numNiches; jj++) {                     worstChrId=fitness[jj][m].index;                     for (i=0; i<chromosomeLength; i++)                        allChr[jj][worstChrId][i]=allChr[bestNicheId][bestChrId][i];                     fitness[jj][m].value=niche[n].value;                    }                     /*---------------------------------------------*/                  /* if numNiches is greater than populationSize */                  /*---------------------------------------------*/                  if (m==0) break;                  m--; n++;               } while (n<numNiches);                  for (jj=0; jj<numNiches; jj++) {                     /*------------------------------------------------------*/                   /* The worst chromosomes have been replaced by the best */                  /* chromosomes, the <fitness> array needs to be updated */                  /*------------------------------------------------------*/                   sort_fitness(fitness[jj],populationSize);                    /* fitness score based selection  - roulette-wheel       */                   roulett_wheel(fitness[jj],populationSize,weight);                  mutation(allChr[jj],numVariables,chromosomeLength,populationSize,weight);               }            }          }              if (numSolutionObtained !=0) {                      if (numSolutionObtained%10==0 || numSolutionObtained==numSolutionsSpecified) {               if (application==4)                  printf("split[%d]: number of near-optimal solutions obtained so far: 
%d\n",iii+1,numSolutionObtained);               else if (application==3)                  printf("leave-sample[%d]-out: number of near-optimal solutions obtained so far: %d\n",                  numSamples-iii,numSolutionObtained);               else                  printf("number of near-optimal solutions obtained so far: %d\n",numSolutionObtained);            }            /* writes out the results every some steps */            if (numSolutionObtained%500==0 || numSolutionObtained==numSolutionsSpecified) {               f_update=fopen("selection_count.txt","w");               fprintf(f_update,"Total number of near-optimal solutions obtained so far: %d\n\n",numSolutionObtained);               fprintf(f_update,"This file can be sorted using unix command:\n");               fprintf(f_update,"   sort -k2 -r selection_count.out > sorted_output.txt\n");                fprintf(f_update,"It can also be displayed using any data display program.\n\n");               fprintf(f_update,"Gene ID  No. 
of times it being selected  Freq.\n");               fprintf(f_update,"-------  ------------------------------  -----\n");               for (j=0; j<numVariables; j++)                  fprintf(f_update,"%5d      %15d               %5.3f\n",                     selectCount[j].index+1,selectCount[j].total,(double)selectCount[j].total/(double)numSolutionObtained);               fprintf(f_update,"----------------------------------------------------\n");               fprintf(f_update,"Total number of variables (genes, m/z): %d\n",numVariables);               fprintf(f_update,"Chromosome length (d):                  %d\n",chromosomeLength);                  t=time(NULL);               c=asctime(localtime(&t));               fprintf(f_update,"\nFinished: %s",c);               fclose(f_update);                  /*------------------------------------------------------------------*/               /* <selectCount> array contains the number of times a gene is being */               /* selected. It is being updated as the search continues. Thus a    */               /* copy of it needs to be made before it is sorted.                 
*/               /*------------------------------------------------------------------*/               for (i=0; i<numVariables; i++) {                  countCopy[i].total=selectCount[i].total;                   countCopy[i].index=selectCount[i].index;                }                  sort_count(countCopy,numVariables);               /* only output at most the top 500 genes, see ga_knn.h for details */               if (numVariables>NUM_TOP_GENES)                  output_rank_list(countCopy,expValue,numSamples,NUM_TOP_GENES,sample,variableName);               else                   output_rank_list(countCopy,expValue,numSamples,numVariables,sample,variableName);               if (application!=3) {                  /* update test set prediction result */                  f_update=fopen("predict_test_update.txt","w");                  fprintf(f_update,"  number solutions obtained: %d\n",numSolutionObtained);                  fprintf(f_update,"\n----------------test set [%d]------------------\n\n",iii+1);                  fprintf(f_update," original classes: ");                  for (j=0; j<numTesting; j++) fprintf(f_update,"%1d",sample[j+numTraining].class);                  fprintf(f_update,"\n");                          /* if all num of solutions obtained = num of solutions specified */                  if (numSolutionObtained==numSolutionsSpecified) {                     fprintf(fq3,"\ntest set [%4d]:\n",iii+1);                     fprintf(fq3,"      sample index: ");                     for (j=0; j<numTesting; j++) fprintf(fq3,"%4d",sample[j+numTraining].id+1);                     fprintf(fq3,"\n");                     fprintf(fq3,"  original classes: ");                     for (j=0; j<numTesting; j++) fprintf(fq3,"%4d",sample[j+numTraining].class);                     fprintf(fq3,"\n");                  }               }               else {                  /* print out update every some solutions */                  f_update=fopen("loocv_update.txt","w");    
              fprintf(f_update,"  number solutions obtained: %d\n",numSolutionObtained);                  fprintf(f_update,"\n----------------left-out-sample[%d]------------------\n\n",numSamples-iii);                  fprintf(f_update," original classes: ");                  for (j=0; j<numTesting; j++) fprintf(f_update,"%1d",sample[j+numTraining].class);                  fprintf(f_update,"\n");                          /* when all num of solutions obtained = num of solutions specified */                  if (numSolutionObtained==numSolutionsSpecified) {                     fprintf(fq3,"Note: class type 99: a sample can't be classified to a single class\n");                     fprintf(fq3,"left-out-sample[%4d]:\n",numSamples-iii);                     fprintf(fq3,"  original classes: ");                     for (j=0; j<numTesting; j++)                        fprintf(fq3,"%4d",sample[j+numTraining].class);                     fprintf(fq3,"\n");                  }               }               for (i=0; i<200; i++) {                  numTopGenes =i+1;                  if (numTopGenes>=numVariables) break;                  distance_test(expValue,numTraining,numTesting,numTopGenes,countCopy,neighbors,knn);                  predict_class(sample,numTesting,class,neighbors,knn,predictedClass,majorityRule);                     if (application!=3) {                     fprintf(f_update,"predicted classes: ");                     for (j=0; j<numTesting; j++)                        fprintf(f_update,"%1d",predictedClass[j]);                     fprintf(f_update,"  top-ranked variables (gene, m/z)=%d\n",numTopGenes);                     if (numSolutionObtained==numSolutionsSpecified) {                         fprintf(fq3,"top[%3d] predicted: ",numTopGenes);                        for (j=0; j<numTesting; j++)                           fprintf(fq3,"%4d",predictedClass[j]);                        fprintf(fq3,"\n");                        fflush(fq3);                     }         
            totalCorrectOneRun=0;                     for (j=0; j<numTesting; j++) {                        if (predictedClass[j]==sample[j+numTraining].class)                           totalCorrectOneRun++;                      }                     percentCorrectOneRun[i]=(double)totalCorrectOneRun/(double)numTesting;                  }                  else {                     fprintf(f_update,"predicted classes: ");                     for (j=0; j<numTesting; j++)                        fprintf(f_update,"%1d",predictedClass[j]);                     fprintf(f_update,"  number of top-ranked variables (genes or m/z) used in prediction: %d\n",numTopGenes);                     if (numSolutionObtained==numSolutionsSpecified) {                         fprintf(fq3,"top[%3d] predicted: ",numTopGenes);                        for (j=0; j<numTesting; j++)                           fprintf(fq3,"%4d",predictedClass[j]);                        fprintf(fq3,"\n");                        fflush(fq3);                     }                  }               }               fclose(f_update);            }         }         else {            printf("The maximal number of generations has reached. 
No solution is found!\n");            printf("Make sure the termination cutoff (%d) is not too stringent.\n",target_R2);         }      } while (numSolutionObtained<numSolutionsSpecified);      if (application==4) {         fprintf(fq2,"split[%4d] top-ranked variables (genes,m/z) and percentage of correct prediction:\n",iii+1);         for (i=0; i<200; i++) {             if ((i+1)>=numVariables) break;             fprintf(fq2,"%5d ",i+1);         }         fprintf(fq2,"\n");         if (numTesting !=0) {            for (i=0; i<40; i++) {                if ((i+1)>=numVariables) break;                fprintf(fq2,"%5.3f ",percentCorrectOneRun[i]);            }            fprintf(fq2,"\n");            fflush(fq2);             }      }   }   fclose(fq3);   if (application==4)  fclose(fq2);    if (sample)              { destroy_sample(sample,numSamples);         }   if (class)               { destroy_class(class);                      }   if (neighbors[0])        { free(neighbors[0]);   neighbors[0]=NULL;   }   if (neighbors)           { free(neighbors);      neighbors=NULL;      }   if (fitness[0])          { free(fitness[0]);     fitness[0]=NULL;     }   if (fitness)             { free(fitness);        fitness=NULL;        }   if (niche)               { free(niche);          niche=NULL;          }   if (weight)              { free(weight);         weight=NULL;         }   if (predictedClass)      { free(predictedClass); predictedClass=NULL; }   if (expValue[0])         { free(expValue[0]);    expValue[0]=NULL;    }   if (expValue)            { free(expValue);       expValue=NULL;       }   if (allChr[0][0])        { free(allChr[0][0]);   allChr[0][0]=NULL;   }   if (allChr[0])           { free(allChr[0]);      allChr[0]=NULL;      }   if (allChr)              { free(allChr);         allChr=NULL;         }   if (bestChr)             { free(bestChr);        bestChr= NULL;       }   if (selectCount)         { free(selectCount);    selectCount=NULL;    }   if 
(variableName)            { free(variableName);       variableName=NULL;       }   if (countCopy)           { free(countCopy);      countCopy=NULL;      }   if (missingIndicator[0]) { free(missingIndicator[0]); missingIndicator[0]=NULL; }   if (missingIndicator)    { free(missingIndicator);    missingIndicator=NULL;    }   return (1);}/* this piece has been rewritten. It should be more robust than the previous one */double **read_data(char *inputFile,char *dataFileName,int numSamples,   int numVariables,char **variableName,SampleInfo *sample,char **missingIndicator) {   FILE *fp;   int columnCount,rowCount;   int tabCount,missingCount,len,numWhiteSpace;   int *tabPosition;   double **expValue;   char buffer[10000],tmp[10000];   register int i,j,k;   fp=fopen(dataFileName,"r");   if (!fp) {      perror(dataFileName);       printf("\nMake sure the data file Name and Location (path) are exactly\n");      printf(" the same as they are specified in %s.\n\n",inputFile);      exit(0);    }    for (i=0; i<numSamples; i++) sample[i].id=i;   /*-------------------------------------------------------------------*/   /*                   reading sample name                             */   /*-------------------------------------------------------------------*/   tabPosition=alloc_int(numSamples+5);   fgets(buffer,10000,fp);   len=strlen(buffer);   buffer[len-1]='\0';   tabCount=0;   for (i=0; i<len; i++) {      if(buffer[i]=='\t') {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -