📄 main.c
字号:
/*-----------------------------------------------------------*/void distance(double **expValue,int *which1,int chromosomeLength, int numSamples,Neighbor **neighbors,int knn) { register int i,j,k; double sum; for (i=0; i<numSamples; i++) { (*(*(neighbors+i)+i)).dis=99999.0; (*(*(neighbors+i)+i)).id=i; } for (i=0; i<numSamples-1; i++) { for (j=i+1; j<numSamples; j++) { sum=0.0; for (k=0; k<chromosomeLength; k++) { sum += (expValue[i][which1[k]]-expValue[j][which1[k]])* (expValue[i][which1[k]]-expValue[j][which1[k]]); } neighbors[i][j].dis=sqrt(sum); neighbors[i][j].id =j; neighbors[j][i].dis=neighbors[i][j].dis; neighbors[j][i].id =i; } } /* sort the distance in ascending order */ for (i=0; i<numSamples; i++) shaker_sort_ascen(neighbors[i],numSamples,knn+1);}void distance_test(double **expValue,int numTraining,int numTesting, int numTopGenes,Solution *countCopy,Neighbor **neighbors,int knn) { register int i,j,k,l; double sum; for (i=0,l=numTraining; i<numTesting; i++,l++) { for (j=0; j<numTraining; j++) { sum=0.0; for (k=0; k<numTopGenes; k++) { sum += (expValue[l][countCopy[k].index]-expValue[j][countCopy[k].index])* (expValue[l][countCopy[k].index]-expValue[j][countCopy[k].index]); } neighbors[i][j].dis=sqrt(sum); neighbors[i][j].id =j; } } for (i=0; i<numTesting; i++) { shaker_sort_ascen(neighbors[i],numTraining,knn+1); /*----------------------------------------------- for (j=0; j<knn; j++) printf("test %d: %d[%lf]",i+1,neighbors[i][j].id,neighbors[i][j].dis); -----------------------------------------------*/ } }void center_array(double **expValue,int numSamples,int numVariables) { int i,j; double median,*gene=NULL; gene=alloc_double(numVariables); for (i=0; i<numSamples; i++) { for (j=0; j<numVariables; j++) *(gene+j)=*(*(expValue+i)+j); shaker_double_ascent(gene,numVariables); if (numVariables%2==0) median=(gene[(numVariables-2)/2]+gene[numVariables/2])/2.0; else median=gene[(numVariables-1)/2]; printf("median for array[%3d]: %8.3f\n",i+1,median); for (j=0; j<numVariables; j++) *(*(expValue+i)+j) -= median; } printf("\n"); if (gene) { free(gene); gene=NULL; }}void output_rank_list(Solution *count,double **expValue,int numSamples, int numVariables,SampleInfo *sample,char **variableName) { FILE *fp; register int i,j; fp=fopen("variable_ranked_by_GA_KNN.txt","w"); fprintf(fp,"Sample\t"); for (i=0; i<numSamples; i++) { if (i<(numSamples-1)) fprintf(fp,"%s\t",sample[i].name); else fprintf(fp,"%s\n",sample[i].name); } for (i=0; i<numVariables; i++) { fprintf(fp,"%s\t",variableName[count[i].index]); for (j=0; j<numSamples; j++) { if (j<(numSamples-1)) fprintf(fp,"%6.3f\t",expValue[j][count[i].index]); else fprintf(fp,"%6.3f\n",expValue[j][count[i].index]); } } fclose(fp);}/* standardization - z transformation */void autoscale(double **expValue,int numSamples,int numVariabless) { int i,j; double ave,sd; for (i=0; i<numVariabless; i++) { ave=0.0; for (j=0; j<numSamples; j++) { ave += *(*(expValue+j)+i); } ave /= (double)numSamples; sd=0.0; for (j=0; j<numSamples; j++) sd += (expValue[j][i]-ave)*(expValue[j][i]-ave); sd=sqrt(sd/(double)(numSamples-1)); if (sd<0.01) { for (j=0; j<numSamples; j++) expValue[j][i]=0.0; } else { for (j=0; j<numSamples; j++) expValue[j][i] = (expValue[j][i]-ave)/sd; } }}void range_scale(double **expValue,int numSamples,int numVariables) { register int i,j; double *tmp; tmp=alloc_double(numSamples); for (i=0; i<numVariables; i++) { for (j=0; j<numSamples; j++) tmp[j] = expValue[j][i]; shaker_double_ascent(tmp,numSamples); for (j=0; j<numSamples; j++) expValue[j][i]=(expValue[j][i]-tmp[0])/(tmp[numSamples-1]-tmp[0]); } if (tmp) { free(tmp); tmp=NULL; }}/*--------------------------------------------------------*//* random selection of sample id without replacement *//* the number of samples in each class in both training *//* and test sets are proportional. *//*--------------------------------------------------------*/void splitIntoTrainingTest(double **expValue,int numVariables,SampleInfo *sample, int numSamples,Class *class,int numTraining) { register int i,j,k; int cn,cn2,du,numSoFar,numNeeded,found,used; int *wh; double **tmp_value; SampleInfo *tmp_sample; wh=alloc_int(numSamples); tmp_value=alloc_double_double(numSamples,numVariables); tmp_sample=alloc_sample(numSamples); for (i=0; i<numSamples; i++) wh[i]=-1; cn=0; for (i=0; i<class->num_class; i++) { /*---------------------------------------------------*/ /* allow the number of samples in both training and */ /* test sets are proportional */ /*---------------------------------------------------*/ numNeeded=(int)(class->count[i]*(double)numTraining/(double)numSamples); numSoFar=0; while(numSoFar<numNeeded) { du=(int)(class->count[i]*random_gen()); if (du==class->count[i]) du--; used=0; for (k=0; k<cn; k++) { if (class->which[i][du]==wh[k]) { used=1; break; } } if (!used) { wh[cn]=class->which[i][du]; cn++; numSoFar++; } }; } cn2=0; for (i=0; i<numSamples; i++) { found=0; for (j=0; j<cn; j++) { if (wh[j]==i) { found=1; break; } } if (!found) { wh[cn+cn2]=i; cn2++; } } printf("total number of samples in test set: %d\n",cn2); for (i=0; i<numSamples; i++) { for (j=0; j<numVariables; j++) tmp_value[i][j] = expValue[wh[i]][j]; tmp_sample[i].id = sample[wh[i]].id; tmp_sample[i].class= sample[wh[i]].class; strcpy(tmp_sample[i].name,sample[wh[i]].name); } for (i=0; i<numSamples; i++) { for (j=0; j<numVariables; j++) expValue[i][j] = tmp_value[i][j]; sample[i].id = tmp_sample[i].id; sample[i].class = tmp_sample[i].class; strcpy(sample[i].name,tmp_sample[i].name); } if (wh) { free(wh); wh=NULL; } if (tmp_value[0]) { free(tmp_value[0]); tmp_value[0]=NULL; } if (tmp_value) { free(tmp_value); tmp_value=NULL; } if (tmp_sample) { destroy_sample(tmp_sample,numSamples); } /* re-assign class will be carried out */ if (class) { destroy_class(class); }}/* move the one to be predicted to the bottom of the data array */void move_LOO_bottom(double **expValue,int numVariables,SampleInfo *sample, int numSamples,int which) { register int j; int tmp_class,tmp_id; double tmp_value; char *tmp_name; tmp_name=alloc_char(MAX_NUM_CHAR); for (j=0; j<numVariables; j++) { tmp_value=expValue[numSamples-1][j]; expValue[numSamples-1][j]=expValue[numSamples-which-1][j]; expValue[numSamples-which-1][j]=tmp_value; } tmp_class=sample[numSamples-1].class; sample[numSamples-1].class=sample[numSamples-which-1].class; sample[numSamples-which-1].class=tmp_class; tmp_id=sample[numSamples-1].id; sample[numSamples-1].id=sample[numSamples-which-1].id; sample[numSamples-which-1].id=tmp_id; strcpy(tmp_name,sample[numSamples-1].name); strcpy(sample[numSamples-1].name,sample[numSamples-which-1].name); strcpy(sample[numSamples-which-1].name,tmp_name); if (tmp_name) { free(tmp_name); tmp_name=NULL; }}void check_knn(int numSamples,int class_min,int knn,int application) { if (application==3) { if (knn>class_min-2) { printf("\nFor leave-one-out, KNN must not be larger than the number of\n"); printf(" samples in the smallest class -2. In your case, the smallest\n"); printf(" class has %d samples, leaving-one-out leads to %d samples\n",class_min,class_min-1); printf(" left with %d neighbor(s) of the same type. The maximal\n",class_min-2); printf(" value of KNN can have for this data set is: %d.\n\n",class_min-2); printf("Please change the KNN in the input parameter file accordingly.\n\n"); exit(0); } } else if (application==4) { if (class_min<6 || numSamples<20) { printf("\nFor a small data set of less than 20 samples with two or more\n"); printf(" classes, one should use a leave-one-out cross-validation instead\n"); printf(" of leave-many-out. One should also use leave-one-out when one class\n"); printf(" has only a few samples even the overall sample size is large.\n\n"); } } if (knn>class_min-1) { printf("\nKNN must not be larger than the number of samples in the smallest\n"); printf(" class -1. In your case, the smallest class has %d samples. Thus,\n",class_min); printf(" the maximal value KNN can have is: %d.\n\n",class_min-1); printf("Please change the KNN in the input parameter file accordingly.\n\n"); exit(0); }} void rank_by_variation(double **expValue,int numSamples,int numVariables, SampleInfo *sample,char **variableName) { register int i,j,id; double ave,sd; Fitness *variation; FILE *fp; variation=(Fitness *)calloc(numVariables,sizeof(Fitness )); for (i=0; i<numVariables; i++) { ave=0; for (j=0; j<numSamples; j++) ave +=expValue[j][i]; ave /=(double)numSamples; sd=0; for (j=0; j<numSamples; j++) { sd +=(expValue[j][i]-ave)*(expValue[j][i]-ave); } variation[i].value=sd; variation[i].index=i; } sort_fitness(variation,numVariables); fp=fopen("variation_ranked.dat","w"); fprintf(fp,"Sample\t"); for (j=0; j<numSamples; j++) { if (j<numSamples-1) fprintf(fp,"%s\t",sample[j].name); else fprintf(fp,"%s\n",sample[j].name); } fprintf(fp,"Class\t"); for (j=0; j<numSamples; j++) { if (j<numSamples-1) fprintf(fp,"%1d\t",sample[j].class); else fprintf(fp,"%1d\n",sample[j].class); } for (i=0; i<numVariables; i++) { id=variation[i].index; /* if (atof(variableName[id])<=2000
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -