📄 main.c
字号:
numclass++; } } cn=alloc_int(numclass); for (j=0; j<numclass; j++) cn[j]=0; for (i=0; i<numSamples; i++) { for (j=0; j<numclass; j++) { if (sample[i].class==class->type[j]) { class->which[j][cn[j]]=i; (cn[j])++; break; } } } for (j=0; j<numclass; j++) class->count[j]=cn[j]; class->num_class=numclass; if (cn) { free(cn); cn=NULL; } cmin=9999; cmax=0; for (j=0; j<numclass; j++) { if (class->count[j]<cmin) cmin=class->count[j]; if (class->count[j]>cmax) cmax=class->count[j]; } class->max=cmax; class->min=cmin; printf("\nTotal number of classes: %4d, individual class type: ",numclass); for (i=0; i<numclass; i++) printf("%3d ",class->type[i]); printf(".\n"); printf("Number of samples in each class: "); for (j=0; j<numclass; j++) printf("%3d [type - %1d] ",class->count[j],class->type[j]); printf("min max=%4d %4d\n",class->min,class->max); printf("\n"); for (i=0; i<class->num_class; i++) { printf("class[%d]: ",class->type[i]); for (j=0; j<class->count[i]; j++) { printf("%4d ",class->which[i][j]+1); } printf("\n"); } if (numclass==1) { printf("\n\tThe GA/KNN algorithm selects the most discriminative variables for classification.\n"); printf("The data must consist of at least two classes of samples (e.g, normal vs tumor). It is\n"); printf("capable of handling multiple classes\n"); printf("\tYour data has only one class...thanks for trying... Goodbye...\n\n"); exit(0); } return (class);}void missing_value_impute(double **expValue,Class *class,SampleInfo *sample, char **missingIndicator,int numVariables,int numSamples) { register int i,j,k,l; int numMissing,inMissing,cn,id; int *whichSample; double classSum; whichSample=alloc_int(numSamples); for (i=0; i<numVariables; i++) { numMissing=0; for (j=0; j<numSamples; j++) { if (missingIndicator[i][j]=='1') { whichSample[numMissing]=j; numMissing++; } } for (j=0; j<numMissing; j++) { id=-1; cn=0; classSum=0; for (k=0; k<class->num_class; k++) { if (sample[whichSample[j]].class==class->type[k]) { id=k; break; } } for (k=0; k<class->count[id]; k++) { inMissing=0; for (l=0; l<numMissing; l++) { if (class->which[id][k]==whichSample[l]) { inMissing=1; break; } } if (!inMissing) { classSum +=expValue[class->which[id][k]][i]; cn++; } } if (cn<class->count[id]/2) { printf("\nToo many missing cells in a row[%d] for class[%d]\n",i+3,class->type[id]); printf("A quick and dirty way can't fix it -:)...\n"); printf("You may want to remove the row[%d] and try it again...\n\n",i+3); exit(0); } else { classSum /=(double)cn; expValue[whichSample[j]][i]=classSum; } } }}void initialize_chr(int populationSize,int numVariables,int chromosomeLength, int ***allChr,int numNiches) { register int i,j,k,l; int total; int **chr; int *tmp1; chr =alloc_int_int(numNiches*populationSize,chromosomeLength); tmp1=alloc_int(chromosomeLength); if (chromosomeLength==numVariables) { printf("\nNote: chromosome length equals to number of variables (genes,m/z).\n"); printf("!!!All chromosomes will be identical!!!\n\n"); for (i=0; i<numNiches; i++) { for (j=0; j<populationSize; j++) { for (k=0; k<numVariables; k++) *(*(*(allChr+i)+j)+k)=k; } } } else { total=0; do { random_selection(tmp1,chromosomeLength,numVariables); for (k=0; k<chromosomeLength; k++) *(*(chr+total)+k)=*(tmp1+k); total++; } while (total<numNiches*populationSize); for (i=0; i<numNiches; i++) { l=i*populationSize; for (j=0; j<populationSize; j++) { for (k=0; k<chromosomeLength; k++) { *(*(*(allChr+i)+j)+k)=*(*(chr+l+j)+k); } } } } /* printf("generating initial population of chromosomes - done...\n"); */ if (tmp1) { free(tmp1); tmp1=NULL; } if (chr[0]) { free(chr[0]); chr[0]=NULL; } if (chr) { free(chr); chr=NULL; }}void random_selection(int *which,int chromosomeLength,int numVariables) { register int i; int numFilled,dummy,used; numFilled=0; for (i=0; i<chromosomeLength; i++) *(which+i)=-1; while (numFilled<chromosomeLength) { dummy=(int)(numVariables*random_gen()); if (dummy == numVariables) dummy--; used=0; for (i=0; i<numFilled; i++) { if (dummy==*(which+i)) { used=1; break; } } if (!used) { *(which+numFilled)=dummy; numFilled++; } }}/*------------------------------------------------------------------------ Note: this subroutine and the sort_fitness subroutine maximize the fitness score, not minimize it. If you want to achieve the opposite (minimization), modify the following: int roulett_wheel: range = fitness[populationSize-1]-fitness[0].value; for (i=0; i<populationSize; i++) { scaledScore[i] = 1-(fitness[i].value-fitness[0].value)/range; totalScore += scaledScore[i]; } in sort_fitness: int Compare_fitness(const void *s1, const void *s2) { if (((Fitness *)s1)->value<((Fitness *)s2)->value) { return -1; } if (((Fitness *)s1)->value>((Fitness *)s2)->value) { return 1; } return 0; }------------------------------------------------------------------------*/void roulett_wheel(Fitness *fitness,int populationSize,Wheel *wheel) { register int i; double totalScore,worstScore,range,area; double *scaledScore; worstScore=fitness[populationSize-1].value; range=fitness[0].value-worstScore; /*---------------------------------------------------*/ /* if the first and last chromosomes (after sorting) */ /* have same score, GA converged */ /*---------------------------------------------------*/ if (range<0.00001) printf("GA converged ...\n"); else { scaledScore=alloc_double(populationSize); totalScore=0; for (i=0; i<populationSize; i++) { /* range scale */ scaledScore[i] = (fitness[i].value-worstScore)/range; totalScore += scaledScore[i]; } for (i=0; i<populationSize; i++) scaledScore[i] /= totalScore; area=100.0*scaledScore[0]; wheel[0].start=0; wheel[0].end =area; wheel[0].index=fitness[0].index; for (i=1; i<populationSize; i++) { wheel[i].start=wheel[i-1].end; area=100.0*scaledScore[i]; wheel[i].end=area+wheel[i].start; wheel[i].index=fitness[i].index; } if (scaledScore) { free(scaledScore); scaledScore = NULL; } /*--------------------------------------------------------- for (i=0; i<populationSize; i++) { printf("start, end and index %5.1f\t%5.1f\t%4d\n", wheel[i].start, wheel[i].end, wheel[i].index); } ---------------------------------------------------------*/ }}/* qsort */void sort_fitness(Fitness *fitness,int size) { int (*compar)(const void *,const void *); compar=Compare_fitness; qsort((void *)fitness,(size_t)size,sizeof(Fitness),compar);}int Compare_fitness(const void *s1, const void *s2) { if (((Fitness *)s1)->value < ((Fitness *)s2)->value) { return 1; } if (((Fitness *)s1)->value > ((Fitness *)s2)->value) { return -1; } return 0;}void sort_count(Solution *count,int size) { int (*compar)(const void *,const void *); compar = Compare_count; qsort((void *)count,(size_t)size,sizeof(Solution),compar);}/* decending order */int Compare_count(const void *s1, const void *s2) { if (((Solution *)s1)->total < ((Solution *)s2)->total) { return 1; } if (((Solution *)s1)->total > ((Solution *)s2)->total) { return -1; } return 0;}void predict_class(SampleInfo *sample,int numSamples,Class *class, Neighbor **neighbors,int original_knn,int *pred_act,int majorityRule) { register int i,j,k; int which,knn,id1,cmax; int *count; int found; count=alloc_int(class->num_class); knn=original_knn; for (i=0;i<numSamples;i++) { for (k=0;k<class->num_class;k++) *(count+k)=0; /* find out how many classes the k neighbors have */ for (j=0; j<knn; j++) { which=neighbors[i][j].id; for (k=0; k<class->num_class; k++) { if (sample[which].class == class->type[k]) { (count[k])++; break; } } } if (majorityRule==0) { found=0; for (k=0; k<class->num_class; k++) { if (*(count+k)==knn) { *(pred_act+i)=class->type[k]; found=1; break; } } if (!found) *(pred_act+i)=UNCLASSIFIABLE; } else { cmax=0; id1=-1; for (k=0; k<class->num_class; k++) { if (*(count+k)>cmax) { cmax=*(count+k); id1=k; } } if (cmax >= (int)(ceil((double)knn/2.0))) *(pred_act+i)=class->type[id1]; else *(pred_act+i)=UNCLASSIFIABLE; } } if (count) { free(count); count =NULL; }}double cal_fitness(SampleInfo *sample,int *pred_act,int numSamples) { register int i; double score; score=0.0; for (i=0; i<numSamples; i++) { if (pred_act[i]==sample[i].class) score += 1.0; } return (score);}/*-----------------------------------------------------------*//* pair wise between each obj and its KNN neighbbors */ /* similarity using the selected features. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -