📄 brfcm.c
/* If bins[k].X == V[i] for some i, then return that i. Otherwise,
   return -1 */
int is_example_centroid(int k)
{
    int i, x;

    for (i = 0; i < C; i++) {
        for (x = 0; x < S; x++) {
            if (bins[k].X[x] != V[i][x])
                break;
        }
        if (x == S)  /* X==V */
            return i;
    }
    return -1;
}

int is_equal(double *v1, double *v2)
{
    int i;

    for (i = 0; i < S; i++)
        if (v1[i] != v2[i])
            return 0;
    return 1;
}

double distance(double *v1, double *v2)
{
    int x;
    double sum = 0;

    for (x = 0; x < S; x++)
        sum += (v1[x] - v2[x]) * (v1[x] - v2[x]);
    return sqrt(sum);
}

/* Distribute U matrix values to the entire dataset. Creates a full
   U matrix, copies things over, and destroys the old one. */
int distribute_membership()
{
    double **newU;
    int i, j, k;

    newU = (double **)CALLOC(N, sizeof(double *));
    for (i = 0; i < N; i++)
        newU[i] = (double *)CALLOC(C, sizeof(double));

    /* Go through each bin, and distribute U values */
    for (i = 0; i < N0; i++) {
        for (j = 0; j < bins[i].w; j++) {
            for (k = 0; k < C; k++) {
                newU[bins[i].members[j]][k] = U[i][k];
            }
        }
        free(U[i]);
    }
    free(U);
    U = newU;
    return 0;
}

/*=============================================================
  Public output utilities

  output_centroids(char *filestem)
  output_umatrix(char *filestem)
  output_members(char *filestem)
=============================================================*/
int output_centroids(char *filestem)
{
    FILE *fp;
    char buf[1024];
    int i, j;

    sprintf(buf, "%s.centroids", filestem);
    fp = FOPEN(buf, "w");
    for (i = 0; i < C; i++) {
        for (j = 0; j < S; j++)
            fprintf(fp, "%f\t", V[i][j]);
        fprintf(fp, "\n");
    }
    fclose(fp);
    return 0;
}

int output_umatrix(char *filestem)
{
    FILE *fp;
    char buf[1024];
    int i, j;

    sprintf(buf, "%s.umatrix", filestem);
    fp = FOPEN(buf, "w");
    for (i = 0; i < N; i++) {
        for (j = 0; j < C; j++)
            fprintf(fp, "%f\t", U[i][j]);
        fprintf(fp, "\n");
    }
    fclose(fp);
    return 0;
}

int output_members(char *filestem)
{
    FILE *fp;
    char buf[1024];
    int i, j, max;

    sprintf(buf, "%s.members", filestem);
    fp = FOPEN(buf, "w");
    for (i = 0; i < N; i++) {
        /* Each example is assigned to the cluster with the highest
           membership value */
        for (max = j = 0; j < C; j++) {
            if (U[i][j] > U[i][max])
                max = j;
        }
        fprintf(fp, "%d\n", max);
    }
    fclose(fp);
    return 0;
}

/*================================================================
  Reduction utilities. Functions used for reduction of data
  elements (via quantization and aggregation).

  reduce()         - Reduce all vectors
  reduce_vector()  - Perform the quantization (bit shifting)
  create_new_bin() - Not found, add a new bin
================================================================*/

/* reduce() - Reduce the dataset to n0 vectors, and return n0 */
int reduce()
{
    int i, x;
    double alpha;
    int target;
    double *rv;

    N0 = 0;
    max_number_of_collisions = 0;

    /* Allocate enough storage for 25% of n, assuming a 3/4 reduction */
    bins = (Bins *)CALLOC((int)(N * (1.0 - expected_reduction)), sizeof(Bins));
    rv = (double *)CALLOC(S, sizeof(double));

    /* If hashing is used, initialize it */
    if (use_hashing)
        hash_init();

    /* Go through each example and add it to the appropriate bin */
    for (i = 0; i < N; i++) {
        /* Reduce vector into rv */
        reduce_vector(rv, X[i], R);

        /* Find it (either via hashing or directly) */
        if (use_hashing)
            target = hash_find(rv);
        else
            target = simple_find(rv, N0);

        /* If bin not found, create a new one */
        if (target == -1) {
            create_new_bin(rv, N0);
            if (use_hashing)
                hash_update(rv, N0);
            target = N0;
            N0++;
        }

        /* Add to current bin */
        bins[target].members = (int *)REALLOC(bins[target].members,
                                              (bins[target].w + 1) * sizeof(int));
        bins[target].members[bins[target].w] = i;
        bins[target].w++;

        /* Update running average */
        alpha = 1.0 / (bins[target].w);
        for (x = 0; x < S; x++) {
            bins[target].X[x] = (1.0 - alpha) * bins[target].X[x]
                              + alpha * X[i][x];
        }
    }

    /* Get timing information */
    getrusage(RUSAGE_SELF, &compressing_usage);

    return N0;
}

/* Reduce a vector (quantize) by shifting k bits. */
int reduce_vector(double *rv, double *v, int k)
{
    int i;

    for (i = 0; i < S; i++)
        rv[i] = ((int)v[i]) >> k;
    return 0;
}

/* No bin found for vector, create a new one */
int create_new_bin(double *rv, int n0)
{
    int i;

    if (n0 >= N * (1.0 - expected_reduction))
        die("Expected reduction overflow %d", n0);
    bins[n0].rX = (double *)CALLOC(S, sizeof(double));
    bins[n0].X = (double *)CALLOC(S, sizeof(double));
    bins[n0].members = NULL;
    bins[n0].w = 0;
    for (i = 0; i < S; i++) {
        bins[n0].rX[i] = rv[i];
        bins[n0].X[i] = 0;
    }
    return n0;
}
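/* A worked trace of the reduction step above, using illustrative values
   that are not from any real dataset. Suppose S=2 and R=2 bits.
   reduce_vector() truncates each feature to an int and shifts right:

       v1 = (13.7, 200.1)  ->  rv = (13>>2, 200>>2) = (3, 50)
       v2 = (15.2, 203.9)  ->  rv = (15>>2, 203>>2) = (3, 50)

   Both examples land in the same bin. The running-average update in
   reduce() then keeps bins[t].X equal to the exact mean of the bin's
   members: after v1, w=1 so alpha=1 and X = (13.7, 200.1); after v2,
   w=2 so alpha=1/2 and

       X = 0.5*(13.7, 200.1) + 0.5*(15.2, 203.9) = (14.45, 202.0). */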
/*====================================================================
  Finding routines. The most intensive task in reduction and
  aggregation is attempting to match identical feature vectors. We
  can do a simple linear search through the vectors, or use hashing.

  simple_find() - Find appropriate location through a linear search
====================================================================*/
int simple_find(double *rv, int n0)
{
    int i;

    for (i = 0; i < n0; i++)
        if (is_equal(bins[i].rX, rv))
            return i;
    return -1;
}

/*=============================
  Hashing maintenance routines
=============================*/
struct HashElementStruct {
    int v;
    struct HashElementStruct *next;
};
typedef struct HashElementStruct HashElement;

static HashElement **hash_table;
static int *a;
static int expectedN;
static int mod_value;

/* The hash is a chained hash, therefore find the bucket and search
   the chain. */
int hash_find(double *rv)
{
    HashElement *p;

    for (p = hash_table[h(rv)]; p != NULL; p = p->next)
        if (is_equal(bins[p->v].rX, rv))
            return p->v;
    return -1;
}

/* The hashing function. Returns an index into the hash table. */
int h(double *v)
{
    int i;
    int sum = 0;

    for (i = 0; i < S; i++)
        sum += a[i] * v[i];
    sum = sum % mod_value;
    return abs(sum);
}

/* Initialize the hash data structure. Based on the expected number of
   entries, the h() function and the hash table size are tweaked. */
int hash_init()
{
    int i;

    expectedN = N * (1.0 - expected_reduction);

    /* Allocate storage space */
    hash_table = (HashElement **)CALLOC(expectedN, sizeof(HashElement *));
    a = (int *)CALLOC(S, sizeof(int));
    mod_value = expectedN / expected_number_of_collisions;

    /* We use rand48() elsewhere, so they shouldn't conflict. */
    srand(time(NULL));

    /* Initialize the a[] coefficients to random values */
    for (i = 0; i < S; i++)
        a[i] = rand() % mod_value;
    return 0;
}

/* hash_update - The value of v was not in the hash table, insert it */
int hash_update(double *v, int index)
{
    int hash_value;
    HashElement *p;
    HashElement *new_hash_element;
    int number_of_collisions = 0;

    hash_value = h(v);
    p = hash_table[hash_value];

    new_hash_element = (HashElement *)CALLOC(1, sizeof(HashElement));
    new_hash_element->next = 0;
    new_hash_element->v = index;

    if (p == 0) {
        hash_table[hash_value] = new_hash_element;
    } else {
        /* Walk to the end of the chain, counting collisions */
        for (; p->next != 0; p = p->next, number_of_collisions++)
            ;
        p->next = new_hash_element;
    }

    if (number_of_collisions > max_number_of_collisions)
        max_number_of_collisions = number_of_collisions;
    return 0;
}
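/* An illustrative hash computation (made-up numbers, not from a real
   run). Suppose S=2, mod_value=100, and the random coefficients came
   out as a = {17, 53}. For the reduced vector rv = (3, 50):

       h(rv) = |(17*3 + 53*50) mod 100| = |2701 mod 100| = 1

   so rv chains into bucket 1 of hash_table. Identical reduced vectors
   always hash to the same bucket, and is_equal() on the chain rejects
   any different vectors that happen to collide there. */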
/*==============================================================
 *
 * FCM routines.
 *
 * Once the bit-reduced clustering is finished, we can run the
 * full FCM against the dataset. U has already been allocated
 * n entries, so we start with V (which is already initialized)
 * and cluster from there.
 *
 * This should not be as expensive, since we should already be
 * "mostly" clustered according to FCM, from using BRFCM.
 *
 *==============================================================*/
int fcm();
int fcm_update_centroids();
double fcm_update_umatrix();
int fcm_is_example_centroid(int k);
double fcm_distance(double *v1, double *v2);

int fcm()
{
    double sqrerror = 2 * epsilon;
    int number_of_iterations = 0;

    /* Cluster centers are in V, update the U matrix based on them */
    fcm_update_umatrix();

    /* Run the updates iteratively */
    while (sqrerror > epsilon) {
        number_of_iterations++;
        fcm_update_centroids();
        sqrerror = fcm_update_umatrix();
    }

    /* We go ahead and update the centroids - presumably this will not
       change much, since the overall square error in U is small */
    fcm_update_centroids();
    return 0;
}

/* If X[k] == V[i] for some i, then return that i. Otherwise,
   return -1 */
int fcm_is_example_centroid(int k)
{
    int i, x;

    for (i = 0; i < C; i++) {
        for (x = 0; x < S; x++) {
            if ((double)(X[k][x]) != V[i][x])
                break;
        }
        if (x == S)  /* X==V */
            return i;
    }
    return -1;
}

/* Squared Euclidean distance. The sqrt is skipped, since the U-matrix
   update only uses ratios of distances. */
double fcm_distance(double *v1, double *v2)
{
    int x;
    double sum = 0;

    for (x = 0; x < S; x++)
        sum += (v1[x] - v2[x]) * (v1[x] - v2[x]);
    return sum;
}

/* fcm_update_centroids()

   Given a membership matrix U, recalculate the cluster centroids as
   the "weighted" mean of each contributing example from the dataset.
   Each example contributes by an amount proportional to its
   membership value. */
int fcm_update_centroids()
{
    int i, k, x;
    double numerator[S], denominator;
    double U_ikm;

    /* For each cluster */
    for (i = 0; i < C; i++) {
        /* Zero out numerator and denominator */
        denominator = 0;
        for (x = 0; x < S; x++)
            numerator[x] = 0;

        /* Calculate numerator and denominator together */
        for (k = 0; k < N; k++) {
            U_ikm = pow(U(i, k), m);
            denominator += U_ikm;
            for (x = 0; x < S; x++)
                numerator[x] += U_ikm * X[k][x];
        }

        /* Calculate V */
        for (x = 0; x < S; x++) {
            V[i][x] = numerator[x] / denominator;
        }
    } /* endfor: C clusters */
    return 0;
}

double fcm_update_umatrix()
{
    int i, j, k;
    int example_is_centroid;
    double summation, D_k[C];
    double square_difference = 0;
    double newU;

    /* For each example in the dataset */
    for (k = 0; k < N; k++) {
        /* Special case: If the example is equal to a cluster centroid,
           then U=1.0 for that cluster and 0 for all others */
        if ((example_is_centroid = fcm_is_example_centroid(k)) != -1) {
            for (i = 0; i < C; i++) {
                if (i == example_is_centroid)
                    U(i, k) = 1.0;
                else
                    U(i, k) = 0.0;
            }
            continue;
        }

        /* Cache the squared distance between this vector and all
           centroids. */
        for (i = 0; i < C; i++)
            D_k[i] = fcm_distance(X[k], V[i]);

        /* For each class */
        for (i = 0; i < C; i++) {
            /* Since D_k holds *squared* distances, the exponent on the
               ratio is 1/(m-1); this equals the usual 2/(m-1) exponent
               on plain distances. */
            summation = 0;
            for (j = 0; j < C; j++) {
                if (i == j)
                    summation += 1.0;
                else
                    summation += pow(D_k[i] / D_k[j], 1.0 / (m - 1));
            }

            /* Weight is 1/sum */
            newU = 1.0 / summation;

            /* Add to the square_difference */
            square_difference += (U(i, k) - newU) * (U(i, k) - newU);
            U(i, k) = newU;
        }
    } /* endfor n */
    return sqrt(square_difference);
}
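/* A worked instance of the membership update above, with made-up
   numbers. Suppose m=2 and example k has squared distances
   D_k = {1.0, 4.0} to C=2 centroids. Then for cluster 0:

       u_0k = 1 / (1 + (D_0/D_1)^(1/(m-1)))
            = 1 / (1 + (1.0/4.0)^1) = 1 / 1.25 = 0.8

   and u_1k = 1 / (1 + (4.0/1.0)^1) = 0.2. Memberships sum to 1, and
   the nearer centroid gets the larger weight.

   A minimal sketch of how the routines in this file fit together. The
   driver below is hypothetical (load_data() and the globals N, N0, S,
   C, R, epsilon, and m are assumed to be set up elsewhere in the
   program):

       load_data(...);           // fill X; set N, S
       N0 = reduce();            // quantize (R bits) and bin: N -> N0
       ...cluster the N0 weighted bins to position U and V...
       distribute_membership();  // expand U from N0 bins to all N rows
       fcm();                    // refine U and V on the full dataset
       output_centroids("run");  // writes run.centroids
       output_umatrix("run");    // writes run.umatrix
       output_members("run");    // writes run.members
*/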