📄 brfcm.c
/* If bins[k].X == V[i] for some i, then return that i. Otherwise,
   return -1 */
int is_example_centroid(int k)
{
    int i, x;

    for (i = 0; i < C; i++) {
        for (x = 0; x < S; x++) {
            if (bins[k].X[x] != V[i][x])
                break;
        }
        if (x == S)  /* X==V */
            return i;
    }
    return -1;
}

int is_equal(double *v1, double *v2)
{
    int i;

    for (i = 0; i < S; i++)
        if (v1[i] != v2[i])
            return 0;
    return 1;
}

double distance(double *v1, double *v2)
{
    int x;
    double sum = 0;

    for (x = 0; x < S; x++)
        sum += (v1[x] - v2[x]) * (v1[x] - v2[x]);
    return sqrt(sum);
}

/* Distribute U matrix values to the entire dataset. Creates a full
   U matrix, copies things over, and destroys the old one. */
int distribute_membership()
{
    double **newU;
    int i, j, k;

    newU = (double **)CALLOC(N, sizeof(double *));
    for (i = 0; i < N; i++)
        newU[i] = (double *)CALLOC(C, sizeof(double));

    /* Go through each bin, and distribute U values */
    for (i = 0; i < N0; i++) {
        for (j = 0; j < bins[i].w; j++) {
            for (k = 0; k < C; k++) {
                newU[bins[i].members[j]][k] = U[i][k];
            }
        }
        free(U[i]);
    }
    free(U);
    U = newU;
    return 0;
}

/*=============================================================
  Public output utilities

  output_centroids(char *filestem)
  output_umatrix(char *filestem)
  output_members(char *filestem)
=============================================================*/
int output_centroids(char *filestem)
{
    FILE *fp;
    char buf[1024];
    int i, j;

    sprintf(buf, "%s.centroids", filestem);
    fp = FOPEN(buf, "w");
    for (i = 0; i < C; i++) {
        for (j = 0; j < S; j++)
            fprintf(fp, "%f\t", V[i][j]);
        fprintf(fp, "\n");
    }
    fclose(fp);
    return 0;
}

int output_umatrix(char *filestem)
{
    FILE *fp;
    char buf[1024];
    int i, j;

    sprintf(buf, "%s.umatrix", filestem);
    fp = FOPEN(buf, "w");
    for (i = 0; i < N; i++) {
        for (j = 0; j < C; j++)
            fprintf(fp, "%f\t", U[i][j]);
        fprintf(fp, "\n");
    }
    fclose(fp);
    return 0;
}

int output_members(char *filestem)
{
    FILE *fp;
    char buf[1024];
    int i, j, max;

    sprintf(buf, "%s.members", filestem);
    fp = FOPEN(buf, "w");
    for (i = 0; i < N; i++) {
        /* Each example is assigned to the cluster with the highest
           membership value */
        for (max = j = 0; j < C; j++) {
            if (U[i][j] > U[i][max])
                max = j;
        }
        fprintf(fp, "%d\n", max);
    }
    fclose(fp);
    return 0;
}

/*================================================================
  Reduction utilities. Functions used for reduction of data
  elements (via quantization and aggregation).

  reduce()         - Reduce all vectors
  reduce_vector()  - Perform the quantization (bit shifting)
  create_new_bin() - Not found, add a new bin
================================================================*/

/* reduce() - Reduce the dataset to n0 vectors, and return n0 */
int reduce()
{
    int i, x;
    double alpha;
    int target;
    double *rv;

    N0 = 0;
    max_number_of_collisions = 0;

    /* Allocate enough storage for 25% of n, assuming a 3/4 reduction */
    bins = (Bins *)CALLOC((int)(N * (1.0 - expected_reduction)), sizeof(Bins));
    rv = (double *)CALLOC(S, sizeof(double));

    /* If hashing is used, initialize it */
    if (use_hashing)
        hash_init();

    /* Go through each example and add it to the appropriate bin */
    for (i = 0; i < N; i++) {
        /* Reduce vector into rv */
        reduce_vector(rv, X[i], R);

        /* Find it (either via hashing or directly) */
        if (use_hashing)
            target = hash_find(rv);
        else
            target = simple_find(rv, N0);

        /* If bin not found, create a new one */
        if (target == -1) {
            create_new_bin(rv, N0);
            if (use_hashing)
                hash_update(rv, N0);
            target = N0;
            N0++;
        }

        /* Add to current bin */
        bins[target].members = (int *)REALLOC(bins[target].members,
                                              (bins[target].w + 1) * sizeof(int));
        bins[target].members[bins[target].w] = i;
        bins[target].w++;

        /* Update running average */
        alpha = 1.0 / (bins[target].w);
        for (x = 0; x < S; x++) {
            bins[target].X[x] = (1.0 - alpha) * bins[target].X[x]
                              + alpha * X[i][x];
        }
    }

    /* Get timing information */
    getrusage(RUSAGE_SELF, &compressing_usage);

    return N0;
}

/* Reduce a vector (quantize) by shifting k bits. */
int reduce_vector(double *rv, double *v, int k)
{
    int i;

    for (i = 0; i < S; i++)
        rv[i] = ((int)v[i]) >> k;
    return 0;
}

/* No bin found for vector, create a new one */
int create_new_bin(double *rv, int n0)
{
    int i;

    if (n0 >= N * (1.0 - expected_reduction))
        die("Expected reduction overflow %d", n0);
    bins[n0].rX = (double *)CALLOC(S, sizeof(double));
    bins[n0].X = (double *)CALLOC(S, sizeof(double));
    bins[n0].members = NULL;
    bins[n0].w = 0;
    for (i = 0; i < S; i++) {
        bins[n0].rX[i] = rv[i];
        bins[n0].X[i] = 0;
    }
    return n0;
}
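/* A worked trace of the reduction step above, using illustrative values
   that are not from any real dataset. Suppose S=2 and R=2 bits.
   reduce_vector() truncates each feature to an int and shifts right:

       v1 = (13.7, 200.1)  ->  rv = (13>>2, 200>>2) = (3, 50)
       v2 = (15.2, 203.9)  ->  rv = (15>>2, 203>>2) = (3, 50)

   Both examples land in the same bin. The running-average update in
   reduce() then keeps bins[t].X equal to the exact mean of the bin's
   members: after v1, w=1 so alpha=1 and X = (13.7, 200.1); after v2,
   w=2 so alpha=1/2 and

       X = 0.5*(13.7, 200.1) + 0.5*(15.2, 203.9) = (14.45, 202.0). */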
/*====================================================================
  Finding routines. The most intensive task in reduction and
  aggregation is attempting to match identical feature vectors. We
  can do a simple linear search through the vectors, or use hashing.

  simple_find() - Find appropriate location through a linear search
====================================================================*/
int simple_find(double *rv, int n0)
{
    int i;

    for (i = 0; i < n0; i++)
        if (is_equal(bins[i].rX, rv))
            return i;
    return -1;
}

/*=============================
  Hashing maintenance routines
=============================*/
struct HashElementStruct {
    int v;
    struct HashElementStruct *next;
};
typedef struct HashElementStruct HashElement;

static HashElement **hash_table;
static int *a;
static int expectedN;
static int mod_value;

/* The hash is a chained hash, therefore find the bucket and search
   the chain. */
int hash_find(double *rv)
{
    HashElement *p;

    for (p = hash_table[h(rv)]; p != NULL; p = p->next)
        if (is_equal(bins[p->v].rX, rv))
            return p->v;
    return -1;
}

/* The hashing function. Returns an index into the hash table. */
int h(double *v)
{
    int i;
    int sum = 0;

    for (i = 0; i < S; i++)
        sum += a[i] * v[i];
    sum = sum % mod_value;
    return abs(sum);
}

/* Initialize the hash data structure. Based on the expected number of
   entries, the h() function and the hash table size are tweaked. */
int hash_init()
{
    int i;

    expectedN = N * (1.0 - expected_reduction);

    /* Allocate storage space */
    hash_table = (HashElement **)CALLOC(expectedN, sizeof(HashElement *));
    a = (int *)CALLOC(S, sizeof(int));
    mod_value = expectedN / expected_number_of_collisions;

    /* We use rand48() elsewhere, so they shouldn't conflict. */
    srand(time(NULL));

    /* Initialize the a[] coefficients to random values */
    for (i = 0; i < S; i++)
        a[i] = rand() % mod_value;
    return 0;
}

/* hash_update - The value of v was not in the hash table, insert it */
int hash_update(double *v, int index)
{
    int hash_value;
    HashElement *p;
    HashElement *new_hash_element;
    int number_of_collisions = 0;

    hash_value = h(v);
    p = hash_table[hash_value];

    new_hash_element = (HashElement *)CALLOC(1, sizeof(HashElement));
    new_hash_element->next = 0;
    new_hash_element->v = index;

    if (p == 0) {
        hash_table[hash_value] = new_hash_element;
    } else {
        /* Walk to the end of the chain, counting collisions */
        for (; p->next != 0; p = p->next, number_of_collisions++)
            ;
        p->next = new_hash_element;
    }

    if (number_of_collisions > max_number_of_collisions)
        max_number_of_collisions = number_of_collisions;
    return 0;
}
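/* An illustrative hash computation (made-up numbers, not from a real
   run). Suppose S=2, mod_value=100, and the random coefficients came
   out as a = {17, 53}. For the reduced vector rv = (3, 50):

       h(rv) = |(17*3 + 53*50) mod 100| = |2701 mod 100| = 1

   so rv chains into bucket 1 of hash_table. Identical reduced vectors
   always hash to the same bucket, and is_equal() on the chain rejects
   any different vectors that happen to collide there. */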
/*==============================================================
 *
 * FCM routines.
 *
 * Once the bit-reduced clustering is finished, we can run the
 * full FCM against the dataset. U has already been allocated
 * n entries, so we start with V (which is already initialized)
 * and cluster from there.
 *
 * This should not be as expensive, since we should already be
 * "mostly" clustered according to FCM, from using BRFCM.
 *
 *==============================================================*/
int fcm();
int fcm_update_centroids();
double fcm_update_umatrix();
int fcm_is_example_centroid(int k);
double fcm_distance(double *v1, double *v2);

int fcm()
{
    double sqrerror = 2 * epsilon;
    int number_of_iterations = 0;

    /* Cluster centers are in V, update the U matrix based on them */
    fcm_update_umatrix();

    /* Run the updates iteratively */
    while (sqrerror > epsilon) {
        number_of_iterations++;
        fcm_update_centroids();
        sqrerror = fcm_update_umatrix();
    }

    /* We go ahead and update the centroids - presumably this will not
       change much, since the overall square error in U is small */
    fcm_update_centroids();
    return 0;
}

/* If X[k] == V[i] for some i, then return that i. Otherwise,
   return -1 */
int fcm_is_example_centroid(int k)
{
    int i, x;

    for (i = 0; i < C; i++) {
        for (x = 0; x < S; x++) {
            if ((double)(X[k][x]) != V[i][x])
                break;
        }
        if (x == S)  /* X==V */
            return i;
    }
    return -1;
}

/* Squared Euclidean distance. The sqrt is skipped, since the U-matrix
   update only uses ratios of distances. */
double fcm_distance(double *v1, double *v2)
{
    int x;
    double sum = 0;

    for (x = 0; x < S; x++)
        sum += (v1[x] - v2[x]) * (v1[x] - v2[x]);
    return sum;
}

/* fcm_update_centroids()

   Given a membership matrix U, recalculate the cluster centroids as
   the "weighted" mean of each contributing example from the dataset.
   Each example contributes by an amount proportional to its
   membership value. */
int fcm_update_centroids()
{
    int i, k, x;
    double numerator[S], denominator;
    double U_ikm;

    /* For each cluster */
    for (i = 0; i < C; i++) {
        /* Zero out numerator and denominator */
        denominator = 0;
        for (x = 0; x < S; x++)
            numerator[x] = 0;

        /* Calculate numerator and denominator together */
        for (k = 0; k < N; k++) {
            U_ikm = pow(U(i, k), m);
            denominator += U_ikm;
            for (x = 0; x < S; x++)
                numerator[x] += U_ikm * X[k][x];
        }

        /* Calculate V */
        for (x = 0; x < S; x++) {
            V[i][x] = numerator[x] / denominator;
        }
    } /* endfor: C clusters */
    return 0;
}

double fcm_update_umatrix()
{
    int i, j, k;
    int example_is_centroid;
    double summation, D_k[C];
    double square_difference = 0;
    double newU;

    /* For each example in the dataset */
    for (k = 0; k < N; k++) {
        /* Special case: If the example is equal to a cluster centroid,
           then U=1.0 for that cluster and 0 for all others */
        if ((example_is_centroid = fcm_is_example_centroid(k)) != -1) {
            for (i = 0; i < C; i++) {
                if (i == example_is_centroid)
                    U(i, k) = 1.0;
                else
                    U(i, k) = 0.0;
            }
            continue;
        }

        /* Cache the squared distance between this vector and all
           centroids. */
        for (i = 0; i < C; i++)
            D_k[i] = fcm_distance(X[k], V[i]);

        /* For each class */
        for (i = 0; i < C; i++) {
            /* Since D_k holds *squared* distances, the exponent on the
               ratio is 1/(m-1); this equals the usual 2/(m-1) exponent
               on plain distances. */
            summation = 0;
            for (j = 0; j < C; j++) {
                if (i == j)
                    summation += 1.0;
                else
                    summation += pow(D_k[i] / D_k[j], 1.0 / (m - 1));
            }

            /* Weight is 1/sum */
            newU = 1.0 / summation;

            /* Add to the square_difference */
            square_difference += (U(i, k) - newU) * (U(i, k) - newU);
            U(i, k) = newU;
        }
    } /* endfor n */
    return sqrt(square_difference);
}
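/* A worked instance of the membership update above, with made-up
   numbers. Suppose m=2 and example k has squared distances
   D_k = {1.0, 4.0} to C=2 centroids. Then for cluster 0:

       u_0k = 1 / (1 + (D_0/D_1)^(1/(m-1)))
            = 1 / (1 + (1.0/4.0)^1) = 1 / 1.25 = 0.8

   and u_1k = 1 / (1 + (4.0/1.0)^1) = 0.2. Memberships sum to 1, and
   the nearer centroid gets the larger weight.

   A minimal sketch of how the routines in this file fit together. The
   driver below is hypothetical (load_data() and the globals N, N0, S,
   C, R, epsilon, and m are assumed to be set up elsewhere in the
   program):

       load_data(...);           // fill X; set N, S
       N0 = reduce();            // quantize (R bits) and bin: N -> N0
       ...cluster the N0 weighted bins to position U and V...
       distribute_membership();  // expand U from N0 bins to all N rows
       fcm();                    // refine U and V on the full dataset
       output_centroids("run");  // writes run.centroids
       output_umatrix("run");    // writes run.umatrix
       output_members("run");    // writes run.members
*/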