⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 chameleon.c

📁 变色龙层次聚类算法
💻 C
📖 第 1 页 / 共 3 页
字号:
/****************************************************** * This is my CHAMELEON clustering program            * * Author: Weinan Wang                                * ******************************************************/#include <stdio.h> /* for sscanf, FILE* */#include <stdlib.h> /* for calloc, malloc, recalloc, free */#include <math.h> /* for sqrt */#include <string.h> /* for strlen *///#include <time.h>
#include <Winsock2.h>
//#include <sys/time.h>
/* !!!!!! *//* Define the maximum size of the data points */#define MAXSIZE 10000/* maxmum length of the file name */#define MAX_fileName 14/* !!!!!! *//* Define the K_nearest as 10, * this is according to the CHAMELEON paper p.13. *//* #define K_nearest 10 */#define K_nearest 15/*#define K5_nearest 50 *//* 5*K_nearest */#define K5_nearest 50/* !!!!!! *//* this is the balance constraint to be used in hMETIS. * this selection is according to page 10 of the CHAMELEON paper */#define BC 25/*!!!!!!*//* This is the MINSIZE over all the points, to be used in hMETIS. * this selection is according to page 13 of the CHAMELEON paper *//* #define MINSIZE 0.025 */#define MINSIZE 0.04/* !!!!!! *//* This is the domain width and height of the data set *//* This set of WIDTH and HEIGHT is for t7.10k.dat */#define WIDTH 700#define HEIGHT 500/* this is the diagonal distance */#define DIAG 860.0/* this is used for the maxmum number of groups * after phase 1's partition. */#define MAXGROUPS 100/* this alpha is used for the criterian function (p11 of the CHAMELEON paper) * for merging in phase 2. * This selection is according to p13 of the CHAMELEON paper. */#define ALPHA 300.0/*  * This data type represent a hyper edge for a point. */typedef struct{  int pointNO; /* the other point number */  double similarity; /* similarity (DIAG-distance)/DIAG */} Hyperedge;/* * This data type contains information of * one point. */typedef struct{  float x;  float y;  Hyperedge edges[K5_nearest];   /* length cannot be bigger than K5_nearest */  int length; /* current number of hyperedges */} Point;/*  * This structure represent * a node in the binary tree of  * graph partition. */struct node {  int * points; /* list of point numbers */  int numberPoints; /* number of points belongs to this node */  struct node *left; /* left subtree */  struct node *right; /* right subtree */};/************************************************** * Declare static functions                       * **************************************************/static int parsingInput(int argc, char *argv[]);static int readData();static int initialize();static int establish_hyperGraph();static double computeSimilarity(int i, int j);static int cutNode(struct node * source, struct node * left, struct node * right, int ub);static int partition(struct node* source, struct node * left, struct node * right);static int belongs(struct node *, int point);static int phase2();static int merge(int group0, int group1);static float computeGoodness(int group0, int group1);static int computeAIC_AC(struct node * node0, struct node * node1, float * aic, float * ac);static int clusterResult();static long compute_time(struct timeval start_time, struct timeval end_time);/************************************************* * Declare global variables                      * *************************************************//* data points */Point points[MAXSIZE];FILE * fp; /* file pointer pointing to the data file */FILE * out_fp; /* file pointer pointing to the output file *//* N is the total number of data points in the data file */int N;/* fileName is the data file's name */char fileName[MAX_fileName + 1];/* stopClusterNO is the number of remainning clusters after * the whole cluster process. */int stopClusterNO;/* partition tree root */struct node *root = NULL;/* threshold to decide whether stop partition */int threshold;/* this variable is for output matlab file */int index_matlab=0;/*  * this variable is used for indexing point groups  * produced in phase 1. */int groupIndex;int * groups[MAXGROUPS];int groupLength[MAXGROUPS];/* following varaibles are for merging phase in phase 2 */float bestGoodness; /* best goodness */int mergeGroups[2] = {0, 0}; /* the two groups' number should be merged *//* for timing */long time_period;struct timeval time_start, time_end;/**************************************************************** * Name    : main                                               * * Function: main function                                      * * Input   : argc, argv                                         * * Output  : void                                               * *           The usage of the rock program should be of the     * *           form:                                              * *             chameleon N fileName stopClusterNO               * *           Each parameter's meanning is :                     * *           chameleon -- program's name                        * *           int N    -- how many data points does the file has * *           string fileName -- indicate the data file          * *                              name which                      * *                               contains all the data vectors. * *           int stopClusterNO -- number of remaining clusters  * *                                at the end of clustering.     * ****************************************************************/int main(int argc, char * argv[]) {  struct node * left;  struct node * right;  int i;  if(gettimeofday(&time_start, NULL)<0)    {      printf("time error \n");      return -1;    }  printf("ALPHA is %f \n", ALPHA);  /* parse the command line */ if (parsingInput(argc, argv)<0)   {     return -1;   }  /* read in data file */ if ( readData()<0 )   {      fclose(fp);      return -1;   } /* close read in data file */ fclose(fp); /* for debugging purpose */ /* printData(); */ /* initialize data structure */ if ( initialize()<0 )   {     return -1;   } /* establish hyperGraph */ if (establish_hyperGraph()<0)   {     return -1;   } /* Now begin partition */ left = (struct node *)calloc(1, sizeof(struct node)); if(left == NULL)   {     printf("cannot allocate memory for left! \n");     return -1;   } right = (struct node *)calloc(1, sizeof(struct node)); if(right == NULL)   {     printf("cannot allocate memory for right! \n");     return -1;   } /* begin partitionning, this is a recursive program */ if(partition(root, left, right)<0)   {     printf("partition error! \n");     return -1;   } if(phase2()<0)   {     printf("phase2 error! \n");     return -1;   } if(clusterResult()<0)   {     fclose(out_fp);     printf("clusterResult error! \n");     return -1;   } fclose(out_fp); for(i=0; i<groupIndex; i++)   {     free(groups[i]);     groups[i] = NULL;   } if(gettimeofday(&time_end, NULL)<0){   printf("time error \n");   return -1; }   /* Now compute the time for sequential sorting */ time_period = compute_time(time_start, time_end);   printf("time spend is : %d \n", time_period); return 1;}/* end main *//**************************************************************** * Name    : compute_time                                       * * Function: compute the time interval between two              * *           given time structures.                             * * Input   : (struct timeval start_time),                       * *           (struct timeval end_time)                          * * Output  : long                                               * ****************************************************************/static long compute_time(struct timeval start_time, struct timeval end_time){  long sec, msec, time;  /* time interval in sec */  sec = (long)(end_time.tv_sec - start_time.tv_sec);  /* time interval in msec */  msec = (long)(end_time.tv_usec - start_time.tv_usec);  /* compute time difference in msec */  time = sec*1000000+msec;  return time;}/* end compute_time *//**************************************************************** * Name    : clusterResult                                      * * Function: output cluster result.                             * * Input   : void                                               * * Output  : int                                                * ****************************************************************/static int clusterResult(){  int i, j, count;    /* open hyperGraph file to write output */  if ((out_fp = fopen("output.m", "w")) == NULL )    {      printf("cannot open output file correctly! \n");      return -1;    }  count = 0;  for(i=0; i<groupIndex; i++){    if(groupLength[i]>0){      /* printf("c%d =[\n", count);*/      fprintf(out_fp, "%% The cluster NO is %d \n", i);      fprintf(out_fp, "c%d =[", count);      for(j=0; j<groupLength[i]; j++){	/* printf("%f %f \n", points[groups[i][j]].x, points[groups[i][j]].y);*/	fprintf(out_fp, "%f %f \n", points[groups[i][j]].x, points[groups[i][j]].y);      }/* end for */      /* printf("];\n\n");*/      fprintf(out_fp, "];\n");      count++;    }/* end if */  }/* end for */  if(count != stopClusterNO){    printf("existing cluster number is not equal to required cluster numer! \n");    return -1;  }/* end if */  return 1;}/* end clusterResult *//**************************************************************** * Name    : computeAIC_AC                                      * * Function: compute absolute inter-connectivity and absolute   * *           closeness of two nodes.                            * * Input   : struct node * node0 -- pointer to node0.           * *           struct node * node1 -- pointer to node1.           * *           float * aic -- pointer to the return value for     * *                          absolute inter-connectivity.        * *           float * ac  -- pointer to the return value for     * *                          absolute closeness.                 * * Output  :  float                                             * ****************************************************************/static int computeAIC_AC(struct node * node0, struct node * node1, float * aic, float * ac){  int count, i, j;  count = 0;  (* aic) = 0.0;  (* ac) = 0.0;  /* count the edges from node0 */  for(i=0; i<node0->numberPoints; i++){    for(j=0; j<points[node0->points[i]].length; j++){      if(belongs(node1, points[node0->points[i]].edges[j].pointNO)>0){	(* aic) = (* aic) + points[node0->points[i]].edges[j].similarity;	count++;      }/* end if */    }/* end for j */  }/* end for i */  /*  printf("!!! aic is %f,", *aic);*/  /* count the edges from node1 */  for(i=0; i<node1->numberPoints; i++){    for(j=0; j<points[node1->points[i]].length; j++){      if(belongs(node0, points[node1->points[i]].edges[j].pointNO)>0){	(* aic) = (* aic) + points[node1->points[i]].edges[j].similarity;	count++;      }/* end if */    }/* end for j */  }/* end for i */  /* printf(" aic is %f !!!", *aic);*/  /*  printf("count is %d, ", count);*/  if(count>0)    {      (* ac) = (* aic)/((float)count);    }  else    {      (*ac) = 0;      (* aic) = 0;    }  if(*ac > 10000 || *aic > 10000){    printf("ac is %e, aic is %e, count is %d \n", (*ac), (*aic), count);  }  return 1;}/* end computeAIC_AC *//**************************************************************** * Name    : computeGoodness                                    * * Function: compute goodness for merging two clusters.         * * Input   : int group0 -- group0's index.                      * *           int group1 -- group1's index.                      * * Output  :  float                                             * ****************************************************************/static float computeGoodness(int group0, int group1){  float goodness;  float ri, rc;  int ubFactor;  /* form two nodes according to two groups */  struct node * node0;  struct node * node1;  /* left and right is for cutting the node0 and node 1 to   * compute internal connectivity and internal closeness.   */  struct node * left0;  struct node * right0;  struct node * left1;  struct node * right1;  int i;  /* absolute inter-connectivity and absolute closeness */  float aic, ac, aic0, ac0, aic1, ac1;  /*!!!!!!!*/  ubFactor = 6;  /* First create node0 and node1 according to group0 and group1 */  node0 = (struct node *)calloc(1, sizeof(struct node));  node1 = (struct node *)calloc(1, sizeof(struct node));  if(node0==NULL || node1==NULL){    printf("cannot allocate memory for node0 and node1. \n");    return -1;  }/* end if */  node0->left=NULL;  node0->right=NULL;  node0->numberPoints = groupLength[group0];  node0->points = (int *)calloc(node0->numberPoints, sizeof(int));  if(node0->points == NULL){    printf("cannot allocate memory for node0->points. \n");    return -1;  }  for(i=0; i<groupLength[group0]; i++){    node0->points[i]=groups[group0][i];  }/* end for */  node1->left=NULL;  node1->right=NULL;  node1->numberPoints = groupLength[group1];  node1->points = (int *)calloc(node1->numberPoints, sizeof(int));  if(node1->points == NULL){

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -