📄 chameleon.c
字号:
/****************************************************** * This is my CHAMELEON clustering program * * Author: Weinan Wang * ******************************************************/#include <stdio.h> /* for sscanf, FILE* */#include <stdlib.h> /* for calloc, malloc, recalloc, free */#include <math.h> /* for sqrt */#include <string.h> /* for strlen *///#include <time.h>
#include <Winsock2.h>
//#include <sys/time.h>
/* !!!!!! *//* Define the maximum size of the data points */#define MAXSIZE 10000/* maxmum length of the file name */#define MAX_fileName 14/* !!!!!! *//* Define the K_nearest as 10, * this is according to the CHAMELEON paper p.13. *//* #define K_nearest 10 */#define K_nearest 15/*#define K5_nearest 50 *//* 5*K_nearest */#define K5_nearest 50/* !!!!!! *//* this is the balance constraint to be used in hMETIS. * this selection is according to page 10 of the CHAMELEON paper */#define BC 25/*!!!!!!*//* This is the MINSIZE over all the points, to be used in hMETIS. * this selection is according to page 13 of the CHAMELEON paper *//* #define MINSIZE 0.025 */#define MINSIZE 0.04/* !!!!!! *//* This is the domain width and height of the data set *//* This set of WIDTH and HEIGHT is for t7.10k.dat */#define WIDTH 700#define HEIGHT 500/* this is the diagonal distance */#define DIAG 860.0/* this is used for the maxmum number of groups * after phase 1's partition. */#define MAXGROUPS 100/* this alpha is used for the criterian function (p11 of the CHAMELEON paper) * for merging in phase 2. * This selection is according to p13 of the CHAMELEON paper. */#define ALPHA 300.0/* * This data type represent a hyper edge for a point. */typedef struct{ int pointNO; /* the other point number */ double similarity; /* similarity (DIAG-distance)/DIAG */} Hyperedge;/* * This data type contains information of * one point. */typedef struct{ float x; float y; Hyperedge edges[K5_nearest]; /* length cannot be bigger than K5_nearest */ int length; /* current number of hyperedges */} Point;/* * This structure represent * a node in the binary tree of * graph partition. */struct node { int * points; /* list of point numbers */ int numberPoints; /* number of points belongs to this node */ struct node *left; /* left subtree */ struct node *right; /* right subtree */};/************************************************** * Declare static functions * **************************************************/static int parsingInput(int argc, char *argv[]);static int readData();static int initialize();static int establish_hyperGraph();static double computeSimilarity(int i, int j);static int cutNode(struct node * source, struct node * left, struct node * right, int ub);static int partition(struct node* source, struct node * left, struct node * right);static int belongs(struct node *, int point);static int phase2();static int merge(int group0, int group1);static float computeGoodness(int group0, int group1);static int computeAIC_AC(struct node * node0, struct node * node1, float * aic, float * ac);static int clusterResult();static long compute_time(struct timeval start_time, struct timeval end_time);/************************************************* * Declare global variables * *************************************************//* data points */Point points[MAXSIZE];FILE * fp; /* file pointer pointing to the data file */FILE * out_fp; /* file pointer pointing to the output file *//* N is the total number of data points in the data file */int N;/* fileName is the data file's name */char fileName[MAX_fileName + 1];/* stopClusterNO is the number of remainning clusters after * the whole cluster process. */int stopClusterNO;/* partition tree root */struct node *root = NULL;/* threshold to decide whether stop partition */int threshold;/* this variable is for output matlab file */int index_matlab=0;/* * this variable is used for indexing point groups * produced in phase 1. */int groupIndex;int * groups[MAXGROUPS];int groupLength[MAXGROUPS];/* following varaibles are for merging phase in phase 2 */float bestGoodness; /* best goodness */int mergeGroups[2] = {0, 0}; /* the two groups' number should be merged *//* for timing */long time_period;struct timeval time_start, time_end;/**************************************************************** * Name : main * * Function: main function * * Input : argc, argv * * Output : void * * The usage of the rock program should be of the * * form: * * chameleon N fileName stopClusterNO * * Each parameter's meanning is : * * chameleon -- program's name * * int N -- how many data points does the file has * * string fileName -- indicate the data file * * name which * * contains all the data vectors. * * int stopClusterNO -- number of remaining clusters * * at the end of clustering. * ****************************************************************/int main(int argc, char * argv[]) { struct node * left; struct node * right; int i; if(gettimeofday(&time_start, NULL)<0) { printf("time error \n"); return -1; } printf("ALPHA is %f \n", ALPHA); /* parse the command line */ if (parsingInput(argc, argv)<0) { return -1; } /* read in data file */ if ( readData()<0 ) { fclose(fp); return -1; } /* close read in data file */ fclose(fp); /* for debugging purpose */ /* printData(); */ /* initialize data structure */ if ( initialize()<0 ) { return -1; } /* establish hyperGraph */ if (establish_hyperGraph()<0) { return -1; } /* Now begin partition */ left = (struct node *)calloc(1, sizeof(struct node)); if(left == NULL) { printf("cannot allocate memory for left! \n"); return -1; } right = (struct node *)calloc(1, sizeof(struct node)); if(right == NULL) { printf("cannot allocate memory for right! \n"); return -1; } /* begin partitionning, this is a recursive program */ if(partition(root, left, right)<0) { printf("partition error! \n"); return -1; } if(phase2()<0) { printf("phase2 error! \n"); return -1; } if(clusterResult()<0) { fclose(out_fp); printf("clusterResult error! \n"); return -1; } fclose(out_fp); for(i=0; i<groupIndex; i++) { free(groups[i]); groups[i] = NULL; } if(gettimeofday(&time_end, NULL)<0){ printf("time error \n"); return -1; } /* Now compute the time for sequential sorting */ time_period = compute_time(time_start, time_end); printf("time spend is : %d \n", time_period); return 1;}/* end main *//**************************************************************** * Name : compute_time * * Function: compute the time interval between two * * given time structures. * * Input : (struct timeval start_time), * * (struct timeval end_time) * * Output : long * ****************************************************************/static long compute_time(struct timeval start_time, struct timeval end_time){ long sec, msec, time; /* time interval in sec */ sec = (long)(end_time.tv_sec - start_time.tv_sec); /* time interval in msec */ msec = (long)(end_time.tv_usec - start_time.tv_usec); /* compute time difference in msec */ time = sec*1000000+msec; return time;}/* end compute_time *//**************************************************************** * Name : clusterResult * * Function: output cluster result. * * Input : void * * Output : int * ****************************************************************/static int clusterResult(){ int i, j, count; /* open hyperGraph file to write output */ if ((out_fp = fopen("output.m", "w")) == NULL ) { printf("cannot open output file correctly! \n"); return -1; } count = 0; for(i=0; i<groupIndex; i++){ if(groupLength[i]>0){ /* printf("c%d =[\n", count);*/ fprintf(out_fp, "%% The cluster NO is %d \n", i); fprintf(out_fp, "c%d =[", count); for(j=0; j<groupLength[i]; j++){ /* printf("%f %f \n", points[groups[i][j]].x, points[groups[i][j]].y);*/ fprintf(out_fp, "%f %f \n", points[groups[i][j]].x, points[groups[i][j]].y); }/* end for */ /* printf("];\n\n");*/ fprintf(out_fp, "];\n"); count++; }/* end if */ }/* end for */ if(count != stopClusterNO){ printf("existing cluster number is not equal to required cluster numer! \n"); return -1; }/* end if */ return 1;}/* end clusterResult *//**************************************************************** * Name : computeAIC_AC * * Function: compute absolute inter-connectivity and absolute * * closeness of two nodes. * * Input : struct node * node0 -- pointer to node0. * * struct node * node1 -- pointer to node1. * * float * aic -- pointer to the return value for * * absolute inter-connectivity. * * float * ac -- pointer to the return value for * * absolute closeness. * * Output : float * ****************************************************************/static int computeAIC_AC(struct node * node0, struct node * node1, float * aic, float * ac){ int count, i, j; count = 0; (* aic) = 0.0; (* ac) = 0.0; /* count the edges from node0 */ for(i=0; i<node0->numberPoints; i++){ for(j=0; j<points[node0->points[i]].length; j++){ if(belongs(node1, points[node0->points[i]].edges[j].pointNO)>0){ (* aic) = (* aic) + points[node0->points[i]].edges[j].similarity; count++; }/* end if */ }/* end for j */ }/* end for i */ /* printf("!!! aic is %f,", *aic);*/ /* count the edges from node1 */ for(i=0; i<node1->numberPoints; i++){ for(j=0; j<points[node1->points[i]].length; j++){ if(belongs(node0, points[node1->points[i]].edges[j].pointNO)>0){ (* aic) = (* aic) + points[node1->points[i]].edges[j].similarity; count++; }/* end if */ }/* end for j */ }/* end for i */ /* printf(" aic is %f !!!", *aic);*/ /* printf("count is %d, ", count);*/ if(count>0) { (* ac) = (* aic)/((float)count); } else { (*ac) = 0; (* aic) = 0; } if(*ac > 10000 || *aic > 10000){ printf("ac is %e, aic is %e, count is %d \n", (*ac), (*aic), count); } return 1;}/* end computeAIC_AC *//**************************************************************** * Name : computeGoodness * * Function: compute goodness for merging two clusters. * * Input : int group0 -- group0's index. * * int group1 -- group1's index. * * Output : float * ****************************************************************/static float computeGoodness(int group0, int group1){ float goodness; float ri, rc; int ubFactor; /* form two nodes according to two groups */ struct node * node0; struct node * node1; /* left and right is for cutting the node0 and node 1 to * compute internal connectivity and internal closeness. */ struct node * left0; struct node * right0; struct node * left1; struct node * right1; int i; /* absolute inter-connectivity and absolute closeness */ float aic, ac, aic0, ac0, aic1, ac1; /*!!!!!!!*/ ubFactor = 6; /* First create node0 and node1 according to group0 and group1 */ node0 = (struct node *)calloc(1, sizeof(struct node)); node1 = (struct node *)calloc(1, sizeof(struct node)); if(node0==NULL || node1==NULL){ printf("cannot allocate memory for node0 and node1. \n"); return -1; }/* end if */ node0->left=NULL; node0->right=NULL; node0->numberPoints = groupLength[group0]; node0->points = (int *)calloc(node0->numberPoints, sizeof(int)); if(node0->points == NULL){ printf("cannot allocate memory for node0->points. \n"); return -1; } for(i=0; i<groupLength[group0]; i++){ node0->points[i]=groups[group0][i]; }/* end for */ node1->left=NULL; node1->right=NULL; node1->numberPoints = groupLength[group1]; node1->points = (int *)calloc(node1->numberPoints, sizeof(int)); if(node1->points == NULL){
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -