📄 gendata.c
字号:
/* Generalized Projected Clusters Generating Synthetic Data Program : GenData Programmer : Ng Ka Ka, Eric Purpose : To generate synthetic data that would be used to do clustering Usage : gendata <ini file> <configuration file> <result data file> Status : version 1.16 Final Edit Date : 2/5 modification: 1. change printout so that calculation of confusion matrix is possible 2. force the dimension of last cluster to be fitting the model 3. force the no. of points in the last cluster to be fitting the model 4. further modify 2 to fix the prb of <=0 of last cluster dimension 15. read the parameter from the .ini file instead of command line argument range, dimension, outlier_ration, spread_parameter, scale_factor can be specified by the ini file 16. Some data is out of range, make sure all inside range*/#include <stdio.h>#include <stdlib.h>#include <math.h>/* Data Structure for a Single Point Point dimension component1, component2, ....*/typedef struct { int dimension; float *component; } Point;/* Data Structure for a Single Set of Point Set_Point no_point points1, points2, ...*/typedef struct { int no_point; Point *points; } Set_Point;/* Data Structure for a Cluster Cluster no_point points1, points2, ...*/typedef struct { int no_point; Point *points; } Cluster;/* Data Strcture for a set of Cluster Set_Cluster no_cluster cluster1, cluster2, ...*/typedef struct { int no_cluster; Cluster *cluster; } Set_Cluster;/* Data Structure for a Single Subspace (defined by dimension) Subspace no_dimension dimension1, dimension2, ...*/typedef struct { int no_dimension; int *dimension; } Subspace;/* Data Structure for a Set of Subspace Set_Subspace no_subspace dimension_set1, dimension_set2, ...*/typedef struct { int no_subspace; Subspace *subspace; } Set_Subspace;void gen_anchor (int, int, int, Set_Point *);void gen_dimension (int, int, int, Set_Subspace *);int poisson(int);void gen_rand_non_duplicate(int, int, int *);void gen_exp_ran_var (int, float, float *);int gen_no_point_cluster (int, int, float *, int *);float gen_normal_ran_var (float, float);void gen_anchor (int no_cluster, int dimension, int range, Set_Point *anchor_set){ int i, j; for (i=0; i<no_cluster; i++) for (j=0; j<dimension; j++) { /* generated +/- points randomly */ if (rand()<RAND_MAX/2) anchor_set->points[i].component[j] = (rand()*(float)range/RAND_MAX); else anchor_set->points[i].component[j] = (float) (-1.0 * (rand()*(float)range /RAND_MAX)); } }void gen_dimension (int no_cluster, int dimension, int miu, Set_Subspace *dimension_set){ int i, j, k; int temp; int search; int *tempseed; int no_correlated_dim; int temp_dimension_gen = 0; /* used to set the total dimensions gen to be fitting the model *///printf("5.1\n"); /*Allocate Maximum Memory needed for tempseed */ tempseed = (int *) malloc (sizeof (int) * dimension);//printf("5.2\n"); /* to guarantee total no. of dimensions gen to be fitting the model */ while (temp_dimension_gen != miu * no_cluster) { temp_dimension_gen = 0; /* generate no. of dimension for each cluster first */ for (i=0; i<no_cluster; i++) { temp = poisson(miu); if ((temp>2) && (temp<dimension)) { temp_dimension_gen += temp; dimension_set->subspace[i].no_dimension = temp; //free (dimension_set->subspace[i].dimension); dimension_set->subspace[i].dimension = (int *)malloc(sizeof(int) * temp); } else i--; } }//printf("5.3\n"); /* generate dimensions for the first cluster */ gen_rand_non_duplicate(dimension_set->subspace[0].no_dimension, dimension, dimension_set->subspace[0].dimension); //printf("5.4\n"); /* generate dimensions for 2 - no_cluster cluster */ for (i=1; i<no_cluster; i++) { /* find min( d(i-1), d(i)/2 ) */ if ((int)(dimension_set->subspace[i].no_dimension/2) < dimension_set->subspace[i-1].no_dimension) no_correlated_dim = (int)(dimension_set->subspace[i].no_dimension/2); else no_correlated_dim = dimension_set->subspace[i-1].no_dimension;//printf("5.5\n"); /* choose min( d(i-1), d(i)/2 ) dimensions from the (i-1) cluster */ gen_rand_non_duplicate(no_correlated_dim, dimension_set->subspace[i-1].no_dimension, tempseed); for (j=0; j<no_correlated_dim; j++) dimension_set->subspace[i].dimension[j] = dimension_set->subspace[i-1].dimension[tempseed[j]];//printf("5.6\n"); /* generate the other dimensions randomly */ for (j=no_correlated_dim; j<dimension_set->subspace[i].no_dimension; j++) {
// *** Ray changed //temp = (int)(rand()*dimension / RAND_MAX);
temp = rand()%dimension; /* make sure the newly generated number is not duplicate */ search=0; for (k=0; k<j; k++) if (temp == dimension_set->subspace[i].dimension[k]) { search = 1; break; } if (search==1) j--; else dimension_set->subspace[i].dimension[j] = temp; } //printf("5.7\n"); }}/* Borrow from FW9904 */int poisson(int mean) /*generate a random number using poisson distribution */{ float product = 1.0; int event = 0;
// ray added
#ifdef WIN32
double M_E = 2.71828182845904509080;#endif
while (product > pow(M_E, -1.0 * mean)) { product = (float) (product * ((double) rand() / RAND_MAX)); event++; } return (event);}/*--------------------------------------------------------------------- Function : gen_rand_non_duplicate (Generated Non-duplicate$ Purpose : to generate a set of no_seed non-duplicate rand$ in the range of 0-no_point Algorithm : Use rand to generate random numbers verify if the newly generated no duplicate ----------------------------------------------------------------------*/void gen_rand_non_duplicate(int no_seed, int no_point, int *seed_array){ int i, j; /* for indexing */ int search; /* for checking non_duplicate */ int temp; /* temp. for newly generated random no. */
//int tempValue;
double tempValue; for (i=0; i<no_seed; i++) { /* generate a new seed number */
//tempValue = rand()*no_point;
//temp = (int)(tempValue / RAND_MAX);
// *** Ray changed
temp = rand()%no_point;
//printf(" tempValue:%.2f RAND_MAX:%d temp:%d\n", tempValue, RAND_MAX, temp); //temp = (int)(rand()*no_point / RAND_MAX);
//temp = (int)(rand()*no_point) % RAND_MAX;
//printf("rand():%d no_point:%d //printf(" no_point:%d no_seed: %d i: %d RAND_MAX:%d temp:%d\n", no_point, no_seed, i, RAND_MAX, temp); /* make sure the newly generated number is not duplicate */ search=0; for (j=0; j<i; j++) if (temp == seed_array[j]) { search = 1; break; } if (search==1)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -