⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gendata.c

📁 一个投影聚类算法及其数据集生成源码。 参考文献: Eric K.K. Ng, A. Fu : Efficient algorithm for Projected Clustering,
💻 C
📖 第 1 页 / 共 2 页
字号:
/*	Generalized Projected Clusters		Generating Synthetic Data	Program		:	GenData	Programmer	:	Ng Ka Ka, Eric	Purpose		:	To generate synthetic data that would be used to do clustering	Usage		:	gendata <ini file> <configuration file> <result data file>	Status		:	version 1.16	Final Edit Date	:	2/5	modification:		1.	change printout so that calculation of confusion matrix is possible		2.	force the dimension of last cluster to be fitting the model		3.	force the no. of points in the last cluster to be fitting the model		4.	further modify 2 to fix the prb of <=0 of last cluster dimension		15.	read the parameter from the .ini file instead of command line argument			range, dimension, outlier_ration, spread_parameter, scale_factor can be specified by the ini file		16.	Some data is out of range, make sure all inside range*/#include <stdio.h>#include <stdlib.h>#include <math.h>/* Data Structure for a Single Point        Point                dimension                component1, component2, ....*/typedef struct        {       int dimension;                float *component;        } Point;/* Data Structure for a Single Set of Point        Set_Point                no_point                points1, points2, ...*/typedef struct        {       int no_point;                Point *points;        } Set_Point;/* Data Structure for a Cluster        Cluster                no_point                points1, points2, ...*/typedef struct        {       int no_point;                Point *points;        } Cluster;/* Data Strcture for a set of Cluster        Set_Cluster                no_cluster                cluster1, cluster2, ...*/typedef struct        {       int no_cluster;                         Cluster *cluster;        } Set_Cluster;/* Data Structure for a Single Subspace (defined by dimension)        Subspace                no_dimension                dimension1, dimension2, ...*/typedef struct         {                int no_dimension;                int *dimension;        } Subspace;/* Data Structure for a Set of Subspace        Set_Subspace                no_subspace                dimension_set1, dimension_set2, ...*/typedef struct        {                int no_subspace;                Subspace *subspace;        } Set_Subspace;void gen_anchor (int, int, int, Set_Point *);void gen_dimension (int, int, int, Set_Subspace *);int poisson(int);void gen_rand_non_duplicate(int, int, int *);void gen_exp_ran_var (int, float, float *);int gen_no_point_cluster (int, int, float *, int *);float gen_normal_ran_var (float, float);void gen_anchor (int no_cluster, int dimension, int range, Set_Point *anchor_set){	int i, j;	for (i=0; i<no_cluster; i++)		for (j=0; j<dimension; j++)		{			/* generated +/- points randomly */			if (rand()<RAND_MAX/2)	                	anchor_set->points[i].component[j] = (rand()*(float)range/RAND_MAX);			else	                	anchor_set->points[i].component[j] = (float) (-1.0 * (rand()*(float)range /RAND_MAX));		}	}void gen_dimension (int no_cluster, int dimension, int miu, Set_Subspace *dimension_set){	int i, j, k;	int temp;	int search;	int *tempseed;	int no_correlated_dim;	int temp_dimension_gen = 0;			/* used to set the total dimensions gen to be fitting the model *///printf("5.1\n");	/*Allocate Maximum Memory needed for tempseed */	tempseed = (int *) malloc (sizeof (int) * dimension);//printf("5.2\n");	/* to guarantee total no. of dimensions gen to be fitting the model */	while (temp_dimension_gen != miu * no_cluster)	{	temp_dimension_gen = 0;	/* generate no. of dimension for each cluster first */	for (i=0; i<no_cluster; i++)	{			temp = poisson(miu);		if ((temp>2) && (temp<dimension))		{			temp_dimension_gen += temp;			dimension_set->subspace[i].no_dimension = temp;			//free (dimension_set->subspace[i].dimension);			dimension_set->subspace[i].dimension = (int *)malloc(sizeof(int) * temp);		}		else			i--;	}	}//printf("5.3\n");		/* generate dimensions for the first cluster */	gen_rand_non_duplicate(dimension_set->subspace[0].no_dimension, dimension, dimension_set->subspace[0].dimension);	//printf("5.4\n");	/* generate dimensions for 2 - no_cluster cluster */	for (i=1; i<no_cluster; i++)	{		/* find min( d(i-1), d(i)/2 ) */		if ((int)(dimension_set->subspace[i].no_dimension/2) < dimension_set->subspace[i-1].no_dimension)				no_correlated_dim = (int)(dimension_set->subspace[i].no_dimension/2);		else				no_correlated_dim = dimension_set->subspace[i-1].no_dimension;//printf("5.5\n");			/* choose min( d(i-1), d(i)/2 ) dimensions from the (i-1) cluster */		gen_rand_non_duplicate(no_correlated_dim, dimension_set->subspace[i-1].no_dimension, tempseed);		for (j=0; j<no_correlated_dim; j++)			dimension_set->subspace[i].dimension[j] = dimension_set->subspace[i-1].dimension[tempseed[j]];//printf("5.6\n");		/* generate the other dimensions randomly */		for (j=no_correlated_dim; j<dimension_set->subspace[i].no_dimension; j++)		{
					// *** Ray changed	                //temp = (int)(rand()*dimension / RAND_MAX);
					temp = rand()%dimension;	                /* make sure the newly generated number is not duplicate */        	        search=0;                	for (k=0; k<j; k++)                        if (temp == dimension_set->subspace[i].dimension[k])                        {                                search = 1;                                break;                        }	                if (search==1)        	                j--;                	else				dimension_set->subspace[i].dimension[j] = temp;		}	//printf("5.7\n");			}}/*	Borrow from FW9904 */int poisson(int mean) /*generate a random number using poisson distribution */{  float product = 1.0;  int event = 0;

  // ray added
#ifdef WIN32
  double M_E = 2.71828182845904509080;#endif
  while (product > pow(M_E, -1.0 * mean)) {    product = (float) (product * ((double) rand() / RAND_MAX));    event++;  }  return (event);}/*---------------------------------------------------------------------        Function        :       gen_rand_non_duplicate (Generated Non-duplicate$        Purpose         :       to generate a set of no_seed non-duplicate rand$                                in the range of 0-no_point        Algorithm       :       Use rand to generate random numbers                                verify if the newly generated no duplicate  ----------------------------------------------------------------------*/void gen_rand_non_duplicate(int no_seed, int no_point, int *seed_array){        int i, j;                       /* for indexing */        int search;                     /* for checking non_duplicate */        int temp;                       /* temp. for newly generated random no. */

		//int tempValue;
		double tempValue;                for (i=0; i<no_seed; i++)        {                /* generate a new seed number */
				//tempValue = rand()*no_point;
				//temp = (int)(tempValue / RAND_MAX);
				// *** Ray changed
				temp = rand()%no_point;
//printf("  tempValue:%.2f   RAND_MAX:%d  temp:%d\n", tempValue, RAND_MAX, temp);                //temp = (int)(rand()*no_point / RAND_MAX);
				//temp = (int)(rand()*no_point) % RAND_MAX;
//printf("rand():%d  no_point:%d //printf(" no_point:%d  no_seed: %d   i: %d  RAND_MAX:%d  temp:%d\n", no_point, no_seed, i, RAND_MAX, temp);	                /* make sure the newly generated number is not duplicate */                search=0;                for (j=0; j<i; j++)                        if (temp == seed_array[j])                        {                                search = 1;                                break;                        }                if (search==1)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -