⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gendata.c

📁 一个投影聚类算法及其数据集生成源码。参考文献:Eric K.K. Ng, A. Fu: Efficient Algorithm for Projected Clustering.
💻 C
📖 第 1 页 / 共 2 页
字号:
                        i--;                else                        seed_array[i] = temp;        }}/*---------------------------------------------------------------------        Function        :       gen_exp_ran_var (generate exponential random variables)        Purpose         :       to generate and return exponential random variables        Algorithm       :       Generate U in (0,1)                                Set X = -Log (1-U)/ (1/mean)  ----------------------------------------------------------------------*/void gen_exp_ran_var (int k, float mean, float *exp){	int i;	float u;	for (i=0; i<k; i++)	{		u = (float)(rand())/(float)(RAND_MAX);		exp[i] = (float) (-1 * log (1-u));	}}		/*---------------------------------------------------------------------        Function        :       gen_no_point_cluster (generate No. of point for each cluster)        Purpose         :       to generate No. of point for each cluster        Algorithm       :       no_point_cluster[i] = no_nonoutlier * (exp[i]/summation of exp)  ----------------------------------------------------------------------*/int gen_no_point_cluster (int no_cluster, int no_nonoutlier, float *exp, int *return_value){	int real_no_point = 0;	int i;	float sum = 0.0;	for (i=0; i<no_cluster; i++)		sum += exp[i];	for (i=0; i<no_cluster; i++)	{		return_value[i] = (int) (no_nonoutlier * (exp[i] / sum));		real_no_point += return_value[i];	}	return real_no_point; }/*---------------------------------------------------------------------        Function        :       gen_normal_ran_var (generate normal random variable)        Purpose         :       to generate normal random variables        Algorithm       :       generate n uniform [0,1] random numbers, and sum them				up. Then shift and scale this sum to have the appropriate mean				and variance to give us a roughly N(0,1) random variable. 					    
sum(X_i) - n/2					Z = --------------      						sqrt(n/12)  ----------------------------------------------------------------------*/float gen_normal_ran_var (float mean, float variance){	int j;	int n=5000; 	/* can be set, should be higher the closer the no appro normal distribution */	float normal_ran_var;		normal_ran_var = 0.0;	for (j=0; j<n; j++)		normal_ran_var += (float)(rand())/(float)(RAND_MAX);	normal_ran_var = (float) ((normal_ran_var - n/2) / sqrt((float)(n/12)));	return (mean + normal_ran_var * variance);}int main (int argc, char *argv[]){	int no_cluster;	int miu;	int no_point;	FILE *ini;	FILE *conf;	FILE *dat;	int range;		/* set by ini file */	int dimension; 		/* set by ini file */	float outlier_ratio;	/* set by ini file*/	int spread_parameter;		/* set by ini file */	int scale_factor;		/* set by ini file*/	int no_nonoutlier;	int i, j, k, m, n;		/* for indexing */	int pointid = 0;		/* to identify and print out the point id */	Set_Point anchor_set;	Set_Subspace dimension_set;	float *exp;			/* expoonential random variables */	int *no_point_cluster;		/* number of points in each cluster */	float mean, variance;		/* mean and variance for normal distribution */	int search;			/* indicate whether found or not */	float temp;			/* use as temp variable to ensure the generated coordinate is within the range *///printf("%.20f\n", M_E);
//printf("%d\n", M_E);
//printf("%d\n", sizeof(M_E));
//printf("%d\n", sizeof(double));	if (argc != 4)	{		printf ("Usage: gendata ini_file conf_file dat_file\n");		exit(0);	}			/* set output file */	ini = fopen (argv[1], "r");	conf = fopen (argv[2], "w");	dat = fopen (argv[3], "w");	/* Readin parameters specified in the ini file */	fscanf (ini, "%d\n", &no_point);	fscanf (ini, "%d\n", &no_cluster);	fscanf (ini, "%d\n", &miu);	fscanf (ini, "%d\n", &range);		fscanf (ini, "%d\n", &dimension);	fscanf (ini, "%f\n", &outlier_ratio);		fscanf (ini, "%d\n", &spread_parameter);	fscanf (ini, "%d\n", &scale_factor);		/* Allocate and initialize memory for anchor_set */	anchor_set.no_point = no_cluster;	anchor_set.points = (Point *) malloc (sizeof(Point) * no_cluster);	for (i=0; i<no_cluster; i++)	{		anchor_set.points[i].dimension = dimension;		anchor_set.points[i].component = (float *)malloc(sizeof(float) * dimension);	}//printf("0\n");	/* Allocate and initialize memory for dimension_set */	dimension_set.no_subspace = no_cluster;	dimension_set.subspace = (Subspace *) malloc (sizeof(Subspace) * no_cluster);	/* Allocate and initialize memory for no of points in each cluster, exp_rand */	no_point_cluster = (int *) malloc (sizeof(int) * no_cluster);	exp = (float *) malloc (sizeof(float) * no_cluster);        srand((unsigned)time(NULL));//printf("0.1\n");	/* Generate Anchor Point and their associated dimension */	gen_anchor (no_cluster, dimension, range, &anchor_set);
//printf("0.2\n");	gen_dimension (no_cluster, dimension, miu, &dimension_set);//printf("1\n");	/* Generate No. of Point for each cluster */	gen_exp_ran_var (no_cluster, 1, exp);	no_nonoutlier = (int) (no_point * (1-outlier_ratio));	no_nonoutlier = gen_no_point_cluster (no_cluster, no_nonoutlier, exp, no_point_cluster);//printf("2\n");	/* Print conf to Configuration File */	for (i=0; i<no_cluster; i++)	{		fprintf (conf, "\n\nCluster %d has %d points\n anchor point is :", i, no_point_cluster[i]);		for (j=0; j<anchor_set.points[i].dimension; j++)			fprintf (conf, "\t%.3f", anchor_set.points[i].component[j]);		fprintf (conf, "\n dimension is :");		for (j=0; j<dimension_set.subspace[i].no_dimension; j++)			fprintf (conf, "\t%d", dimension_set.subspace[i].dimension[j]);	}	fclose (conf);//printf("3\n");	/* Final Output Print to Data File */	fprintf (dat, "%d\n%d\n%d\n%d\n", no_point, dimension, -range, range);	fprintf (dat, "/ Outliers\n");	/* Generate i  outliers */        for (i=0; i<no_point - no_nonoutlier; i++)          {		fprintf (dat, "%d\t", pointid++);		/* print the point id */                /* Each outliers, generate j components */                for (j=0; j<dimension; j++)                {                        /* Make the variation of +/- */                        if (rand()<RAND_MAX/2)                                 fprintf (dat, "%-.3f\t",(double)rand()/RAND_MAX*range);                        else    fprintf (dat, "%-.3f\t",(double)rand()/RAND_MAX*-range);                }                fprintf (dat, "\n");            }//printf("4\n");	/* Generate Points in each Cluster */	/* For each cluster */	for (i=0; i<no_cluster; i++)	{		fprintf (dat, "/ Cluster%d\n", i);		/* print the point id */		/* For each point */		for (j=0; j<no_point_cluster[i]; j++)		{			fprintf (dat, "%d\t", pointid++);			/* For each dimension */			for (k=0; k<dimension; k++)			{				search = 0;				/* Search if it is associated dimension */				for (m=0; 
m<dimension_set.subspace[i].no_dimension; m++)					if (k == dimension_set.subspace[i].dimension[m])					{						search = 1;						break;					}				/* Rand Gen for non-cluster dimension */				if (search == 0)				{		                        /* Make the variation of +/- */                		        if (rand()<RAND_MAX/2) 						fprintf (dat, "%-.3f\t", (float)(rand()*range)/RAND_MAX);					else	fprintf (dat, "%-.3f\t", -1 * (float)(rand()*range)/RAND_MAX);				}				/* Use Normal distribution for cluster dimension */				else				{						mean = anchor_set.points[i].component[k];					variance = (float) pow(((float)(1+rand()*(scale_factor-1))/RAND_MAX) * spread_parameter, 2.0);					while (((temp = gen_normal_ran_var(mean, variance)) >range) || (temp <-range)) 						;					fprintf (dat, "%-.3f\t", temp);				}			}			fprintf (dat, "\n");		}	}		fclose (dat);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -