📄 gendata.c
字号:
i--; else seed_array[i] = temp; }}

/*---------------------------------------------------------------------
   Function  : gen_exp_ran_var (generate exponential random variables)
   Purpose   : to generate and return exponential random variables
   Algorithm : Generate U in [0,1)
               Set X = -Log (1-U) / (1/mean)
   Params    : k    - number of variables to generate
               mean - mean of the exponential distribution
               exp  - output array with room for at least k floats
   Notes     : rand() is divided by RAND_MAX + 1 (in double) so that U
               can never reach 1; otherwise Log (1-U) would be Log (0)
               and the result would be infinite.
               The result is scaled by mean, as the algorithm above
               states (dividing by the rate 1/mean).
----------------------------------------------------------------------*/
void gen_exp_ran_var (int k, float mean, float *exp)
{
    int i;
    double u;

    for (i = 0; i < k; i++) {
        u = (double) rand () / ((double) RAND_MAX + 1.0);
        exp[i] = (float) (-log (1.0 - u) * (double) mean);
    }
}

/*---------------------------------------------------------------------
   Function  : gen_no_point_cluster (generate No. of point for each cluster)
   Purpose   : to generate No. of point for each cluster
   Algorithm : no_point_cluster[i] =
                   no_nonoutlier * (exp[i] / summation of exp)
   Params    : no_cluster    - number of clusters (length of both arrays)
               no_nonoutlier - number of points to share between clusters
               exp           - per-cluster weights
               return_value  - output array, one point count per cluster
   Returns   : the sum of the counts written to return_value.  Each share
               is truncated to an integer, so this can be smaller than
               no_nonoutlier.
   Notes     : if the weights do not add up to a positive, finite total
               no share can be computed; every count is then set to 0
               and 0 is returned instead of dividing by that total.
----------------------------------------------------------------------*/
int gen_no_point_cluster (int no_cluster, int no_nonoutlier, float *exp,
                          int *return_value)
{
    int real_no_point = 0;
    int i;
    float sum = 0.0;

    for (i = 0; i < no_cluster; i++)
        sum += exp[i];

    /* The negated comparison also rejects a NaN total; the second test
       rejects an infinite one (inf - inf is NaN, not 0). */
    if (!(sum > 0.0f) || (sum - sum) != 0.0f) {
        for (i = 0; i < no_cluster; i++)
            return_value[i] = 0;
        return 0;
    }

    for (i = 0; i < no_cluster; i++) {
        return_value[i] = (int) (no_nonoutlier * (exp[i] / sum));
        real_no_point += return_value[i];
    }

    return real_no_point;
}

/*---------------------------------------------------------------------
   Function  : gen_normal_ran_var (generate normal random variable)
   Purpose   : to generate normal random variables
   Algorithm : generate n uniform [0,1] random numbers, and sum them up.
               Then shift and scale this sum to have the appropriate
               mean and variance to give us a roughly N(0,1) random
               variable.
sum(X_i) - n/2 Z = -------------- sqrt(n/12) ----------------------------------------------------------------------*/float gen_normal_ran_var (float mean, float variance){ int j; int n=5000; /* can be set, should be higher the closer the no appro normal distribution */ float normal_ran_var; normal_ran_var = 0.0; for (j=0; j<n; j++) normal_ran_var += (float)(rand())/(float)(RAND_MAX); normal_ran_var = (float) ((normal_ran_var - n/2) / sqrt((float)(n/12))); return (mean + normal_ran_var * variance);}int main (int argc, char *argv[]){ int no_cluster; int miu; int no_point; FILE *ini; FILE *conf; FILE *dat; int range; /* set by ini file */ int dimension; /* set by ini file */ float outlier_ratio; /* set by ini file*/ int spread_parameter; /* set by ini file */ int scale_factor; /* set by ini file*/ int no_nonoutlier; int i, j, k, m, n; /* for indexing */ int pointid = 0; /* to identify and print out the point id */ Set_Point anchor_set; Set_Subspace dimension_set; float *exp; /* expoonential random variables */ int *no_point_cluster; /* number of points in each cluster */ float mean, variance; /* mean and variance for normal distribution */ int search; /* indicate whether found or not */ float temp; /* use as temp variable to ensure the generated coordinate is within the range *///printf("%.20f\n", M_E);
//printf("%d\n", M_E);
//printf("%d\n", sizeof(M_E));
//printf("%d\n", sizeof(double)); if (argc != 4) { printf ("Usage: gendata ini_file conf_file dat_file\n"); exit(0); } /* set output file */ ini = fopen (argv[1], "r"); conf = fopen (argv[2], "w"); dat = fopen (argv[3], "w"); /* Readin parameters specified in the ini file */ fscanf (ini, "%d\n", &no_point); fscanf (ini, "%d\n", &no_cluster); fscanf (ini, "%d\n", &miu); fscanf (ini, "%d\n", &range); fscanf (ini, "%d\n", &dimension); fscanf (ini, "%f\n", &outlier_ratio); fscanf (ini, "%d\n", &spread_parameter); fscanf (ini, "%d\n", &scale_factor); /* Allocate and initialize memory for anchor_set */ anchor_set.no_point = no_cluster; anchor_set.points = (Point *) malloc (sizeof(Point) * no_cluster); for (i=0; i<no_cluster; i++) { anchor_set.points[i].dimension = dimension; anchor_set.points[i].component = (float *)malloc(sizeof(float) * dimension); }//printf("0\n"); /* Allocate and initialize memory for dimension_set */ dimension_set.no_subspace = no_cluster; dimension_set.subspace = (Subspace *) malloc (sizeof(Subspace) * no_cluster); /* Allocate and initialize memory for no of points in each cluster, exp_rand */ no_point_cluster = (int *) malloc (sizeof(int) * no_cluster); exp = (float *) malloc (sizeof(float) * no_cluster); srand((unsigned)time(NULL));//printf("0.1\n"); /* Generate Anchor Point and their associated dimension */ gen_anchor (no_cluster, dimension, range, &anchor_set);
//printf("0.2\n"); gen_dimension (no_cluster, dimension, miu, &dimension_set);//printf("1\n"); /* Generate No. of Point for each cluster */ gen_exp_ran_var (no_cluster, 1, exp); no_nonoutlier = (int) (no_point * (1-outlier_ratio)); no_nonoutlier = gen_no_point_cluster (no_cluster, no_nonoutlier, exp, no_point_cluster);//printf("2\n"); /* Print conf to Configuration File */ for (i=0; i<no_cluster; i++) { fprintf (conf, "\n\nCluster %d has %d points\n anchor point is :", i, no_point_cluster[i]); for (j=0; j<anchor_set.points[i].dimension; j++) fprintf (conf, "\t%.3f", anchor_set.points[i].component[j]); fprintf (conf, "\n dimension is :"); for (j=0; j<dimension_set.subspace[i].no_dimension; j++) fprintf (conf, "\t%d", dimension_set.subspace[i].dimension[j]); } fclose (conf);//printf("3\n"); /* Final Output Print to Data File */ fprintf (dat, "%d\n%d\n%d\n%d\n", no_point, dimension, -range, range); fprintf (dat, "/ Outliers\n"); /* Generate i outliers */ for (i=0; i<no_point - no_nonoutlier; i++) { fprintf (dat, "%d\t", pointid++); /* print the point id */ /* Each outliers, generate j components */ for (j=0; j<dimension; j++) { /* Make the variation of +/- */ if (rand()<RAND_MAX/2) fprintf (dat, "%-.3f\t",(double)rand()/RAND_MAX*range); else fprintf (dat, "%-.3f\t",(double)rand()/RAND_MAX*-range); } fprintf (dat, "\n"); }//printf("4\n"); /* Generate Points in each Cluster */ /* For each cluster */ for (i=0; i<no_cluster; i++) { fprintf (dat, "/ Cluster%d\n", i); /* print the point id */ /* For each point */ for (j=0; j<no_point_cluster[i]; j++) { fprintf (dat, "%d\t", pointid++); /* For each dimension */ for (k=0; k<dimension; k++) { search = 0; /* Search if it is associated dimension */ for (m=0; m<dimension_set.subspace[i].no_dimension; m++) if (k == dimension_set.subspace[i].dimension[m]) { search = 1; break; } /* Rand Gen for non-cluster dimension */ if (search == 0) { /* Make the variation of +/- */ if (rand()<RAND_MAX/2) fprintf (dat, "%-.3f\t", 
(float)(rand()*range)/RAND_MAX); else fprintf (dat, "%-.3f\t", -1 * (float)(rand()*range)/RAND_MAX); } /* Use Normal distribution for cluster dimension */ else { mean = anchor_set.points[i].component[k]; variance = (float) pow(((float)(1+rand()*(scale_factor-1))/RAND_MAX) * spread_parameter, 2.0); while (((temp = gen_normal_ran_var(mean, variance)) >range) || (temp <-range)) ; fprintf (dat, "%-.3f\t", temp); } } fprintf (dat, "\n"); } } fclose (dat);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -