📄 epc2d.c
字号:
// printf ("(%d)%d ", i, base[i]); }// printf ("\n"); for (n=0; n<dataset->nopoint; n++) { noitem = 0; for (i=0; i<no_entry; i++) { if (dataset->point[n].signature_2d[i] !=0) noitem++; } if (noitem > 0) { printf ("%d ", noitem); for (i=1; i<no_entry; i++) { if (dataset->point[n].signature_2d[i] != 0)// printf ("%d (%d)(%d)", dataset->point[n].signature_2d[i]+base[i], dataset->point[n].signature_2d[i], base[i]); printf ("%d ", dataset->point[n].signature_2d[i]+base[i]); } printf ("\n"); } }}void assign_Clusters_2d (Signature_Bit_List *signature_bit_list, Cluster_2d *theclusters, int nodimension){ int i, j; int k, l; Signature_Bitptr current_signature = signature_bit_list->next->next_signature; ID_Planeptr current_plane; int no_bounded; int WordNo = (nodimension-1)/32; for (i=0; i<signature_bit_list->no_signature; i++) { no_bounded = 0; for (k=0; k<=WordNo; k++) { for (l=0; l<32; l++) { if ((((current_signature->bitmap[k])>>l) & 1) == 1) no_bounded++; } } theclusters[i].no_plane = current_signature->no_id_plane; theclusters[i].dimension1 = (int *)malloc (sizeof (int) * current_signature->no_id_plane); theclusters[i].dimension2 = (int *)malloc (sizeof (int) * current_signature->no_id_plane); theclusters[i].dense_id = (int *)malloc (sizeof (int) * current_signature->no_id_plane); current_plane = current_signature->next_plane->next; for (j=0; j<current_signature->no_id_plane; j++) { theclusters[i].dimension1[j] = current_plane->dimension1; theclusters[i].dimension2[j] = current_plane->dimension2; theclusters[i].dense_id[j] = current_plane->id[0]; current_plane = current_plane -> next; } current_signature = current_signature->next_signature; }}int dense_id_2_base (int position, int nodimension, Histogram_2d *histogram){ int j, k, m, base=0; int dimension1, dimension2; dense_id_2_dim (position, nodimension, &dimension1, &dimension2); for (j=0; j<dimension1; j++) { for (k=j+1; k<nodimension; k++) { if (j==dimension1 && k == dimension2) return (base); base += histogram[j*nodimension+k].no_dense; } }}void dense_id_2_dim (int position, int nodimension, int *dimension1, int *dimension2){ int i, j; int temp = position+1; *dimension1=0; for (i=nodimension-1; temp>0; i--) { (*dimension1)++; temp -= i; } i++; (*dimension1)--; temp += i; *dimension2 = *dimension1+temp;} void assign_Points_2d (FILE *fp, Dataset *dataset, Cluster_2d *clusters_2d, int no_cluster, Histogram_2d *histogram, int left_margin, int right_margin){ long n; int interval_length; int i, j, k, l; float *likeliness; int match; int interval1, interval2; float max; int maxlike; /* calculate the corresponding interval length */ interval_length = (right_margin-left_margin)/NO_INTERVAL; likeliness = (float *)malloc (sizeof (float) * no_cluster); /* For each Data point */ for (n=0; n<dataset->nopoint; n++) { fprintf (fp, "Point %d\n", n);
#ifndef WIN32 bzero (likeliness, sizeof (float ) * no_cluster);
#else
memset(likeliness, 0, sizeof(float)*no_cluster);
#endif /* calculate the likeliness to each cluster */ for (k=0; k<no_cluster; k++) { match=0; for (l=0; l<clusters_2d[k].no_plane; l++) { i = clusters_2d[k].dimension1[l]; j = clusters_2d[k].dimension2[l]; /* find interval where the point locate */ interval1 = (dataset->point[n].component[i]-left_margin)/interval_length; /* handle the case coordinate = margin */ if (interval1==NO_INTERVAL) interval1--; else if (interval1<0) interval1++; interval2 = (dataset->point[n].component[j]-left_margin)/interval_length; /* handle the case coordinate = margin */ if (interval2==NO_INTERVAL) interval2--; else if (interval2<0) interval2++; if (histogram[i*dataset->nodimension+j].dense_id[interval1*NO_INTERVAL+interval2] == clusters_2d[k].dense_id[l]) { match ++; } } likeliness[k] = (float)match/(float)clusters_2d[k].no_plane; fprintf (fp, "k = %d, likeliness = %.3f\n",k, likeliness[k]); } /* find the cluster with max. likelines */ max = 0.0; maxlike=0; for (k=0; k<no_cluster; k++) { if ((likeliness[k]>=0.1)&&(likeliness[k]>max)) { max = likeliness[k]; maxlike = k+1; } } fprintf (fp, "max is %.3f\n", max); dataset->point[n].clust_assg = maxlike; fprintf (fp, "%d is %d\n", n, dataset->point[n].clust_assg); }} /* ----------------------------------------------------------------------- Function : print_clustasg Purpose : print the final cluster assignment of each datapoint to the result file Algorithm : for each datapoint printf the Outlier Group first for each cluster configuration for each datapoint if the datapoint belong to the cluster, print it -----------------------------------------------------------------------------*/void print_clustasg(FILE *output, Dataset *dataset, float left_margin, float right_margin, int no_cluster) { long i; int j; fprintf (output, "%d\n%d\n", dataset->nopoint, dataset->nodimension); fprintf (output, "%.0f\n%.0f\n", left_margin, right_margin); fprintf (output, "/ Outlier\n"); for (i=0; i<dataset->nopoint; i++) { if (dataset->point[i].clust_assg == 0) { fprintf (output, "%d\n", i); } } for (j=0; j<no_cluster; j++) { fprintf (output, "/ Cluster %d\n", j); for (i=0; i<dataset->nopoint; i++) { if (dataset->point[i].clust_assg == j+1) { fprintf (output, "%d\n", i); } } } }void print_clustconfig_2d(FILE *out, Cluster_2d *cluster, int nocluster){ int i, j; for (i=0; i<nocluster; i++) {// for (j=0; j<cluster[i].no_plane; j++) {// fprintf (out, "Plane %d ID %d Dimensions %d %d\n", j, cluster[i].dense_id[j], cluster[i].dimension1[j], cluster[i].dimension2[j]);// } }} int main (int argc, char *argv[]){ FILE *datafile, *resultfile, *outputfile; /* the FILE pointers */ char buffer[10000]; /* char buffer to store strings from input file */ Dataset dataset; /* Structure stores and points to information of all data points */ float left_margin; /* left margin of the whole space (we assume it is the same for all dimensions */ float right_margin; /* right margin of the whole spae (we assuem it is the same for all dmiensions */ int max_no_cluster; /* maximum no. of clusters the user is interested to find out */ Histogram_2d *histogram_2d; Cluster *theclusters; /* pointer to array of structures storing final cluster configurations */ Cluster_2d *theclusters_2d; int no_cluster; /* final no of clusters discovered */ int no_cluster_2d; /* final no of clusters discovered */ long i; /* loop index for datapoint id */ int j; /* loop index */ int k; /* loop index */ int found=1; /* for find_Dense_region to indicate whether new dense is discovered (and continue the iteration */ float old_threshold; float new_threshold; int current_id; int no_entry=0; Signature_Bit_List *signature_bit_list; Signature_Bit *signature_bit; Signature_Bit_List *signature_bit_list_sorted; Signature_Bit_List *signature_bit_list_sorted_combined; int iteration;#ifndef WIN32 struct rusage start_ru, end_ru; /* for user CPU time */#endif /* Check command line argument */ if (argc!=6) { printf ("Usage : %s datafile outputfile resultfile max_no_cluster threshold\n", argv[0]); exit (1); } /* get the max_no_cluster from command line */ max_no_cluster = (atoi(argv[4])); /* Open the Files */ datafile = fopen (argv[1], "r"); outputfile = fopen (argv[2], "w"); resultfile = fopen (argv[3], "w"); THRESHOLD = atoi (argv[5]);#ifndef WIN32 /* start time */ getrusage(RUSAGE_SELF, &start_ru);#endif /* Read the first two line of data file (no_point, dimension) */ fgets(buffer, 10000, datafile); sscanf (buffer, "%ld", &(dataset.nopoint)); fgets(buffer, 10000, datafile); sscanf (buffer, "%d", &(dataset.nodimension)); fgets(buffer, 10000, datafile); sscanf (buffer, "%f", &left_margin); fgets(buffer, 10000, datafile); sscanf (buffer, "%f", &right_margin); rewind(datafile); /*------------------------------------------------------------------------------*/ /* Memory Allocation */ /* for dataset */ for (i=dataset.nodimension-1; i>0; i--) no_entry+=i; /* initialize dataset */ dataset.point = (Point *)malloc (sizeof (Point) * dataset.nopoint); for (i=0; i<dataset.nopoint; i++) { dataset.point[i].component = (float *)malloc(sizeof(float) * dataset.nodimension); dataset.point[i].signature = (int *)malloc(sizeof(int) * dataset.nodimension); dataset.point[i].signature_2d = (int *)malloc(sizeof(int) * no_entry);
#ifndef WIN32 bzero (dataset.point[i].component, sizeof (float) * dataset.nodimension); bzero (dataset.point[i].signature, sizeof (int ) * dataset.nodimension); bzero (dataset.point[i].signature_2d, sizeof (int) * no_entry);
#else
memset (dataset.point[i].component, 0, sizeof (float) * dataset.nodimension);
memset (dataset.point[i].signature, 0, sizeof (int ) * dataset.nodimension);
memset (dataset.point[i].signature_2d, 0, sizeof (int) * no_entry);
#endif dataset.point[i].clust_assg = 0; } /* initialize histogram 2d */ histogram_2d = (Histogram_2d *)malloc(sizeof(Histogram_2d) * dataset.nodimension*dataset.nodimension); for (j=0; j<dataset.nodimension*dataset.nodimension; j++) { histogram_2d[j].density = (int *)malloc (sizeof(int) * NO_INTERVAL*NO_INTERVAL); histogram_2d[j].dense_id = (int *)malloc (sizeof(int) * NO_INTERVAL*NO_INTERVAL);
#ifndef WIN32 bzero (histogram_2d[j].density, sizeof(int) * NO_INTERVAL*NO_INTERVAL); bzero (histogram_2d[j].dense_id, sizeof(int) * NO_INTERVAL*NO_INTERVAL);
#else
memset (histogram_2d[j].density, 0, sizeof(int) * NO_INTERVAL*NO_INTERVAL);
memset (histogram_2d[j].dense_id, 0, sizeof(int) * NO_INTERVAL*NO_INTERVAL);
#endif histogram_2d[j].no_dense = 0; histogram_2d[j].maxid = 0; } /* for cluster configuration */ theclusters = (Cluster *)malloc(sizeof(Cluster) * 30*max_no_cluster); theclusters_2d = (Cluster_2d *)malloc (sizeof (Cluster) * 30 * max_no_cluster);
#ifndef WIN32 bzero (theclusters, sizeof(theclusters)); bzero (theclusters_2d, sizeof(theclusters_2d));
#else
memset (theclusters, 0, sizeof(theclusters));
memset (theclusters_2d, 0, sizeof(theclusters_2d));
#endif for (i=0; i<30*max_no_cluster; i++) { theclusters[i].dimension = (int *)malloc(sizeof(int) * dataset.nodimension); theclusters[i].locationleft = (float *)malloc(sizeof(float) * dataset.nodimension); theclusters[i].locationright = (float *)malloc(sizeof(float) * dataset.nodimension); theclusters_2d[i].dimension1 = (int *)malloc(sizeof(int) * dataset.nodimension*dataset.nodimension); theclusters_2d[i].dimension2 = (int *)malloc(sizeof(int) * dataset.nodimension*dataset.nodimension); theclusters_2d[i].dense_id = (int *)malloc(sizeof(int) * dataset.nodimension*dataset.nodimension); } /* initialize signature_bit_list */ signature_bit_list = (Signature_Bit_List*) malloc (sizeof (Signature_Bit_List) * NO_SLOT); signature_bit_list_sorted = (Signature_Bit_List*) malloc (sizeof (Signature_Bit_List) * NO_SLOT); signature_bit_list_sorted->no_signature = 0; signature_bit_list_sorted->next = (Signature_Bitptr) malloc(sizeof (Signature_Bit)); signature_bit_list_sorted->next->next_signature = NULL; signature_bit_list_sorted_combined = (Signature_Bit_List*) malloc (sizeof (Signature_Bit_List) * NO_SLOT); signature_bit_list_sorted_combined->no_signature = 0; signature_bit_list_sorted_combined->next = (Signature_Bitptr) malloc(sizeof (Signature_Bit)); signature_bit_list_sorted_combined->next->next_signature = NULL; for (i=0; i<NO_SLOT; i++) { signature_bit_list[i].no_signature = 0; signature_bit_list[i].next = (Signature_Bitptr) malloc (sizeof (Signature_Bit)); signature_bit_list[i].next->next_signature = NULL;; } /* Finished Memory Allocation */ /* -----------------------------------------------------------------------------*/ /* Read In the Data */ readin_Points(datafile, &dataset); /* Generate 2-d Histograms */ gen_histogram_2d(outputfile, &dataset, left_margin, right_margin, histogram_2d); /* For each 2D plane */ /* detect dense region */ for (i=0; i<dataset.nodimension; i++) for (j=i+1; j<dataset.nodimension; j++) { iteration = 0; new_threshold = find_threshold_histogram_2d (outputfile, histogram_2d, i*dataset.nodimension+j, iteration); old_threshold = 100000.0; current_id=1;// while (old_threshold * pow (0.8, iteration) > new_threshold) { while (old_threshold > new_threshold) { old_threshold = new_threshold; detectdense_2d (outputfile , histogram_2d, i*dataset.nodimension+j, new_threshold, ¤t_id); countdense_2d(outputfile, histogram_2d, i*dataset.nodimension+j); iteration++; new_threshold = find_threshold_histogram_2d (outputfile, histogram_2d, i*dataset.nodimension+j, iteration); } printf ("Plan %d %d has %d dense region \n", i, j, histogram_2d[i*dataset.nodimension+j].no_dense); if (histogram_2d[i*dataset.nodimension+j].no_dense>0) print_histogram_2d (outputfile, &dataset, histogram_2d, i*dataset.nodimension+j); } /* Deriving 2D Signatures for all datapoints*/ derive_Signature_2d (&dataset, histogram_2d, left_margin, right_margin); /* Derive and insert Signature BIT for all datapoints */ for (i=0; i<dataset.nopoint; i++) { signature_bit = (Signature_Bit *)derive_signature_bit (dataset.point[i].signature_2d, dataset.nodimension, no_entry); insert_signature_bit (signature_bit_list, signature_bit, dataset.nodimension); } sort_signature_bit_score (signature_bit_list, signature_bit_list_sorted); refine_signature_bit_list (signature_bit_list_sorted, 40*max_no_cluster); print_signature_bit_list (outputfile, signature_bit_list_sorted, dataset.nodimension); combine_signature_bit_list (outputfile, signature_bit_list_sorted, dataset.nodimension); sort_signature_bit_score (signature_bit_list_sorted,signature_bit_list_sorted_combined); print_signature_bit_list (outputfile, signature_bit_list_sorted_combined, dataset.nodimension); refine_signature_bit_list (signature_bit_list_sorted_combined, max_no_cluster); assign_Clusters_2d (signature_bit_list_sorted_combined, theclusters_2d, dataset.nodimension); assign_Points_2d (outputfile, &dataset, theclusters_2d, signature_bit_list_sorted_combined->no_signature, histogram_2d, left_margin, right_margin); print_clustasg(resultfile, &dataset, left_margin, right_margin, signature_bit_list_sorted_combined->no_signature); print_clustconfig_2d (outputfile, theclusters_2d, signature_bit_list_sorted_combined->no_signature); print_signature_bit_list (outputfile, signature_bit_list_sorted_combined, dataset.nodimension);#ifndef WIN32 /* end time */ getrusage(RUSAGE_SELF, &end_ru); /* total user CPU time used */ fprintf(outputfile, "\n\n**** total user CPU time used: %d sec.",end_ru.ru_utime.tv_sec-start_ru.ru_utime.tv_sec);#endif /* close the FILES */ fclose(datafile); fclose(outputfile); fclose (resultfile);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -