📄 epc2d.c
字号:
/* EPC2 implementationimage.data image.out image.res 7 20
ion.data ion.out ion.res 7 20 Program : EPC2 Programmer : Ng Ka Ka Purpose : To use EPC2 approach for Projective Clustering Usage : cluster <datafile> <outputfile> <max_no_cluster) Status : version 2.3 Bug Fixed : 1. when the no. of actual nodes is smaller than the expect no. of nodes after merge_sort, a bug invovles invalid memory copying Fixed to handle this general case 2. don't remember which bug i have fixed Changes : 1. set the density threshold to 1SD instead of 2SD ?? no change.... 2. command line parameter max_no_cluster 3. Change influence to 1 2 3 4 3 2 1 4. Change signal-to-noise ration from 10 to 5 5. Set THRESHOLD to 2 6. Add 2D Implementation add data structures to handle 2D implementation add functions to handle 2D implementation change influence to 1 1 2 1 1 2 3 2 1 1 2 1 1 set threshold to 7 (around 5-8 is better) 10. Use real histogram (do not add influence to neighbor) set NO_INTERVAL to 40 threshold should be around 5 11. distinguish and detect signature_bit with conflicting id_plane 12. Add detecting 45, 135, 225, 315 degree in detectdense_2d 13. set outlier to be <0.1 likeness 14. modify combine_signature_bit_list compare each two signature entries and calculate their similarity similarity > 0.6 would be merged continue the merging until no combining in an iteration (Remark: reduce the number of simlarity computation by examing pairs of signature entries with diff dimension < nodimension/5 to speed up) 15. measure the user CPU time 16. modify combine_signature_bit_list in each iteration copmare every pairs of signature entries, records the pair with the highest similarity merge the pair until the highest similarity is lower than let say 0.3 Final Edit Date : 26/8/02*/#include <stdio.h>#include <stdlib.h>#include <math.h>
#include <string.h>
#ifndef WIN32#include <sys/resource.h>#endif
/* Defined internal Parameters *//* This is the suggested values, changing the values can have (good or bad) effect on clustering quality and time */#define NO_INTERVAL 20 /* should be enough for general purpose */#define NO_SLOT 1 /* -------------------------------------------------------------*//* Data Structure */typedef struct { float *component; int *signature; int *signature_2d; int clust_assg;} Point;typedef struct { long nopoint; int nodimension; Point *point;} Dataset;typedef struct { int *dense_id; int *density; int no_dense; int maxid;} Histogram_2d;typedef struct tInterval_2D *Interval_2Dptr;typedef struct tInterval_2D{ int no_point; int mid1; int left1; int right1; int mid2; int left2; int right2; Interval_2Dptr next; Interval_2Dptr previous;} Interval_2D ;typedef struct { int no_dimension; int *dimension; float *locationleft; float *locationright;} Cluster;typedef struct { int no_plane; int *dimension1; int *dimension2; int *dense_id;} Cluster_2d;typedef struct tID_Plane *ID_Planeptr;typedef struct tID_Plane{ int dimension1; int dimension2; int no_id; int *id; ID_Planeptr next;} ID_Plane;typedef struct tSignature_Bit *Signature_Bitptr;typedef struct tSignature_Bit{ float score; int no_id_plane; unsigned int *bitmap; ID_Planeptr next_plane; Signature_Bitptr next_signature;} Signature_Bit;typedef struct { int no_signature; Signature_Bitptr next;} Signature_Bit_List;/* End Data Structure *//* -------------------------------------------------------------*//* ------------------- Function Prototypes ---------------------*//* Math Functions */long factorial(int m, int n);long combination (int m, int n);/* Signature Bit Data Structure Handling Functions */Signature_Bit *derive_signature_bit (const int *signature, int nodimension, int no_entry);void insert_signature_bit (Signature_Bit_List *signature_bit_list, Signature_Bit *signature_bit, int nodimension);void insert_signature_bit_score (Signature_Bit_List *signature_bit_list, Signature_Bit *signature_bit);int eq_signature_bit (Signature_Bit *sig1, Signature_Bit *sig2);int insert_id_plane (int dimension1, int dimension2, int id, ID_Planeptr id_planeptr);int eq_id_plane (ID_Planeptr target_planeptr, ID_Planeptr src_planeptr);int compare_signature_bit_score (Signature_Bitptr sig1, Signature_Bitptr sig2);int compare_signature_bit (Signature_Bitptr sig1, Signature_Bitptr sig2, int nodimension);Signature_Bitptr similar_signatures (Signature_Bit *sig1, Signature_Bit *sig2, float *similarity);int common_id_plane (Signature_Bit *sig1, Signature_Bit *sig2, int *uncommon1, int *uncommon2, ID_Planeptr the);int compare_id_plane (ID_Planeptr plane1, ID_Planeptr plane2);int diff_signature_bitmap (int *sig1, int *sig2, int nodimension);int subset_signature_bitmap (int *sig1, int *sig2, int nodimension);void combine_signature_bit_list (FILE *fp, Signature_Bit_List *signature_bit_list, int nodimension);void sort_signature_bit_score (Signature_Bit_List *original, Signature_Bit_List *result);void refine_signature_bit_list (Signature_Bit_List *signature_bit_list, int no_left);void remove_signature_bit (Signature_Bit_List *signature_bit_list, int position) ;void free_id_plane (ID_Planeptr the);void print_signature_bit (FILE *fp, Signature_Bitptr signature_bit, int nodimension);void print_signature_bit_list (FILE *fp, Signature_Bit_List *signature_bit_list, int nodimension);/* Signature_2D functions */void derive_Signature_2d (Dataset *dataset, Histogram_2d *histogram, float left, float right);void print_Signature_2d (Dataset *dataset, int no_entry, Histogram_2d *histogram);/* Point Data structure Handling Functions */void readin_Points(FILE *, Dataset *);void print_Points(Dataset *);/* Histogram and Dense Regions Functions */void gen_histogram_2d(FILE *, Dataset *, float, float, Histogram_2d *);float find_threshold_histogram_2d (FILE *output, Histogram_2d *histogram, int histogram_id, int iteration);void countdense_2d (FILE *output, Histogram_2d *histogram, int histogram_id);void searchdense_2d (FILE *output, Histogram_2d *histogram, int i, int j, int histogram_id, int original, float threshold);void detectdense_2d (FILE *output, Histogram_2d *histogram, int histogram_id, float threshold, int *current_id);void print_histogram_2d (FILE *, Dataset *, Histogram_2d *, int);/* Assigning and Clustering Functions */void assign_Clusters_2d (Signature_Bit_List *signature_bit_list, Cluster_2d *theclusters, int nodimension);void assign_Points_2d (FILE *fp, Dataset *dataset, Cluster_2d *clusters_2d, int no_cluster, Histogram_2d *histogram, int left_margin, int right_margin);void print_clustasg(FILE *output, Dataset *dataset, float left_margin, float right_margin, int no_cluster);void print_clustconfig_2d(FILE *out, Cluster_2d *cluster, int nocluster);/* general utility */int dense_id_2_base (int, int, Histogram_2d *);void dense_id_2_dim (int, int, int *, int *);/* --------------------------------------------------------------*//* Global Variable */int THRESHOLD; /* to be input as command line argument *//*--------------------------------------------------------------------- Function : factorial Purpose : to calculate the factorial m / factorial n ----------------------------------------------------------------------*/long factorial(int m, int n){ long result=1; int i; for (i=m; i>n; i--) result *= i; return result;}/*--------------------------------------------------------------------- Function : combination Purpose : to calculate mCn by calling factorial ----------------------------------------------------------------------*/long combination (int m, int n){ long result; result = factorial (m, m-n)/factorial (n, 1); return result;} /*------------------------------------------------------------------------ Function : print_signature_bit Purpose : to print the given signature bit ----------------------------------------------------------------------*/void print_signature_bit (FILE *fp, Signature_Bitptr signature_bit, int nodimension){ int i, j; int position, WordNo; ID_Planeptr current_id_plane; fprintf (fp, "%.3f\t", signature_bit->score); for (i=0; i<nodimension; i++) { position = i %32; WordNo = i /32; if (((signature_bit->bitmap[WordNo]>>position) & 1) == 1) { fprintf (fp, "%d ", i); } } fprintf (fp, "\n"); current_id_plane = signature_bit->next_plane; for (i=0; i<signature_bit->no_id_plane; i++) { current_id_plane = current_id_plane->next; fprintf (fp, "[%d, %d]\t", current_id_plane->dimension1, current_id_plane->dimension2); for (j=0; j<current_id_plane->no_id; j++) { fprintf (fp, "%d ", current_id_plane->id[j]); } fprintf (fp, "\n"); }}/*------------------------------------------------------------------------ Function : print_signature_bit_list Purpose : to print the given signature bit list ----------------------------------------------------------------------*/void print_signature_bit_list (FILE *fp, Signature_Bit_List *signature_bit_list, int nodimension){ int i, j; Signature_Bitptr current_signature; fprintf (fp, "There are %d signature bits \n", signature_bit_list->no_signature); current_signature = signature_bit_list->next; for (i=0; i<signature_bit_list->no_signature; i++) { current_signature = current_signature->next_signature; print_signature_bit (fp, current_signature, nodimension); }} /*------------------------------------------------------------------------ Function : compare_signature_bit_score Purpose : to compare two given signature bit Return : 1 if score of sig1 is smaller than sig2, -1 if score of sig 2 is smaller than sign1, 0 if equal ----------------------------------------------------------------------*/int compare_signature_bit_score (Signature_Bitptr sig1, Signature_Bitptr sig2){ if (sig1->score < sig2->score) return 1; else if (sig1->score > sig2->score) return -1; else return 0;} /*------------------------------------------------------------------------ Function : compare_signature_bit Purpose : to compare the "bitmap" of two given signature bit Return : -1 if score of sig1 is smaller than sig2, 1 if score of sig 2 is smaller than sign1, 0 if equal ----------------------------------------------------------------------*/int compare_signature_bit (Signature_Bitptr sig1, Signature_Bitptr sig2, int nodimension){ int no_int, i; no_int = nodimension / 32; if (no_int*32<nodimension) no_int++; for (i=0; i<no_int; i++) { if (sig1->bitmap[i]>sig2->bitmap[i]) return 1; else if (sig1->bitmap[i]<sig2->bitmap[i]) return -1; } return 0;} /*------------------------------------------------------------------------ Function : insert_signautre_bit Purpose : to insert a signature_bit into a signature_bit_list according to the bit value ----------------------------------------------------------------------*/ void insert_signature_bit (Signature_Bit_List *signature_bit_list, Signature_Bit *signature_bit, int nodimension){ Signature_Bit *current_signature; ID_Planeptr current_plane; int i, j; int compare_result; int inserted=0; current_signature = signature_bit_list->next; /* transverse the signature_bit_list */ for (i=0; i<signature_bit_list->no_signature; i++) { /* find the position where the signature_bit should be inserted into the linked list (if next signature in the list is smaller than the signature to be inserted)*/ compare_result = compare_signature_bit (current_signature->next_signature, signature_bit, nodimension); if (compare_result==1) { signature_bit->next_signature = current_signature->next_signature; current_signature->next_signature = signature_bit; signature_bit_list->no_signature++; inserted=1; break; } /* the signature_bit already exist in the linked list */ else if (compare_result == 0) { /* if the already exist signature_bit contains no conflict id_plane with signature_bit */ if (eq_signature_bit (current_signature->next_signature, signature_bit) == 1) { /* insert the id_plane into the existing signature bit */ current_plane = signature_bit->next_plane; for (j=0; j<signature_bit->no_id_plane; j++) { if ((insert_id_plane (current_plane->next->dimension1, current_plane->next->dimension2, current_plane->next->id[0], current_signature->next_signature->next_plane)) == 1) current_signature->next_signature->no_id_plane++; current_plane = current_plane->next; } /* add the score to the existing signature bit */ current_signature->next_signature->score += signature_bit->score; inserted=1; break; } /* if the already exist signature_bit contains conflict id_plane with signature_bit, continue to test next signature */ else { ; } } /* continue to search for the location */ current_signature = current_signature->next_signature; } /* case if transverse to the end of the signature_bit_list, insert signature_bit into the end */ if (inserted==0) { current_signature->next_signature = signature_bit; signature_bit->next_signature = NULL; signature_bit_list->no_signature++; } }int eq_signature_bit (Signature_Bit *sig1, Signature_Bit *sig2){ ID_Planeptr current_plane; int answer=1; current_plane = sig2->next_plane; while (current_plane->next != NULL) { if (eq_id_plane (sig1->next_plane, current_plane->next)==1) { current_plane = current_plane->next; } else { answer = 0; break; } } return answer;} /*------------------------------------------------------------------------ Function : sort_signature_bit_score Purpose : to sort signature_bit in signature_bit_list in descending order of score ----------------------------------------------------------------------*/ void sort_signature_bit_score (Signature_Bit_List *original, Signature_Bit_List *result){ Signature_Bit *current_signature=original->next; int i; for (i=0; i<original->no_signature; i++) { current_signature = current_signature->next_signature; insert_signature_bit_score(result, current_signature); }} /*------------------------------------------------------------------------ Function : insert_signautre_bit_score Purpose : to insert a signature_bit into a signature_bit_list according to the score value Called_by : sort_signature_bit_score ----------------------------------------------------------------------*/ void insert_signature_bit_score (Signature_Bit_List *signature_bit_list, Signature_Bit *signature_bit){ Signature_Bit *current_signature; Signature_Bit *new_signature; int compare_result; int inserted=0; int i; new_signature = (Signature_Bit *)malloc (sizeof (Signature_Bit)); memcpy (new_signature, signature_bit, sizeof (Signature_Bit)); current_signature = signature_bit_list->next;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -